Add general partition (#2035)
### Pull Request Checklist:
- [ ] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #xyz
- [x] Tests for the changes have been added (for bug fixes / features)
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features)
- [x] CHANGELOG.rst has been updated (with summary of main changes)
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added

### What kind of change does this PR introduce?

* New function `general_partition` to calculate the partition of uncertainty
over a given list of dimensions. A minimal usage sketch follows below.
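
For context, a minimal usage sketch of the new function (the file name and dimension names are hypothetical; the arguments shown spell out the function's defaults):

```python
import xarray as xr

from xclim.ensembles import fractional_uncertainty, general_partition

# Hypothetical annual ensemble with one dimension per source of uncertainty.
da = xr.open_dataarray("tas_ensemble.nc")  # dims: time, scenario, model, reference, adjustment

# Partition the variance of the ensemble; these arguments match the defaults.
g, u = general_partition(
    da,
    sm="poly",  # estimate the forced response with a 4th-order polynomial
    var_first=["model", "reference", "adjustment"],
    mean_first=["scenario"],
    weights=["model", "reference", "adjustment"],
)

# Express each component as a fraction (%) of the total variance.
frac = fractional_uncertainty(u)
```

Here `u` holds one variance component per coordinate along the `uncertainty` dimension.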

### Does this PR introduce a breaking change?
No.

### Other information:
Developed for "On the importance of the reference data: Uncertainty
partitioning of bias-adjusted climate simulations over Quebec" by Lavoie
et al. (submitted in 2024), presented at the last jamboree.
Code for the paper here: https://github.com/Ouranosinc/partition
juliettelavoie authored Jan 13, 2025
2 parents b73841a + 7e09f65 commit af8e88f
Showing 6 changed files with 165 additions and 7 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,14 @@
Changelog
=========

v0.55.0 (unreleased)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`)

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* New function ``xclim.ensembles.general_partition`` (:pull:`2035`).

v0.54.0 (2024-12-16)
--------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`).
1 change: 1 addition & 0 deletions environment.yml
@@ -81,3 +81,4 @@ dependencies:
  - xdoctest >=1.1.5
  - yamllint >=1.35.1
  - pip >=24.2.0
  - pygments <2.19  # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -111,7 +111,8 @@ docs = [
"sphinx-copybutton",
"sphinx-mdinclude",
"sphinxcontrib-bibtex",
"sphinxcontrib-svg2pdfconverter[Cairosvg]"
"sphinxcontrib-svg2pdfconverter[Cairosvg]",
"pygments <2.19" # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
]
extras = ["fastnanquantile >=0.0.2", "flox >=0.9", "POT >=0.9.4"]
all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"]
1 change: 1 addition & 0 deletions src/xclim/ensembles/__init__.py
@@ -16,6 +16,7 @@
)
from xclim.ensembles._partitioning import (
    fractional_uncertainty,
    general_partition,
    hawkins_sutton,
    lafferty_sriver,
)
Expand Down
133 changes: 128 additions & 5 deletions src/xclim/ensembles/_partitioning.py
@@ -302,6 +302,128 @@ def lafferty_sriver(
    return g, uncertainty


def general_partition(
    da: xr.DataArray,
    sm: xr.DataArray | str = "poly",
    var_first: list | None = None,
    mean_first: list | None = None,
    weights: list | None = None,
) -> tuple[xr.DataArray, xr.DataArray]:
    """
    Return the mean and partitioned variance of an ensemble.

    This is a general function that can be used to implement methods from different papers.
    The defaults match the method of Lavoie et al. (2025, in preparation).

    Parameters
    ----------
    da : xr.DataArray
        Time series with a 'time' dimension and all the dimensions listed in
        `mean_first` and `var_first`.
    sm : xr.DataArray or {"poly"}
        Smoothed time series over time, with the same dimensions as `da`.
        If 'poly', the smoothed series is estimated with a 4th-order polynomial.
        It is also possible to pass a precomputed smoothed time series.
    var_first : list of str
        List of dimensions for which the variance over the dimension is computed first,
        followed by the mean over the other dimensions.
    mean_first : list of str
        List of dimensions for which the mean over the other dimensions is computed first,
        followed by the variance over the dimension.
    weights : list of str
        List of dimensions where the first operation is weighted.

    Returns
    -------
    xr.DataArray, xr.DataArray
        The mean projection of the smoothed time series, and the components of
        variance of the ensemble. The components are coordinates along the
        `uncertainty` dimension: the elements of `mean_first`, the elements of
        `var_first`, `variability` and `total`.

    Notes
    -----
    To prepare the input, make sure `da` has all the dimensions listed in `var_first`
    and `mean_first`, as well as `time`, renaming dimensions if needed,
    e.g. `da.rename({"experiment": "scenario"})`.

    To get the fraction of the total variance instead of the variance itself, call
    `fractional_uncertainty` on the output.
    """
    # Set defaults, matching Lavoie et al. (2025, in preparation).
    var_first = var_first or ["model", "reference", "adjustment"]
    mean_first = mean_first or ["scenario"]
    weights = weights or ["model", "reference", "adjustment"]

    all_types = mean_first + var_first

    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
        raise ValueError("This algorithm expects annual time series.")

    if not ({"time"} | set(all_types)).issubset(da.dims):
        error_msg = f"DataArray dimensions should include {all_types} and time."
        raise ValueError(error_msg)

    # Record the fit method now, since `sm` is overwritten with the fitted values below.
    if isinstance(sm, str) and sm == "poly":
        sm_fit = "poly"
        # Estimate the forced response with a 4th-order polynomial fit over time.
        fit = da.polyfit(dim="time", deg=4, skipna=True)
        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
            da.notnull()
        )
    elif isinstance(sm, xr.DataArray):
        sm_fit = "unknown"
    else:
        raise ValueError("sm should be 'poly' or a DataArray.")

# "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
# between the extracted forced response and the raw outputs, averaged over all outputs."
# same as lafferty_sriver()
nv_u = (da - sm).rolling(time=11, center=True).var().mean(dim=all_types)

all_u = []
total = nv_u.copy()
for t in mean_first:
all_but_t = [x for x in all_types if x != t]
if t in weights:
tw = sm.count(t)
t_u = sm.mean(dim=all_but_t).weighted(tw).var(dim=t)

else:
t_u = sm.mean(dim=all_but_t).var(dim=t)
all_u.append(t_u)
total += t_u

for t in var_first:
all_but_t = [x for x in all_types if x != t]
if t in weights:
tw = sm.count(t)
t_u = sm.var(dim=t).weighted(tw).mean(dim=all_but_t)

else:
t_u = sm.var(dim=t).mean(dim=all_but_t)
all_u.append(t_u)
total += t_u

# Create output array with the uncertainty components
u = pd.Index([*all_types, "variability", "total"], name="uncertainty")
uncertainty = xr.concat([*all_u, nv_u, total], dim=u)

uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown")
uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown")
uncertainty.attrs["indicator_units"] = da.attrs.get("units", "unknown")
uncertainty.attrs["partition_fit"] = sm if isinstance(sm, str) else "unknown"
# Keep a trace of the elements for each uncertainty component
for t in all_types:
uncertainty.attrs[t] = da[t].values

# Mean projection:
# This is not part of the original algorithm,
# but we want all partition algos to have similar outputs.
g = sm.mean(dim=all_types[0])
for dim in all_types[1:]:
g = g.mean(dim=dim)

return g, uncertainty


def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    """
    Return the fractional uncertainty.
@@ -316,8 +438,9 @@ def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    xr.DataArray
        Fractional, or relative uncertainty with respect to the total uncertainty.
    """
-    uncertainty = u / u.sel(uncertainty="total") * 100
-    uncertainty.attrs.update(u.attrs)
-    uncertainty.attrs["long_name"] = "Fraction of total variance"
-    uncertainty.attrs["units"] = "%"
-    return uncertainty
+    with xr.set_options(keep_attrs=True):
+        uncertainty = u / u.sel(uncertainty="total") * 100
+    uncertainty.attrs.update(u.attrs)
+    uncertainty.attrs["long_name"] = "Fraction of total variance"
+    uncertainty.attrs["units"] = "%"
+    return uncertainty
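
To make the `mean_first` / `var_first` distinction concrete, here is a small self-contained sketch (dimension names and values are illustrative, not from the PR) of the two operation orders on a toy smoothed ensemble:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)

# Toy "smoothed" ensemble: 3 scenarios x 4 models.
sm = xr.DataArray(rng.random((3, 4)), dims=("scenario", "model"))

# Mean first (how `mean_first` dimensions are treated): average over the
# other dimensions, then take the variance across the dimension itself.
scenario_u = sm.mean(dim="model").var(dim="scenario")

# Variance first (how `var_first` dimensions are treated): variance across
# the dimension, then average over the other dimensions.
model_u = sm.var(dim="model").mean(dim="scenario")
```

When a dimension is listed in `weights`, the reduction is weighted by `sm.count()` over that dimension; for instance, the model-uncertainty average above would down-weight scenarios with fewer available models.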
26 changes: 25 additions & 1 deletion tests/test_partitioning.py
Expand Up @@ -3,7 +3,12 @@
import numpy as np
import xarray as xr

-from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver
+from xclim.ensembles import (
+    fractional_uncertainty,
+    general_partition,
+    hawkins_sutton,
+    lafferty_sriver,
+)
from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member


@@ -156,3 +161,22 @@ def graph():
        plt.show()

    # graph()


def test_general_partition(lafferty_sriver_ds):
    """Reproduce Lafferty & Sriver's results using general_partition."""
    g1, u1 = lafferty_sriver(lafferty_sriver_ds.tas)
    g2, u2 = general_partition(
        lafferty_sriver_ds.tas,
        var_first=["model", "downscaling"],
        mean_first=["scenario"],
        weights=["model", "downscaling"],
        sm="poly",
    )
    # Put the components in the same order before comparing.
    u2 = u2.sel(
        uncertainty=["model", "scenario", "downscaling", "variability", "total"]
    )

    assert u1.equals(u2)
    np.testing.assert_allclose(g1.values, g2.values, atol=0.1)
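
Since `total` is built as the exact sum of the named components plus internal variability, the fractional components should add up to 100 %. A hypothetical sanity check along those lines (not part of the PR's test suite), reusing `u2` from the test above:

```python
# Hypothetical check: fractions of the total variance sum to 100 %.
# Times where the 11-year rolling variance is undefined (series edges)
# are dropped first, since the components are NaN there.
frac = fractional_uncertainty(u2)
parts = frac.sel(uncertainty=["model", "scenario", "downscaling", "variability"])
parts = parts.dropna("time", how="any")
np.testing.assert_allclose(parts.sum("uncertainty"), 100.0, rtol=1e-6)
```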
