Add general partition (#2035)
### Pull Request Checklist:
- [ ] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #xyz
- [x] Tests for the changes have been added (for bug fixes / features)
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features)
- [x] CHANGELOG.rst has been updated (with summary of main changes)
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added

### What kind of change does this PR introduce?

* New function `general_partition` to calculate the partition of uncertainty
over a given list of dimensions. A minimal usage sketch follows below.
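
For context, a minimal usage sketch of the new function (the file name and dimension names are hypothetical; the arguments shown spell out the function's defaults):

```python
import xarray as xr

from xclim.ensembles import fractional_uncertainty, general_partition

# Hypothetical annual ensemble with one dimension per source of uncertainty.
da = xr.open_dataarray("tas_ensemble.nc")  # dims: time, scenario, model, reference, adjustment

# Partition the variance of the ensemble; these arguments match the defaults.
g, u = general_partition(
    da,
    sm="poly",  # estimate the forced response with a 4th-order polynomial
    var_first=["model", "reference", "adjustment"],
    mean_first=["scenario"],
    weights=["model", "reference", "adjustment"],
)

# Express each component as a fraction (%) of the total variance.
frac = fractional_uncertainty(u)
```

Here `u` holds one variance component per coordinate along the `uncertainty` dimension.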

### Does this PR introduce a breaking change?
No.

### Other information:
Developed for "On the importance of the reference data: Uncertainty
partitioning of bias-adjusted climate simulations over Quebec" by Lavoie
et al. (submitted in 2024), presented at the last jamboree.
Code for the paper here: https://github.com/Ouranosinc/partition
juliettelavoie authored Jan 13, 2025
2 parents b73841a + 7e09f65 commit af8e88f
Showing 6 changed files with 165 additions and 7 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,14 @@
Changelog
=========

v0.55.0 (unreleased)
--------------------
Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`)

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* New function ``xclim.ensembles.general_partition`` (:pull:`2035`).

v0.54.0 (2024-12-16)
--------------------
Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`), Éric Dupuis (:user:`coxipi`), Sascha Hofmann (:user:`saschahofmann`).
1 change: 1 addition & 0 deletions environment.yml
@@ -81,3 +81,4 @@ dependencies:
  - xdoctest >=1.1.5
  - yamllint >=1.35.1
  - pip >=24.2.0
  - pygments <2.19  # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -111,7 +111,8 @@ docs = [
"sphinx-copybutton",
"sphinx-mdinclude",
"sphinxcontrib-bibtex",
"sphinxcontrib-svg2pdfconverter[Cairosvg]"
"sphinxcontrib-svg2pdfconverter[Cairosvg]",
"pygments <2.19" # FIXME: temporary fix, https://github.com/felix-hilden/sphinx-codeautolink/issues/153
]
extras = ["fastnanquantile >=0.0.2", "flox >=0.9", "POT >=0.9.4"]
all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"]
1 change: 1 addition & 0 deletions src/xclim/ensembles/__init__.py
@@ -16,6 +16,7 @@
)
from xclim.ensembles._partitioning import (
    fractional_uncertainty,
    general_partition,
    hawkins_sutton,
    lafferty_sriver,
)
Expand Down
133 changes: 128 additions & 5 deletions src/xclim/ensembles/_partitioning.py
@@ -302,6 +302,128 @@ def lafferty_sriver(
    return g, uncertainty


def general_partition(
    da: xr.DataArray,
    sm: xr.DataArray | str = "poly",
    var_first: list | None = None,
    mean_first: list | None = None,
    weights: list | None = None,
) -> tuple[xr.DataArray, xr.DataArray]:
    """
    Return the mean and partitioned variance of an ensemble.

    This is a general function that can be used to implement methods from different papers.
    The defaults match the method of Lavoie et al. (2025, in preparation).

    Parameters
    ----------
    da : xr.DataArray
        Time series with a 'time' dimension and all the dimensions listed in
        `mean_first` and `var_first`.
    sm : xr.DataArray or {"poly"}
        Smoothed time series over time, with the same dimensions as `da`.
        If 'poly', the smoothed series is estimated with a 4th-order polynomial.
        It is also possible to pass a precomputed smoothed time series.
    var_first : list of str
        List of dimensions for which the variance over the dimension is computed first,
        followed by the mean over the other dimensions.
    mean_first : list of str
        List of dimensions for which the mean over the other dimensions is computed first,
        followed by the variance over the dimension.
    weights : list of str
        List of dimensions where the first operation is weighted.

    Returns
    -------
    xr.DataArray, xr.DataArray
        The mean projection of the smoothed time series, and the components of
        variance of the ensemble. The components are coordinates along the
        `uncertainty` dimension: the elements of `mean_first`, the elements of
        `var_first`, `variability` and `total`.

    Notes
    -----
    To prepare the input, make sure `da` has all the dimensions listed in `var_first`
    and `mean_first`, as well as `time`, renaming dimensions if needed,
    e.g. `da.rename({"experiment": "scenario"})`.

    To get the fraction of the total variance instead of the variance itself, call
    `fractional_uncertainty` on the output.
    """
    # Set defaults, matching Lavoie et al. (2025, in preparation).
    var_first = var_first or ["model", "reference", "adjustment"]
    mean_first = mean_first or ["scenario"]
    weights = weights or ["model", "reference", "adjustment"]

    all_types = mean_first + var_first

    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
        raise ValueError("This algorithm expects annual time series.")

    if not ({"time"} | set(all_types)).issubset(da.dims):
        error_msg = f"DataArray dimensions should include {all_types} and time."
        raise ValueError(error_msg)

    # Record the fit method now, since `sm` is overwritten with the fitted values below.
    if isinstance(sm, str) and sm == "poly":
        sm_fit = "poly"
        # Estimate the forced response with a 4th-order polynomial fit over time.
        fit = da.polyfit(dim="time", deg=4, skipna=True)
        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
            da.notnull()
        )
    elif isinstance(sm, xr.DataArray):
        sm_fit = "unknown"
    else:
        raise ValueError("sm should be 'poly' or a DataArray.")

# "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
# between the extracted forced response and the raw outputs, averaged over all outputs."
# same as lafferty_sriver()
nv_u = (da - sm).rolling(time=11, center=True).var().mean(dim=all_types)

all_u = []
total = nv_u.copy()
for t in mean_first:
all_but_t = [x for x in all_types if x != t]
if t in weights:
tw = sm.count(t)
t_u = sm.mean(dim=all_but_t).weighted(tw).var(dim=t)

else:
t_u = sm.mean(dim=all_but_t).var(dim=t)
all_u.append(t_u)
total += t_u

for t in var_first:
all_but_t = [x for x in all_types if x != t]
if t in weights:
tw = sm.count(t)
t_u = sm.var(dim=t).weighted(tw).mean(dim=all_but_t)

else:
t_u = sm.var(dim=t).mean(dim=all_but_t)
all_u.append(t_u)
total += t_u

# Create output array with the uncertainty components
u = pd.Index([*all_types, "variability", "total"], name="uncertainty")
uncertainty = xr.concat([*all_u, nv_u, total], dim=u)

uncertainty.attrs["indicator_long_name"] = da.attrs.get("long_name", "unknown")
uncertainty.attrs["indicator_description"] = da.attrs.get("description", "unknown")
uncertainty.attrs["indicator_units"] = da.attrs.get("units", "unknown")
uncertainty.attrs["partition_fit"] = sm if isinstance(sm, str) else "unknown"
# Keep a trace of the elements for each uncertainty component
for t in all_types:
uncertainty.attrs[t] = da[t].values

# Mean projection:
# This is not part of the original algorithm,
# but we want all partition algos to have similar outputs.
g = sm.mean(dim=all_types[0])
for dim in all_types[1:]:
g = g.mean(dim=dim)

return g, uncertainty


def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    """
    Return the fractional uncertainty.
@@ -316,8 +438,9 @@ def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    xr.DataArray
        Fractional, or relative uncertainty with respect to the total uncertainty.
    """
-    uncertainty = u / u.sel(uncertainty="total") * 100
-    uncertainty.attrs.update(u.attrs)
-    uncertainty.attrs["long_name"] = "Fraction of total variance"
-    uncertainty.attrs["units"] = "%"
-    return uncertainty
+    with xr.set_options(keep_attrs=True):
+        uncertainty = u / u.sel(uncertainty="total") * 100
+    uncertainty.attrs.update(u.attrs)
+    uncertainty.attrs["long_name"] = "Fraction of total variance"
+    uncertainty.attrs["units"] = "%"
+    return uncertainty
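
To make the `mean_first` / `var_first` distinction concrete, here is a small self-contained sketch (dimension names and values are illustrative, not from the PR) of the two operation orders on a toy smoothed ensemble:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)

# Toy "smoothed" ensemble: 3 scenarios x 4 models.
sm = xr.DataArray(rng.random((3, 4)), dims=("scenario", "model"))

# Mean first (how `mean_first` dimensions are treated): average over the
# other dimensions, then take the variance across the dimension itself.
scenario_u = sm.mean(dim="model").var(dim="scenario")

# Variance first (how `var_first` dimensions are treated): variance across
# the dimension, then average over the other dimensions.
model_u = sm.var(dim="model").mean(dim="scenario")
```

When a dimension is listed in `weights`, the reduction is weighted by `sm.count()` over that dimension; for instance, the model-uncertainty average above would down-weight scenarios with fewer available models.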
26 changes: 25 additions & 1 deletion tests/test_partitioning.py
Expand Up @@ -3,7 +3,12 @@
import numpy as np
import xarray as xr

-from xclim.ensembles import fractional_uncertainty, hawkins_sutton, lafferty_sriver
+from xclim.ensembles import (
+    fractional_uncertainty,
+    general_partition,
+    hawkins_sutton,
+    lafferty_sriver,
+)
from xclim.ensembles._filters import _concat_hist, _model_in_all_scens, _single_member


@@ -156,3 +161,22 @@ def graph():
        plt.show()

    # graph()


def test_general_partition(lafferty_sriver_ds):
    """Reproduce Lafferty & Sriver's results using general_partition."""
    g1, u1 = lafferty_sriver(lafferty_sriver_ds.tas)
    g2, u2 = general_partition(
        lafferty_sriver_ds.tas,
        var_first=["model", "downscaling"],
        mean_first=["scenario"],
        weights=["model", "downscaling"],
        sm="poly",
    )
    # Put the components in the same order before comparing.
    u2 = u2.sel(
        uncertainty=["model", "scenario", "downscaling", "variability", "total"]
    )

    assert u1.equals(u2)
    np.testing.assert_allclose(g1.values, g2.values, atol=0.1)
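
Since `total` is built as the exact sum of the named components plus internal variability, the fractional components should add up to 100 %. A hypothetical sanity check along those lines (not part of the PR's test suite), reusing `u2` from the test above:

```python
# Hypothetical check: fractions of the total variance sum to 100 %.
# Times where the 11-year rolling variance is undefined (series edges)
# are dropped first, since the components are NaN there.
frac = fractional_uncertainty(u2)
parts = frac.sel(uncertainty=["model", "scenario", "downscaling", "variability"])
parts = parts.dropna("time", how="any")
np.testing.assert_allclose(parts.sum("uncertainty"), 100.0, rtol=1e-6)
```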
