From d63ac5594747256ad7413b0b232a2bab8a663611 Mon Sep 17 00:00:00 2001 From: David Huard Date: Thu, 25 Apr 2024 15:20:40 -0400 Subject: [PATCH 1/2] Add support for method argument in ensemble_percentile --- CHANGES.rst | 6 ++++++ tests/test_ensembles.py | 14 ++++++++++++++ xclim/ensembles/_base.py | 38 +++++++++++++++++++++++++++++++++----- 3 files changed, 53 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bf9636021..163f6432f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -15,6 +15,12 @@ New indicators * New ``snw_season_length`` and ``snd_season_length`` computing the duration between the start and the end of the snow season, both defined as the first day of a continuous period with snow above/under a threshold. Previous versions of these indicators were renamed ``snw_days_above`` and ``snd_days_above`` to better reflect what they computed : the number of days with snow above a given threshold (with no notion of continuity). (:issue:`1703`, :pull:`1708`). * Added ``atmos.duff_moisture_code``, part of the Canadian Forest Fire Weather Index System. It was already an output of the `atmos.cffwis_indices`, but now has its own standalone indicator. (:issue:`1698`, :pull:`1712`). +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* `ensemble_percentiles` now takes a `method` argument, accepting + 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear' (default), + 'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`). + Breaking changes ^^^^^^^^^^^^^^^^ * The previously deprecated functions ``xclim.sdba.processing.construct_moving_yearly_window`` and ``xclim.sdba.processing.unpack_moving_yearly_window`` have been removed. These functions have been replaced by ``xclim.core.calendar.stack_periods`` and ``xclim.core.calendar.unstack_periods``. (:pull:`1717`). diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py index 91fafb92e..5d7aac9d5 100644 --- a/tests/test_ensembles.py +++ b/tests/test_ensembles.py @@ -176,6 +176,20 @@ def test_calc_perc(self, transpose, ensemble_dataset_objects, open_dataset): ), out1["tg_mean_p90"].isel(time=0, lon=5, lat=5), ) + + # Specify method + np.testing.assert_array_almost_equal( + mquantiles( + ens["tg_mean"].isel(time=0, lon=5, lat=5), + alphap=0.5, + betap=0.5, + prob=0.90, + ), + ensembles.ensemble_percentiles( + ens.isel(time=0, lon=5, lat=5), values=[90], method="hazen" + ).tg_mean_p90, + ) + assert np.all(out1["tg_mean_p90"] > out1["tg_mean_p50"]) assert np.all(out1["tg_mean_p50"] > out1["tg_mean_p10"]) diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index a76fed8d1..cf82e6364 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -17,6 +17,16 @@ from xclim.core.formatting import update_history from xclim.core.utils import calc_perc +# The alpha and beta parameters for the quantile function +_quantile_params = { + "interpolated_inverted_cdf": (0, 1), + "hazen": (0.5, 0.5), + "weibull": (0, 0), + "linear": (1, 1), + "median_unbiased": (1 / 3, 1 / 3), + "normal_unbiased": (3 / 8, 3 / 8), +} + def create_ensemble( datasets: Any, @@ -207,6 +217,7 @@ def ensemble_percentiles( min_members: int | None = 1, weights: xr.DataArray | None = None, split: bool = True, + method: str = "linear", ) -> xr.DataArray | xr.Dataset: """Calculate ensemble statistics between a results from an ensemble of climate simulations. @@ -230,10 +241,21 @@ def ensemble_percentiles( The default (1) essentially skips this check. weights : xr.DataArray, optional Weights to apply along the 'realization' dimension. This array cannot contain missing values. - When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm. + When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm, + and does not support `method` values other than `linear`. split : bool Whether to split each percentile into a new variable or concatenate the output along a new "percentiles" dimension. + method : str, optional + This parameter specifies the method to use for estimating the percentile, see the `numpy.percentile` + documentation for more information. The following methods are supported + + - 'interpolated_inverted_cdf' + - 'hazen' + - 'weibull' + - 'linear' (default) + - 'median_unbiased' + - 'normal_unbiased' Returns ------- @@ -274,6 +296,7 @@ def ensemble_percentiles( split=split, min_members=min_members, weights=weights, + method=method, ) for da in ens.data_vars.values() if "realization" in da.dims @@ -310,20 +333,25 @@ def ensemble_percentiles( ens = ens.chunk({"realization": -1}) if weights is None: + alpha, beta = _quantile_params[method] + out = xr.apply_ufunc( calc_perc, ens, input_core_dims=[["realization"]], output_core_dims=[["percentiles"]], keep_attrs=True, - kwargs=dict( - percentiles=values, - ), + kwargs=dict(percentiles=values, alpha=alpha, beta=beta), dask="parallelized", output_dtypes=[ens.dtype], dask_gufunc_kwargs=dict(output_sizes={"percentiles": len(values)}), ) else: + if method != "linear": + raise ValueError( + "Only the 'linear' method is supported when using weights." + ) + with xr.set_options(keep_attrs=True): # xclim's calc_perc does not support weighted arrays, so xarray's native function is used instead. qt = np.array(values) / 100 # xarray requires values between 0 and 1 @@ -350,7 +378,7 @@ def ensemble_percentiles( out = out.rename(name_dict={p: f"{ens.name}_p{int(p):02d}"}) out.attrs["history"] = update_history( - f"Computation of the percentiles on {ens.realization.size} ensemble members.", + f"Computation of the percentiles on {ens.realization.size} ensemble members using method {method}.", ens, ) From 8928bf0e06066aed3a4b71f5c76157304c94e6ce Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 29 Apr 2024 14:34:23 -0400 Subject: [PATCH 2/2] Use Literal to list allowed percentile methods --- CHANGES.rst | 9 +++------ xclim/ensembles/_base.py | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e796b48ae..17b0bfd16 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,18 +13,15 @@ Announcements New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Indicator ``xclim.atmos.potential_evapotranspiration`` and indice ``xclim.indices.potential_evapotranspiration`` now accept a new value (`DA02`) for argument `method` implementing potential evapotranspiration based on Droogers and Allen (2002). (:issue:`1710`, :pull:`1723`). +* `ensemble_percentiles` now takes a `method` argument, accepting + 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear' (default), + 'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`). New indicators ^^^^^^^^^^^^^^ * New ``snw_season_length`` and ``snd_season_length`` computing the duration between the start and the end of the snow season, both defined as the first day of a continuous period with snow above/under a threshold. Previous versions of these indicators were renamed ``snw_days_above`` and ``snd_days_above`` to better reflect what they computed : the number of days with snow above a given threshold (with no notion of continuity). (:issue:`1703`, :pull:`1708`). * Added ``atmos.duff_moisture_code``, part of the Canadian Forest Fire Weather Index System. It was already an output of the `atmos.cffwis_indices`, but now has its own standalone indicator. (:issue:`1698`, :pull:`1712`). -New features and enhancements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* `ensemble_percentiles` now takes a `method` argument, accepting - 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear' (default), - 'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`). - Breaking changes ^^^^^^^^^^^^^^^^ * The previously deprecated functions ``xclim.sdba.processing.construct_moving_yearly_window`` and ``xclim.sdba.processing.unpack_moving_yearly_window`` have been removed. These functions have been replaced by ``xclim.core.calendar.stack_periods`` and ``xclim.core.calendar.unstack_periods``. (:pull:`1717`). diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py index cf82e6364..2b9a5987f 100644 --- a/xclim/ensembles/_base.py +++ b/xclim/ensembles/_base.py @@ -8,7 +8,7 @@ from collections.abc import Sequence from glob import glob from pathlib import Path -from typing import Any +from typing import Any, Literal import numpy as np import xarray as xr @@ -217,7 +217,14 @@ def ensemble_percentiles( min_members: int | None = 1, weights: xr.DataArray | None = None, split: bool = True, - method: str = "linear", + method: Literal[ + "linear", + "interpolated_inverted_cdf", + "hazen", + "weibull", + "median_unbiased", + "normal_unbiased", + ] = "linear", ) -> xr.DataArray | xr.Dataset: """Calculate ensemble statistics between a results from an ensemble of climate simulations. @@ -246,16 +253,8 @@ def ensemble_percentiles( split : bool Whether to split each percentile into a new variable or concatenate the output along a new "percentiles" dimension. - method : str, optional - This parameter specifies the method to use for estimating the percentile, see the `numpy.percentile` - documentation for more information. The following methods are supported - - - 'interpolated_inverted_cdf' - - 'hazen' - - 'weibull' - - 'linear' (default) - - 'median_unbiased' - - 'normal_unbiased' + method : {"linear", "interpolated_inverted_cdf", "hazen", "weibull", "median_unbiased", "normal_unbiased"} + Method to use for estimating the percentile, see the `numpy.percentile` documentation for more information. Returns -------