From d63ac5594747256ad7413b0b232a2bab8a663611 Mon Sep 17 00:00:00 2001
From: David Huard <huard.david@ouranos.ca>
Date: Thu, 25 Apr 2024 15:20:40 -0400
Subject: [PATCH 1/2] Add support for method argument in ensemble_percentile

---
 CHANGES.rst              |  6 ++++++
 tests/test_ensembles.py  | 14 ++++++++++++++
 xclim/ensembles/_base.py | 38 +++++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index bf9636021..163f6432f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -15,6 +15,12 @@ New indicators
 * New ``snw_season_length`` and ``snd_season_length`` computing the duration between the start and the end of the snow season, both defined as the first day of a continuous period with snow above/under a threshold. Previous versions of these indicators were renamed ``snw_days_above`` and ``snd_days_above`` to better reflect what they computed : the number of days with snow above a given threshold (with no notion of continuity). (:issue:`1703`, :pull:`1708`).
 * Added ``atmos.duff_moisture_code``, part of the Canadian Forest Fire Weather Index System. It was already an output of the `atmos.cffwis_indices`, but now has its own standalone indicator. (:issue:`1698`, :pull:`1712`).
 
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* `ensemble_percentiles` now takes a `method` argument, accepting
+  'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear'  (default),
+  'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`).
+
 Breaking changes
 ^^^^^^^^^^^^^^^^
 * The previously deprecated functions ``xclim.sdba.processing.construct_moving_yearly_window`` and ``xclim.sdba.processing.unpack_moving_yearly_window`` have been removed. These functions have been replaced by ``xclim.core.calendar.stack_periods`` and ``xclim.core.calendar.unstack_periods``. (:pull:`1717`).
diff --git a/tests/test_ensembles.py b/tests/test_ensembles.py
index 91fafb92e..5d7aac9d5 100644
--- a/tests/test_ensembles.py
+++ b/tests/test_ensembles.py
@@ -176,6 +176,20 @@ def test_calc_perc(self, transpose, ensemble_dataset_objects, open_dataset):
             ),
             out1["tg_mean_p90"].isel(time=0, lon=5, lat=5),
         )
+
+        # Specify method
+        np.testing.assert_array_almost_equal(
+            mquantiles(
+                ens["tg_mean"].isel(time=0, lon=5, lat=5),
+                alphap=0.5,
+                betap=0.5,
+                prob=0.90,
+            ),
+            ensembles.ensemble_percentiles(
+                ens.isel(time=0, lon=5, lat=5), values=[90], method="hazen"
+            ).tg_mean_p90,
+        )
+
         assert np.all(out1["tg_mean_p90"] > out1["tg_mean_p50"])
         assert np.all(out1["tg_mean_p50"] > out1["tg_mean_p10"])
 
diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py
index a76fed8d1..cf82e6364 100644
--- a/xclim/ensembles/_base.py
+++ b/xclim/ensembles/_base.py
@@ -17,6 +17,16 @@
 from xclim.core.formatting import update_history
 from xclim.core.utils import calc_perc
 
+# The alpha and beta parameters for the quantile function
+_quantile_params = {
+    "interpolated_inverted_cdf": (0, 1),
+    "hazen": (0.5, 0.5),
+    "weibull": (0, 0),
+    "linear": (1, 1),
+    "median_unbiased": (1 / 3, 1 / 3),
+    "normal_unbiased": (3 / 8, 3 / 8),
+}
+
 
 def create_ensemble(
     datasets: Any,
@@ -207,6 +217,7 @@ def ensemble_percentiles(
     min_members: int | None = 1,
     weights: xr.DataArray | None = None,
     split: bool = True,
+    method: str = "linear",
 ) -> xr.DataArray | xr.Dataset:
     """Calculate ensemble statistics between a results from an ensemble of climate simulations.
 
@@ -230,10 +241,21 @@ def ensemble_percentiles(
         The default (1) essentially skips this check.
     weights : xr.DataArray, optional
         Weights to apply along the 'realization' dimension. This array cannot contain missing values.
-        When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm.
+        When given, the function uses xarray's quantile method which is slower than xclim's NaN-optimized algorithm,
+        and does not support `method` values other than `linear`.
     split : bool
         Whether to split each percentile into a new variable
         or concatenate the output along a new "percentiles" dimension.
+    method : str, optional
+        This parameter specifies the method to use for estimating the percentile, see the `numpy.percentile`
+        documentation for more information. The following methods are supported
+
+          - 'interpolated_inverted_cdf'
+          - 'hazen'
+          - 'weibull'
+          - 'linear'  (default)
+          - 'median_unbiased'
+          - 'normal_unbiased'
 
     Returns
     -------
@@ -274,6 +296,7 @@ def ensemble_percentiles(
                     split=split,
                     min_members=min_members,
                     weights=weights,
+                    method=method,
                 )
                 for da in ens.data_vars.values()
                 if "realization" in da.dims
@@ -310,20 +333,25 @@ def ensemble_percentiles(
             ens = ens.chunk({"realization": -1})
 
     if weights is None:
+        alpha, beta = _quantile_params[method]
+
         out = xr.apply_ufunc(
             calc_perc,
             ens,
             input_core_dims=[["realization"]],
             output_core_dims=[["percentiles"]],
             keep_attrs=True,
-            kwargs=dict(
-                percentiles=values,
-            ),
+            kwargs=dict(percentiles=values, alpha=alpha, beta=beta),
             dask="parallelized",
             output_dtypes=[ens.dtype],
             dask_gufunc_kwargs=dict(output_sizes={"percentiles": len(values)}),
         )
     else:
+        if method != "linear":
+            raise ValueError(
+                "Only the 'linear' method is supported when using weights."
+            )
+
         with xr.set_options(keep_attrs=True):
             # xclim's calc_perc does not support weighted arrays, so xarray's native function is used instead.
             qt = np.array(values) / 100  # xarray requires values between 0 and 1
@@ -350,7 +378,7 @@ def ensemble_percentiles(
             out = out.rename(name_dict={p: f"{ens.name}_p{int(p):02d}"})
 
     out.attrs["history"] = update_history(
-        f"Computation of the percentiles on {ens.realization.size} ensemble members.",
+        f"Computation of the percentiles on {ens.realization.size} ensemble members using method {method}.",
         ens,
     )
 

From 8928bf0e06066aed3a4b71f5c76157304c94e6ce Mon Sep 17 00:00:00 2001
From: David Huard <huard.david@ouranos.ca>
Date: Mon, 29 Apr 2024 14:34:23 -0400
Subject: [PATCH 2/2] Use Literal to list allowed percentile methods

---
 CHANGES.rst              |  9 +++------
 xclim/ensembles/_base.py | 23 +++++++++++------------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index e796b48ae..17b0bfd16 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -13,18 +13,15 @@ Announcements
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 * Indicator ``xclim.atmos.potential_evapotranspiration`` and indice ``xclim.indices.potential_evapotranspiration`` now accept a new value (`DA02`) for argument `method` implementing potential evapotranspiration based on Droogers and Allen (2002). (:issue:`1710`, :pull:`1723`).
+* `ensemble_percentiles` now takes a `method` argument, accepting
+  'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear'  (default),
+  'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`).
 
 New indicators
 ^^^^^^^^^^^^^^
 * New ``snw_season_length`` and ``snd_season_length`` computing the duration between the start and the end of the snow season, both defined as the first day of a continuous period with snow above/under a threshold. Previous versions of these indicators were renamed ``snw_days_above`` and ``snd_days_above`` to better reflect what they computed : the number of days with snow above a given threshold (with no notion of continuity). (:issue:`1703`, :pull:`1708`).
 * Added ``atmos.duff_moisture_code``, part of the Canadian Forest Fire Weather Index System. It was already an output of the `atmos.cffwis_indices`, but now has its own standalone indicator. (:issue:`1698`, :pull:`1712`).
 
-New features and enhancements
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* `ensemble_percentiles` now takes a `method` argument, accepting
-  'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear'  (default),
-  'median_unbiased' and 'normal_unbiased'. (:issue:`1694`, :pull:`1732`).
-
 Breaking changes
 ^^^^^^^^^^^^^^^^
 * The previously deprecated functions ``xclim.sdba.processing.construct_moving_yearly_window`` and ``xclim.sdba.processing.unpack_moving_yearly_window`` have been removed. These functions have been replaced by ``xclim.core.calendar.stack_periods`` and ``xclim.core.calendar.unstack_periods``. (:pull:`1717`).
diff --git a/xclim/ensembles/_base.py b/xclim/ensembles/_base.py
index cf82e6364..2b9a5987f 100644
--- a/xclim/ensembles/_base.py
+++ b/xclim/ensembles/_base.py
@@ -8,7 +8,7 @@
 from collections.abc import Sequence
 from glob import glob
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import numpy as np
 import xarray as xr
@@ -217,7 +217,14 @@ def ensemble_percentiles(
     min_members: int | None = 1,
     weights: xr.DataArray | None = None,
     split: bool = True,
-    method: str = "linear",
+    method: Literal[
+        "linear",
+        "interpolated_inverted_cdf",
+        "hazen",
+        "weibull",
+        "median_unbiased",
+        "normal_unbiased",
+    ] = "linear",
 ) -> xr.DataArray | xr.Dataset:
     """Calculate ensemble statistics between a results from an ensemble of climate simulations.
 
@@ -246,16 +253,8 @@ def ensemble_percentiles(
     split : bool
         Whether to split each percentile into a new variable
         or concatenate the output along a new "percentiles" dimension.
-    method : str, optional
-        This parameter specifies the method to use for estimating the percentile, see the `numpy.percentile`
-        documentation for more information. The following methods are supported
-
-          - 'interpolated_inverted_cdf'
-          - 'hazen'
-          - 'weibull'
-          - 'linear'  (default)
-          - 'median_unbiased'
-          - 'normal_unbiased'
+    method : {"linear", "interpolated_inverted_cdf", "hazen", "weibull", "median_unbiased", "normal_unbiased"}
+        Method to use for estimating the percentile, see the `numpy.percentile` documentation for more information.
 
     Returns
     -------