From 9d530d99b449b294e6b4c6e99c7fdd30c5c4012f Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 30 Oct 2024 18:04:26 -0400 Subject: [PATCH 1/2] Fix indexer injection --- CHANGELOG.rst | 1 + docs/notebooks/example/example.yml | 5 ++- docs/notebooks/extendxclim.ipynb | 69 ++++++++++++++++++++++-------- tests/test_modules.py | 6 +++ xclim/core/indicator.py | 46 +++++++++++++------- 5 files changed, 91 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a20bd834e..c80fba2a4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Bug fixes * Fixed a bug where the units could be changed before a conversion of the magnitudes could occur. Conversion of units for multivariate ``DataArray`` is now properly handled in `sdba.TrainAdjust` and `sdba.Adjust`. (:pull:`1972`). * Fixed a units formatting bug with indicators that output "delta" Celsius degrees. (:pull:`1973`). * Corrected the ``"choices"`` of parameter ``op`` in the docstring of ``frost_free_spell_max_length``. (:pull:`1977`). +* Reorganised how ``Indicator`` subclasses can added arguments to the call signature. Injecting such arguments now works. For xclim's subclasses, this bug only affected the ``indexer`` argument of indicators subclassing ``xc.core.indicator.IndexingIndicator``. (:pull:`1981`). v0.53.1 (2024-10-21) -------------------- diff --git a/docs/notebooks/example/example.yml b/docs/notebooks/example/example.yml index 6b5201b5e..30cdb575c 100644 --- a/docs/notebooks/example/example.yml +++ b/docs/notebooks/example/example.yml @@ -12,10 +12,13 @@ variables: description: Precipitation flux on the outer surface of the forest standard_name: precipitation_flux_onto_canopy indicators: - RX1day: + RX1day_summer: base: rx1day cf_attrs: long_name: Highest 1-day precipitation amount + parameters: + indexer: + month: [5, 6, 7, 8, 9] context: hydro RX5day_canopy: base: max_n_day_precipitation_amount diff --git a/docs/notebooks/extendxclim.ipynb b/docs/notebooks/extendxclim.ipynb index 8293e3ba5..f71f814cb 100644 --- a/docs/notebooks/extendxclim.ipynb +++ b/docs/notebooks/extendxclim.ipynb @@ -4,7 +4,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "nbsphinx": "hidden" + "nbsphinx": "hidden", + "tags": [] }, "outputs": [], "source": [ @@ -89,7 +90,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import xarray as xr\n", @@ -214,7 +217,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from xclim.core.indicator import registry\n", @@ -231,7 +236,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "display(Code(tg_mean_c.__doc__, language=\"rst\"))" @@ -249,7 +256,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "tg_mean_c.__module__ == xclim.atmos.tg_mean.__module__" @@ -265,7 +274,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Passing module\n", @@ -277,7 +288,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Conventional class inheritance, uses the current module name\n", @@ -319,7 +332,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "nbsphinx": "hidden" + "nbsphinx": "hidden", + "tags": [] }, "outputs": [], "source": [ @@ -338,7 +352,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# These variables were generated by a hidden cell above that syntax-colored them.\n", @@ -362,7 +378,7 @@ "\n", "\n", "\n", - "- `RX1day` is simply the same as `registry['RX1DAY']`, but with an updated `long_name`.\n", + "- `RX1day` is as `registry['RX1DAY']`, but with an updated `long_name` and an injected argument : its `indexer` arg is not set to only compute over may to september.\n", "- `RX5day_canopy` is based on `registry['MAX_N_DAY_PRECIPITATION_AMOUNT']`, changed the `long_name` and injects the `window` and `freq` arguments.\n", " * It also requests a different variable than the original indicator : `prveg` instead of `pr`. As xclim doesn't know about `prveg`, a definition is given in the `variables` section.\n", "- `R75pdays` is based on `registry['DAYS_OVER_PRECIP_THRESH']`, injects the `thresh` argument and changes the description of the `per` argument.\n", @@ -397,7 +413,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from importlib.resources import files\n", @@ -431,7 +449,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import xclim as xc\n", @@ -444,7 +464,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "docstring = f\"{example.__doc__}\\n---\\n\\n{xc.indicators.example.R99p.__doc__}\"\n", @@ -461,7 +483,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from xclim.testing import open_dataset\n", @@ -505,7 +529,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "out = xr.merge(outs)\n", @@ -527,7 +553,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from xclim.core.indicator import build_indicator_module, registry\n", @@ -565,7 +593,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "print(xc.indicators.awesome.__doc__)" @@ -575,7 +605,8 @@ "cell_type": "code", "execution_count": null, "metadata": { - "nbsphinx": "hidden" + "nbsphinx": "hidden", + "tags": [] }, "outputs": [], "source": [ @@ -597,7 +628,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.12.6" }, "toc": { "base_numbering": 1, diff --git a/tests/test_modules.py b/tests/test_modules.py index d2817b37b..1acad05de 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -104,6 +104,12 @@ def test_custom_indices(open_dataset): example_path / "example.yml", name="ex4", mode="ignore" ) + # Check that indexer was added and injected correctly + assert "indexer" not in ex1.RX1day_summer.parameters + assert ex1.RX1day_summer.injected_parameters["indexer"] == { + "month": [5, 6, 7, 8, 9] + } + @pytest.mark.requires_docs def test_indicator_module_translations(): diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 768a580c8..eda17e7c5 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -88,6 +88,8 @@ In the following, the section under `` is referred to as `data`. When creating indicators from a dictionary, with :py:meth:`Indicator.from_dict`, the input dict must follow the same structure of `data`. +Note that kwargs-like parameters like ``indexer`` must be injected as a dictionary (``param data`` above should be a dictionary). + When a module is built from a yaml file, the yaml is first validated against the schema (see xclim/data/schema.yml) using the YAMALE library (:cite:p:`lopker_yamale_2022`). See the "Extending xclim" notebook for more info. @@ -301,7 +303,7 @@ class Indicator(IndicatorRegistrar): Both are simply views of :py:attr:`xclim.core.indicator.Indicator._all_parameters`. Compared to their base `compute` function, indicators add the possibility of using dataset as input, - with the injected argument `ds` in the call signature. All arguments that were indicated + with the added argument `ds` in the call signature. All arguments that were indicated by the compute function to be variables (DataArrays) through annotations will be promoted to also accept strings that correspond to variable names in the `ds` dataset. @@ -425,12 +427,13 @@ def __new__(cls, **kwds): # noqa: C901 # title, abstract, references, notes, long_name kwds.setdefault(name, value) - # Inject parameters (subclasses can override or extend this through _injected_parameters) - for name, param in cls._injected_parameters(): + # Inject parameters + # Subclasses can override or extend this through the classmethod _added_parameters + for name, param in cls._added_parameters(): if name in parameters: raise ValueError( f"Class {cls.__name__} can't wrap indices that have a `{name}`" - " argument as it conflicts with arguments it injects." + " argument as it conflicts with an argument it adds." ) parameters[name] = param else: # inherit parameters from base class @@ -529,8 +532,13 @@ def _parse_indice(compute, passed_parameters): # noqa: F841 return parameters, docmeta @classmethod - def _injected_parameters(cls): - """Create a list of tuples for arguments to inject, (name, Parameter).""" + def _added_parameters(cls): + """Create a list of tuples for arguments to add to the call signature (name, Parameter). + + These can't be in the compute function signature, the class is in charge of removing them + from the params passed to the compute function, likely through an override of + _preprocess_and_checks. + """ return [ ( "ds", @@ -1536,16 +1544,22 @@ def _preprocess_and_checks(self, das, params): class IndexingIndicator(Indicator): """Indicator that also injects "indexer" kwargs to subset the inputs before computation.""" - def __init__(self, *args, **kwargs): - self._all_parameters["indexer"] = Parameter( - kind=InputKind.KWARGS, - description=( - "Indexing parameters to compute the indicator on a temporal " - "subset of the data. It accepts the same arguments as " - ":py:func:`xclim.indices.generic.select_time`." - ), - ) - super().__init__(*args, **kwargs) + @classmethod + def _added_parameters(cls): + """Create a list of tuples for arguments to add (name, Parameter).""" + return super()._added_parameters() + [ + ( + "indexer", + Parameter( + kind=InputKind.KWARGS, + description=( + "Indexing parameters to compute the indicator on a temporal " + "subset of the data. It accepts the same arguments as " + ":py:func:`xclim.indices.generic.select_time`." + ), + ), + ) + ] def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, Any]): """Perform parent's checks and also check if freq is allowed.""" From 5f2d6dc063946d90087b6084169b611ee45486bc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 30 Oct 2024 18:12:08 -0400 Subject: [PATCH 2/2] language fixes --- docs/notebooks/extendxclim.ipynb | 2 +- xclim/core/indicator.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/notebooks/extendxclim.ipynb b/docs/notebooks/extendxclim.ipynb index f71f814cb..179bc200e 100644 --- a/docs/notebooks/extendxclim.ipynb +++ b/docs/notebooks/extendxclim.ipynb @@ -378,7 +378,7 @@ "\n", "\n", "\n", - "- `RX1day` is as `registry['RX1DAY']`, but with an updated `long_name` and an injected argument : its `indexer` arg is not set to only compute over may to september.\n", + "- `RX1day` is as `registry['RX1DAY']`, but with an updated `long_name` and an injected argument : its `indexer` arg is now set to only compute over may to september.\n", "- `RX5day_canopy` is based on `registry['MAX_N_DAY_PRECIPITATION_AMOUNT']`, changed the `long_name` and injects the `window` and `freq` arguments.\n", " * It also requests a different variable than the original indicator : `prveg` instead of `pr`. As xclim doesn't know about `prveg`, a definition is given in the `variables` section.\n", "- `R75pdays` is based on `registry['DAYS_OVER_PRECIP_THRESH']`, injects the `thresh` argument and changes the description of the `per` argument.\n", diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index eda17e7c5..1822d04c4 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -427,7 +427,7 @@ def __new__(cls, **kwds): # noqa: C901 # title, abstract, references, notes, long_name kwds.setdefault(name, value) - # Inject parameters + # Added parameters # Subclasses can override or extend this through the classmethod _added_parameters for name, param in cls._added_parameters(): if name in parameters: @@ -957,7 +957,7 @@ def _preprocess_and_checks(self, das, params): def _get_compute_args(self, das, params): """Rename variables and parameters to match the compute function's names and split VAR_KEYWORD arguments.""" # Get correct variable names for the compute function. - # Exclude param without a mapping inside the compute functions (those injected by the indicator class) + # Exclude param without a mapping inside the compute functions (those added by the indicator class) args = {} for key, p in self._all_parameters.items(): if p.compute_name is not _empty: @@ -1542,7 +1542,7 @@ def _preprocess_and_checks(self, das, params): class IndexingIndicator(Indicator): - """Indicator that also injects "indexer" kwargs to subset the inputs before computation.""" + """Indicator that also adds the "indexer" kwargs to subset the inputs before computation.""" @classmethod def _added_parameters(cls): @@ -1573,7 +1573,7 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator): - """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" + """Resampling indicator that also adds "indexer" kwargs to subset the inputs before computation.""" class Daily(ResamplingIndicator):