Skip to content

Commit

Permalink
Merge branch 'master' into prepare_v0390
Browse files Browse the repository at this point in the history
  • Loading branch information
aulemahal authored Nov 1, 2022
2 parents 12ebbfb + 932fc7f commit 9830e13
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 116 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ New features and enhancements
* Log-logistic distribution added to `stats.py` for use with ``standardized_precipitation_index`` and ``standardized_precipitation_evapotranspiration_index``. (:issue:`1141`, :pull:`1183`).
* New option in many indices allowing for resampling in given periods after ``run_length`` operations. (:issue:`505`, :issue:`916`, :issue:`917`, :pull:`1161`).
* New base indicator class for sdba: ``StatisticalPropertyMeasure``, those measures that also reduce the time (as a property does). (:pull:`1198`).
* ``xclim.core.calendar.common_calendar`` to find the best calendar to use when uniformizing an heterogeneous collection of data. (:pull:`1217`).
* ``xclim.ensembles.create_ensemble`` now accepts ``calendar=None``, and uses the above function to guess the best one. It also now accepts ``cal_kwargs`` to fine tune the calendar conversion. (:issue:`1190`, :pull:`1217`).
* New data check : ``xclim.core.datachecks.check_common_time`` that ensures all inputs of multivariate indicators have the same frequency (and the same time anchoring for daily and hourly data). (:issue:`1111`, :pull:`1217`).

New indicators
^^^^^^^^^^^^^^
Expand Down Expand Up @@ -41,6 +44,7 @@ Breaking changes
* The ``freshet_start`` indice is now deprecated in favour of ``first_day_temperature_above`` with `thresh='0 degC', window=5`. The `freshet_start` indicator is now based on ``first_day_temperature_above``, but is otherwise unaffected. (:issue:`1195`, :pull:`1196`).
* Call signatures for several indices/indicators have been modified to optionally accept `op` for manually setting threshold comparison operators (:issue:`1194`, :pull:`1197`). The affected indices and indicators as follows:
- ``hot_spell_max_length``, ``hot_spell_frequency``, ``cold_spell_days``, ``cold_spell_frequency``, ``heat_wave_index``, ``warm_day_frequency`` (indice only), ``warm_night_frequency`` (indice only), ``dry_days``, ``wetdays``, ``wetdays_prop``.
* Cleaner ``xclim.core.calendar.parse_offset`` : fails on invalid frequencies, return implicit anchors (YS -> JAN, Y -> DEC) and implicit ``is_start_anchored`` (D -> True). (:issue:`1213`, , :pull:`1217`).

Bug fixes
^^^^^^^^^
Expand Down
Empty file modified setup.py
100755 → 100644
Empty file.
90 changes: 76 additions & 14 deletions xclim/core/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
from __future__ import annotations

import datetime as pydt
import re
from typing import Any, NewType, Sequence
from typing import Any, Sequence

import cftime
import numpy as np
Expand Down Expand Up @@ -146,6 +145,55 @@ def get_calendar(obj: Any, dim: str = "time") -> str:
raise ValueError(f"Calendar could not be inferred from object of type {type(obj)}.")


def common_calendar(calendars: Sequence[str], join="outer") -> str:
"""Return a calendar common to all calendars from a list.
Uses the hierarchy: 360_day < noleap < standard < all_leap.
Returns "default" only if all calendars are "default."
Parameters
----------
calendars: Sequence of string
List of calendar names.
join : {'inner', 'outer'}
The criterion for the common calendar.
- 'outer': the common calendar is the smallest calendar (in number of days by year)
that will include all the dates of the other calendars. When converting
the data to this calendar, no timeseries will lose elements, but some
might be missing (gaps or NaNs in the series).
- 'inner': the common calender is the smallest calendar of the list. When converting
the data to this calendar, no timeseries will have missing elements (no gaps or NaNs),
but some might be dropped.
Examples
--------
>>> common_calendar(["360_day", "noleap", "default"], join="outer")
'standard'
>>> common_calendar(["360_day", "noleap", "default"], join="inner")
'360_day'
"""
if all(cal == "default" for cal in calendars):
return "default"

trans = {
"proleptic_gregorian": "standard",
"gregorian": "standard",
"default": "standard",
"366_day": "all_leap",
"365_day": "noleap",
"julian": "standard",
}
ranks = {"360_day": 0, "noleap": 1, "standard": 2, "all_leap": 3}
calendars = sorted([trans.get(cal, cal) for cal in calendars], key=ranks.get)

if join == "outer":
return calendars[-1]
if join == "inner":
return calendars[0]
raise NotImplementedError(f"Unknown join criterion `{join}`.")


def convert_calendar(
source: xr.DataArray | xr.Dataset,
target: xr.DataArray | str,
Expand Down Expand Up @@ -613,19 +661,28 @@ def parse_offset(freq: str) -> Sequence[str]:
Returns
-------
multiplier (int), offset base (str), is start anchored (bool), anchor (str or None)
"[n]W" is always replaced with "[7n]D", as xarray doesn't support "W" for cftime indexes.
"Y" is always replaced with "A".
multiplier : int
Multiplier of the base frequency. "[n]W" is always replaced with "[7n]D", as xarray doesn't support "W" for cftime indexes.
offset_base : str
Base frequency. "Y" is always replaced with "A".
is_start_anchored : bool
Whether coordinates of this frequency should correspond to the beginning of the period (`True`) or its end (`False`).
Can only be False when base is A, Q or M.
anchor : str or None
Anchor date for bases A or Q. As xarray doesn't support "W", neither does xclim (anchor information is lost when given).
"""
patt = r"(\d*)(\w)(S)?(?:-(\w{2,3}))?"
mult, base, start, anchor = re.search(patt, freq).groups()
mult = int(mult or "1")
base = base.replace("Y", "A")
# Useful to raise on invalid freqs, convert Y to A and get default anchor (A, Q)
offset = pd.tseries.frequencies.to_offset(freq)
base, *anchor = offset.name.split("-")
anchor = anchor[0] if len(anchor) > 0 else None
start = ("S" in base) or (base[0] not in "AQM")
base = base[0]
mult = offset.n
if base == "W":
mult = 7 * mult
base = "D"
anchor = ""
return mult, base, start == "S", anchor
anchor = None
return mult, base, start, anchor


def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | None):
Expand All @@ -638,9 +695,9 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N
base : str
The base period string (one char).
start_anchored: bool
If True, adds the "S" flag.
If True and base in [Y, A, Q, M], adds the "S" flag.
anchor: str, optional
The month anchor of the offset.
The month anchor of the offset. Defaults to JAN for bases AS, Y and QS and to DEC for bases A and Q.
Returns
-------
Expand All @@ -651,7 +708,12 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N
-----
This provides the mirror opposite functionality of :py:func:`parse_offset`.
"""
return f"{mult if mult > 1 else ''}{base}{'S' if start_anchored else ''}{'-' if anchor else ''}{anchor or ''}"
start = "S" if start_anchored and base in "YAQM" else ""
if anchor is None and base in "AQY":
anchor = "JAN" if start_anchored else "DEC"
return (
f"{mult if mult > 1 else ''}{base}{start}{'-' if anchor else ''}{anchor or ''}"
)


def _interpolate_doy_calendar(
Expand Down
49 changes: 49 additions & 0 deletions xclim/core/datachecks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ def check_freq(var: xr.DataArray, freq: str | Sequence[str], strict: bool = True
strict : bool
Whether multiples of the frequencies are considered invalid or not. With `strict` set to False, a '3H' series
will not raise an error if freq is set to 'H'.
Raises
------
ValidationError
- If the frequency of `var` is not inferrable.
- If the frequency of `var` does not match the requested `freq`.
"""
if isinstance(freq, str):
freq = [freq]
Expand Down Expand Up @@ -59,3 +65,46 @@ def check_daily(var: xr.DataArray):
This does not check for gaps in series.
"""
return check_freq(var, "D")


@datacheck
def check_common_time(inputs: Sequence[xr.DataArray]):
"""Raise an error if the list of inputs doesn't have a single common frequency.
Raises
------
ValidationError
- if the frequency of any input can't be inferred
- if inputs have different frequencies
- if inputs have a daily or hourly frequency, but they are not given at the same time of day.
Parameters
----------
inputs : Sequence of xr.DataArray
Input arrays.
"""
# Check all have the same freq
freqs = [xr.infer_freq(da.time) for da in inputs]
if None in freqs:
raise ValidationError(
"Unable to infer the frequency of the time series. "
"To mute this, set xclim's option data_validation='log'."
)
if len(set(freqs)) != 1:
raise ValidationError(
f"Inputs have different frequencies. Got : {freqs}."
"To mute this, set xclim's option data_validation='log'."
)

# Check if anchor is the same
freq = freqs[0]
base = parse_offset(freq)[1]
fmt = {"H": ":%M", "D": "%H:%M"}
if base in fmt:
outs = {da.indexes["time"][0].strftime(fmt[base]) for da in inputs}
if len(outs) > 1:
raise ValidationError(
f"All inputs have the same frequency ({freq}), but they are not anchored on the same minutes (got {outs}). "
f"xarray's alignment would silently fail. You can try to fix this with `da.resample('{freq}').mean()`."
"To mute this, set xclim's option data_validation='log'."
)
9 changes: 9 additions & 0 deletions xclim/core/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1273,12 +1273,21 @@ def datacheck(self, **das):
* assert no temperature has the same value 5 days in a row
This base datacheck checks that the input data has a valid sampling frequency, as given in self.src_freq.
If there are multiple inputs, it also checks if they all have the same frequency and the same anchor.
"""
if self.src_freq is not None:
for key, da in das.items():
if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3:
datachecks.check_freq(da, self.src_freq, strict=True)

datachecks.check_common_time(
[
da
for da in das.values()
if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3
]
)

def __getattr__(self, attr):
"""Return the attribute."""
if attr in self._cf_names:
Expand Down
1 change: 0 additions & 1 deletion xclim/core/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from .utils import ValidationError

__all__ = [
"ValidationError",
"amount2rate",
"check_units",
"convert_units_to",
Expand Down
63 changes: 37 additions & 26 deletions xclim/ensembles/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import numpy as np
import xarray as xr

from xclim.core.calendar import convert_calendar, get_calendar
from xclim.core.calendar import common_calendar, convert_calendar, get_calendar
from xclim.core.formatting import update_history
from xclim.core.utils import calc_perc

Expand All @@ -21,8 +21,9 @@ def create_ensemble(
datasets: Any,
mf_flag: bool = False,
resample_freq: str | None = None,
calendar: str = "default",
calendar: str | None = None,
realizations: Sequence[Any] | None = None,
cal_kwargs: dict | None = None,
**xr_kwargs,
) -> xr.Dataset:
"""Create an xarray dataset of an ensemble of climate simulation from a list of netcdf files.
Expand All @@ -37,27 +38,29 @@ def create_ensemble(
Parameters
----------
datasets : list or dict or string
List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of
lists where each sublist contains input .nc files of an xarray multifile Dataset.
If DataArray objects are passed, they should have a name in order to be transformed into Datasets.
A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new
`realization` axis.
If a string is passed, it is assumed to be a glob pattern for finding datasets.
List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of
lists where each sublist contains input .nc files of an xarray multifile Dataset.
If DataArray objects are passed, they should have a name in order to be transformed into Datasets.
A dictionary can be passed instead of a list, in which case the keys are used as coordinates along the new
`realization` axis.
If a string is passed, it is assumed to be a glob pattern for finding datasets.
mf_flag : bool
If True, climate simulations are treated as xarray multifile Datasets before concatenation.
Only applicable when "datasets" is sequence of list of file paths.
If True, climate simulations are treated as xarray multifile Datasets before concatenation.
Only applicable when "datasets" is sequence of list of file paths.
resample_freq : Optional[str]
If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
If resample_freq is set, the time coordinate of each member will be modified to fit this frequency.
calendar : str
The calendar of the time coordinate of the ensemble.
For conversions involving '360_day', the align_on='date' option is used.
See :py:func:`xclim.core.calendar.convert_calendar`.
'default' is the standard calendar using np.datetime64 objects.
If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
If resample_freq is set, the time coordinate of each member will be modified to fit this frequency.
calendar : str, optional
The calendar of the time coordinate of the ensemble.
By default, the smallest common calendar is chosen. For example, a mixed input of "noleap" and "360_day" will default to "noleap".
'default' is the standard calendar using np.datetime64 objects (xarray's "standard" with `use_cftime=False`).
realizations: sequence, optional
The coordinate values for the new `realization` axis.
If None (default), the new axis has a simple integer coordinate.
This argument shouldn't be used if `datasets` is a glob pattern as the dataset order is random.
cal_kwargs : dict, optional
Additionnal arguments to pass to py:func:`xclim.core.calendar.convert_calendar`.
For conversions involving '360_day', the align_on='date' option is used by default.
**xr_kwargs
Any keyword arguments to be given to `xr.open_dataset` when opening the files
(or to `xr.open_mfdataset` if mf_flag is True)
Expand Down Expand Up @@ -101,7 +104,12 @@ def create_ensemble(
)

ds = _ens_align_datasets(
datasets, mf_flag, resample_freq, calendar=calendar, **xr_kwargs
datasets,
mf_flag,
resample_freq,
calendar=calendar,
cal_kwargs=cal_kwargs or {},
**xr_kwargs,
)

if realizations is None:
Expand Down Expand Up @@ -329,6 +337,7 @@ def _ens_align_datasets(
mf_flag: bool = False,
resample_freq: str | None = None,
calendar: str = "default",
cal_kwargs: dict | None = None,
**xr_kwargs,
) -> list[xr.Dataset]:
"""Create a list of aligned xarray Datasets for ensemble Dataset creation.
Expand Down Expand Up @@ -365,6 +374,7 @@ def _ens_align_datasets(
datasets = glob(datasets)

ds_all = []
calendars = []
for i, n in enumerate(datasets):
if mf_flag:
ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
Expand All @@ -390,14 +400,15 @@ def _ens_align_datasets(
time = counts.time

ds["time"] = time

cal = get_calendar(time)
ds = convert_calendar(
ds,
calendar,
align_on="date" if "360_day" in [cal, calendar] else None,
)
calendars.append(get_calendar(time))

ds_all.append(ds)

return ds_all
if not calendars:
# no time
return ds_all

if calendar is None:
calendar = common_calendar(calendars, join="outer")
cal_kwargs.setdefault("align_on", "date")
return [convert_calendar(ds, calendar, **cal_kwargs) for ds in ds_all]
Loading

0 comments on commit 9830e13

Please sign in to comment.