Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas 2.2 and xarray 2023.11 #1614

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ New features and enhancements

Breaking changes
^^^^^^^^^^^^^^^^
* With pandas 2.2 and xarray 2023.11.0, many frequency strings were changed: Y becomes YE, M -> ME, Q -> QE. A and AS are removed. T, L, U, N become min, ms, us and ns.
* `bump2version` has been replaced with `bump-my-version` to bump the version number using configurations set in the `pyproject.toml` file. (:issue:`1557`, :pull:`1569`).
* `xclim`'s units registry and units formatting are now extended from `cf-xarray`. The exponent sign "^" is now never added in the ``units`` attribute. For example, square meters are given as "m2" instead of "m^2" by xclim, both are still accepted as input. (:issue:`1010`, :pull:`1590`).
* `yamale` is now listed as a core dependency (was previously listed in the `dev` installation recipe). (:issue:`1595`, :pull:`1596`).
Expand Down
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ dependencies:
- lmoments3
- numba
- numpy >=1.16
- pandas >=0.23,<2.2
- pandas >=2.2
- pint >=0.9
- poppler >=0.67
- pyyaml
- scikit-learn >=0.21.3
- scipy >=1.2
- statsmodels
- xarray >=2022.06.0,<2023.11.0
- xarray >=2023.11.0
- yamale
# Extras
- eofs
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,13 @@ dependencies = [
"lmoments3>=1.0.5",
"numba",
"numpy>=1.16",
"pandas>=0.23,<2.0; python_version == '3.8'",
"pandas>=0.23,<2.2; python_version >= '3.9'",
"pandas>=2.2; python_version >= '3.9'",
"pint>=0.10",
"pyyaml",
"scikit-learn>=0.21.3",
"scipy>=1.2",
"statsmodels",
"xarray>=2022.06.0,<2023.11.0",
"xarray>=2023.11.0",
"yamale"
]

Expand Down
2 changes: 1 addition & 1 deletion tests/test_atmos.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_wind_power_potential_from_3h_series():
from xclim.testing.helpers import test_timeseries

w = test_timeseries(
np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3H"
np.ones(96) * 15, variable="sfcWind", start="7/1/2000", units="m s-1", freq="3h"
)
out = atmos.wind_power_potential(wind_speed=w)

Expand Down
6 changes: 3 additions & 3 deletions tests/test_bootstrapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ class Test_bootstrap:
"var,p,index,freq, cftime",
(
["tas", 98, tg90p, "MS", False],
["tasmin", 98, tn90p, "A-JUL", False],
["tasmax", 98, tx90p, "Q-APR", False],
["tasmax", 98, tx90p, "Q-APR", True],
["tasmin", 98, tn90p, "YE-JUL", False],
["tasmax", 98, tx90p, "QE-APR", False],
["tasmax", 98, tx90p, "QE-APR", True],
["tasmin", 2, tn10p, "MS", False],
["tasmax", 2, tx10p, "YS", False],
["tasmax", 2, tx10p, "YS", True],
Expand Down
50 changes: 25 additions & 25 deletions tests/test_calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def da(index):
)


@pytest.mark.parametrize("freq", ["6480H", "302431T", "23144781S"])
@pytest.mark.parametrize("freq", ["6480h", "302431min", "23144781s"])
def test_time_bnds(freq, datetime_index, cftime_index):
da_datetime = da(datetime_index).resample(time=freq)
da_cftime = da(cftime_index).resample(time=freq)
Expand Down Expand Up @@ -91,11 +91,11 @@ def test_time_bnds_irregular(typ):
start = xr.cftime_range("1990-01-01", periods=24, freq="MS")
# Well. xarray string parsers do not support sub-second resolution, but cftime does.
end = xr.cftime_range(
"1990-01-01T23:59:59", periods=24, freq="M"
"1990-01-01T23:59:59", periods=24, freq="ME"
) + pd.Timedelta(0.999999, "s")
elif typ == "pd":
start = pd.date_range("1990-01-01", periods=24, freq="MS")
end = pd.date_range("1990-01-01 23:59:59.999999999", periods=24, freq="M")
end = pd.date_range("1990-01-01 23:59:59.999999999", periods=24, freq="ME")

time = start + (end - start) / 2

Expand Down Expand Up @@ -147,7 +147,7 @@ def test_percentile_doy_invalid():
tas = xr.DataArray(
[0, 1],
dims=("time",),
coords={"time": pd.date_range("2000-01-01", periods=2, freq="H")},
coords={"time": pd.date_range("2000-01-01", periods=2, freq="h")},
)
with pytest.raises(ValueError):
percentile_doy(tas)
Expand All @@ -156,10 +156,10 @@ def test_percentile_doy_invalid():
@pytest.mark.parametrize(
"freqA,op,freqB,exp",
[
("D", ">", "H", True),
("D", ">", "h", True),
("2YS", "<=", "QS-DEC", False),
("4W", "==", "3W", False),
("24H", "==", "D", True),
("24h", "==", "D", True),
],
)
def test_compare_offsets(freqA, op, freqB, exp):
Expand Down Expand Up @@ -276,8 +276,8 @@ def test_get_calendar_errors(obj):
("standard", "noleap", True, "D"),
("noleap", "default", True, "D"),
("noleap", "all_leap", False, "D"),
("proleptic_gregorian", "noleap", False, "4H"),
("default", "noleap", True, "4H"),
("proleptic_gregorian", "noleap", False, "4h"),
("default", "noleap", True, "4h"),
],
)
def test_convert_calendar(source, target, target_as_str, freq):
Expand Down Expand Up @@ -312,7 +312,7 @@ def test_convert_calendar(source, target, target_as_str, freq):
[
("standard", "360_day", "D"),
("360_day", "default", "D"),
("proleptic_gregorian", "360_day", "4H"),
("proleptic_gregorian", "360_day", "4h"),
],
)
@pytest.mark.parametrize("align_on", ["date", "year"])
Expand All @@ -332,17 +332,17 @@ def test_convert_calendar_360_days(source, target, freq, align_on):

if align_on == "date":
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
conv.time.resample(time="ME").last().dt.day,
[30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
)
elif target == "360_day":
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
conv.time.resample(time="ME").last().dt.day,
[30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
)
else:
np.testing.assert_array_equal(
conv.time.resample(time="M").last().dt.day,
conv.time.resample(time="ME").last().dt.day,
[30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
)
if source == "360_day" and align_on == "year":
Expand All @@ -357,7 +357,7 @@ def test_convert_calendar_360_days_random():
dims=("time",),
coords={
"time": date_range(
"2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default"
"2004-01-01", "2004-12-31T23:59:59", freq="12h", calendar="default"
)
},
)
Expand All @@ -366,7 +366,7 @@ def test_convert_calendar_360_days_random():
dims=("time",),
coords={
"time": date_range(
"2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day"
"2004-01-01", "2004-12-30T23:59:59", freq="12h", calendar="360_day"
)
},
)
Expand Down Expand Up @@ -395,8 +395,8 @@ def test_convert_calendar_360_days_random():
"source,target,freq",
[
("standard", "noleap", "D"),
("noleap", "default", "4H"),
("noleap", "all_leap", "M"),
("noleap", "default", "4h"),
("noleap", "all_leap", "ME"),
("360_day", "noleap", "D"),
("noleap", "360_day", "D"),
],
Expand Down Expand Up @@ -556,7 +556,7 @@ def test_clim_mean_doy(tas_series):

def test_doy_to_days_since():
# simple test
time = date_range("2020-07-01", "2022-07-01", freq="AS-JUL")
time = date_range("2020-07-01", "2022-07-01", freq="YS-JUL")
da = xr.DataArray(
[190, 360, 3],
dims=("time",),
Expand Down Expand Up @@ -587,7 +587,7 @@ def test_doy_to_days_since():
xr.testing.assert_identical(da, da2)

# with start
time = date_range("2020-12-31", "2022-12-31", freq="Y")
time = date_range("2020-12-31", "2022-12-31", freq="YE")
da = xr.DataArray(
[190, 360, 3],
dims=("time",),
Expand Down Expand Up @@ -624,10 +624,10 @@ def test_doy_to_days_since():
@pytest.mark.parametrize(
"freq,em,eb,es,ea",
[
("4AS-JUL", 4, "A", True, "JUL"),
("M", 1, "M", False, None),
("YS", 1, "A", True, "JAN"),
("3A", 3, "A", False, "DEC"),
("4YS-JUL", 4, "Y", True, "JUL"),
("ME", 1, "M", False, None),
("YS", 1, "Y", True, "JAN"),
("3YE", 3, "Y", False, "DEC"),
("D", 1, "D", True, None),
("3W", 21, "D", True, None),
],
Expand All @@ -649,8 +649,8 @@ def test_parse_offset_invalid():
@pytest.mark.parametrize(
"m,b,s,a,exp",
[
(1, "A", True, None, "AS-JAN"),
(2, "Q", False, "DEC", "2Q-DEC"),
(1, "Y", True, None, "YS-JAN"),
(2, "Q", False, "DEC", "2QE-DEC"),
(1, "D", False, None, "D"),
],
)
Expand Down Expand Up @@ -694,7 +694,7 @@ def test_convert_doy():
dims=("time",),
coords={
"time": xr.date_range(
"2000-01-01", periods=5, freq="AS-JUL", calendar="standard"
"2000-01-01", periods=5, freq="YS-JUL", calendar="standard"
)
},
attrs={"is_dayofyear": 1, "calendar": "standard"},
Expand Down
30 changes: 15 additions & 15 deletions tests/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ def test_assert_daily(self, date_range):
def test_bad_frequency(self, date_range):
with pytest.raises(ValidationError):
n = 365
times = date_range("2000-01-01", freq="12H", periods=n)
times = date_range("2000-01-01", freq="12h", periods=n)
da = xr.DataArray(np.arange(n), [("time", times)], attrs=self.tas_attrs)
tg_mean(da)

# Decreasing index
def test_decreasing_index(self, date_range):
with pytest.raises(ValidationError):
n = 365
times = date_range("2000-01-01", freq="12H", periods=n)
times = date_range("2000-01-01", freq="12h", periods=n)
da = xr.DataArray(
np.arange(n), [("time", times[::-1])], attrs=self.tas_attrs
)
Expand Down Expand Up @@ -149,25 +149,25 @@ def test_check_hourly(self, date_range, random):
}

n = 100
time = date_range("2000-01-01", freq="H", periods=n)
time = date_range("2000-01-01", freq="h", periods=n)
da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs)
datachecks.check_freq(da, "H")
datachecks.check_freq(da, "h")

time = date_range("2000-01-01", freq="3H", periods=n)
time = date_range("2000-01-01", freq="3h", periods=n)
da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs)
with pytest.raises(ValidationError):
datachecks.check_freq(da, "H")
datachecks.check_freq(da, "h")

with pytest.raises(ValidationError):
datachecks.check_freq(da, ["H", "D"])
datachecks.check_freq(da, ["h", "D"])

datachecks.check_freq(da, "H", strict=False)
datachecks.check_freq(da, ["H", "D"], strict=False)
datachecks.check_freq(da, "3H")
datachecks.check_freq(da, ["H", "3H"])
datachecks.check_freq(da, "h", strict=False)
datachecks.check_freq(da, ["h", "D"], strict=False)
datachecks.check_freq(da, "3h")
datachecks.check_freq(da, ["h", "3h"])

with pytest.raises(ValidationError, match="Unable to infer the frequency of"):
datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3H")
datachecks.check_freq(da.where(da.time.dt.dayofyear != 5, drop=True), "3h")

def test_common_time(self, tas_series, date_range, random):
tas_attrs = {
Expand All @@ -176,7 +176,7 @@ def test_common_time(self, tas_series, date_range, random):
}

n = 100
time = date_range("2000-01-01", freq="H", periods=n)
time = date_range("2000-01-01", freq="h", periods=n)
da = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs)

# No freq
Expand All @@ -187,7 +187,7 @@ def test_common_time(self, tas_series, date_range, random):
datachecks.check_common_time([db, da])

# Not same freq
time = date_range("2000-01-01", freq="6H", periods=n)
time = date_range("2000-01-01", freq="6h", periods=n)
db = xr.DataArray(random.random(n), [("time", time)], attrs=tas_attrs)
with pytest.raises(ValidationError, match="Inputs have different frequencies"):
datachecks.check_common_time([db, da])
Expand All @@ -197,6 +197,6 @@ def test_common_time(self, tas_series, date_range, random):
db["time"] = db.time + pd.Timedelta(30, "min")
with pytest.raises(
ValidationError,
match=r"All inputs have the same frequency \(H\), but they are not anchored on the same minutes",
match=r"All inputs have the same frequency \(h\), but they are not anchored on the same minutes",
):
datachecks.check_common_time([db, da])
2 changes: 1 addition & 1 deletion tests/test_ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_create_unequal_times(self, ensemble_dataset_objects, open_dataset):
[(xr.cftime_range, {"calendar": "360_day"}), (pd.date_range, {})],
)
def test_create_unaligned_times(self, timegen, calkw):
t1 = timegen("2000-01-01", periods=24, freq="M", **calkw)
t1 = timegen("2000-01-01", periods=24, freq="ME", **calkw)
t2 = timegen("2000-01-01", periods=24, freq="MS", **calkw)

d1 = xr.DataArray(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ffdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test_ffdi_indicators(self, open_dataset, init_kbdi, limiting_func):
# outputs look sensible
test_data = open_dataset(data_url)

pr_annual = test_data["pr"].resample(time="A").mean().mean("time")
pr_annual = test_data["pr"].resample(time="YS").mean().mean("time")
pr_annual.attrs["units"] = test_data["pr"].attrs["units"]

if init_kbdi:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_season_default(self, q_series):

def test_season(self, q_series):
q = q_series(np.arange(1000))
o = generic.select_resample_op(q, "count", freq="AS-DEC", season="DJF")
o = generic.select_resample_op(q, "count", freq="YS-DEC", season="DJF")
assert o[0] == 31 + 29


Expand Down Expand Up @@ -97,7 +97,7 @@ def test_calendars(self):
)

out = generic.aggregate_between_dates(
data_std, start_std, end_std, op="sum", freq="AS-JUL"
data_std, start_std, end_std, op="sum", freq="YS-JUL"
)

# expected output
Expand All @@ -110,7 +110,7 @@ def test_calendars(self):

# check calendar conversion
out_noleap = generic.aggregate_between_dates(
data_std, start_std, end_noleap, op="sum", freq="AS-JUL"
data_std, start_std, end_noleap, op="sum", freq="YS-JUL"
)

np.testing.assert_allclose(out, out_noleap)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_generic_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def test_missing(self, ndq_series):
np.testing.assert_array_equal(out.sel(time="1902").isnull(), True)

def test_3hourly(self, pr_hr_series, random):
pr = pr_hr_series(random.random(366 * 24)).resample(time="3H").mean()
pr = pr_hr_series(random.random(366 * 24)).resample(time="3h").mean()
out = generic.stats(pr, freq="MS", op="var")
assert out.units == "kg2 m-4 s-2"
assert out.long_name == "Variance of variable"
2 changes: 1 addition & 1 deletion tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_day_lengths(method):


def test_cosine_of_solar_zenith_angle():
time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="H")
time = xr.date_range("1900-01-01T00:30", "1900-01-03", freq="h")
time = xr.DataArray(time, dims=("time",), coords={"time": time}, name="time")
lat = xr.DataArray(
[0, 45, 70], dims=("site",), name="lat", attrs={"units": "degree_north"}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_indicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,7 @@ def test_resampling_indicator_with_indexing(tas_series):
np.testing.assert_allclose(out, [28, 29])

out = xclim.atmos.tx_days_above(
tas, thresh="0 degC", freq="AS-JUL", doy_bounds=(1, 50)
tas, thresh="0 degC", freq="YS-JUL", doy_bounds=(1, 50)
)
np.testing.assert_allclose(out, [50, 50, np.NaN])

Expand Down
Loading