From 00f39eba8e412639094c87a62ba935f9d94b6622 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 24 Oct 2023 18:45:26 +0200 Subject: [PATCH] Treat warnings as errors in tests (#1182) Co-authored-by: Isaac Virshup Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .azure-pipelines.yml | 21 +++++- anndata/_core/aligned_mapping.py | 13 +--- anndata/_core/anndata.py | 29 ++++--- anndata/_core/merge.py | 75 ++++++++----------- anndata/_io/read.py | 12 +-- anndata/_io/utils.py | 4 +- anndata/_io/zarr.py | 10 +-- anndata/compat/__init__.py | 4 +- anndata/experimental/merge.py | 2 +- .../multi_files/_anncollection.py | 2 +- anndata/tests/test_awkward.py | 23 ++++-- anndata/tests/test_concatenate.py | 51 ++++++++++--- anndata/tests/test_concatenate_disk.py | 6 +- anndata/tests/test_dask.py | 29 ++++--- anndata/tests/test_dask_view_mem.py | 9 ++- anndata/tests/test_deprecations.py | 36 +++++---- anndata/tests/test_hdf5_backing.py | 11 ++- anndata/tests/test_io_elementwise.py | 3 +- anndata/tests/test_io_warnings.py | 1 - anndata/tests/test_layers.py | 6 +- anndata/tests/test_raw.py | 4 +- anndata/tests/test_readwrite.py | 32 +++++--- anndata/utils.py | 60 +++++++++++---- conftest.py | 15 +++- docs/benchmark-read-write.ipynb | 2 +- docs/concatenation.rst | 2 +- docs/release-notes/0.10.3.md | 6 ++ docs/release-notes/0.6.0.md | 2 +- pyproject.toml | 15 +++- 29 files changed, 300 insertions(+), 185 deletions(-) diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml index 7e875a4b8..d6f0c0688 100644 --- a/.azure-pipelines.yml +++ b/.azure-pipelines.yml @@ -55,12 +55,29 @@ jobs: - script: | pytest displayName: "PyTest" - condition: eq(variables['RUN_COVERAGE'], 'no') + condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no')) - script: | pytest --cov --cov-report=xml --cov-context=test displayName: "PyTest (coverage)" - condition: eq(variables['RUN_COVERAGE'], 'yes') + condition: and(eq(variables['RUN_COVERAGE'], 'yes'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no')) + + # TODO: fix all the exceptions here + # TODO: Centralize, see https://github.com/scverse/anndata/issues/1204 + - script: > + pytest + -W error + -W 'ignore:Support for Awkward Arrays is currently experimental' + -W 'ignore:Outer joins on awkward.Arrays' + -W 'default:Setting element:UserWarning' + -W 'default:Trying to modify attribute:UserWarning' + -W 'default:Transforming to str index:UserWarning' + -W 'default:Observation names are not unique. To make them unique:UserWarning' + -W 'default:Variable names are not unique. To make them unique:UserWarning' + -W 'default::scipy.sparse._base.SparseEfficiencyWarning' + -W 'default::dask.array.core.PerformanceWarning' + displayName: "PyTest (treat warnings as errors)" + condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'yes')) - task: PublishCodeCoverageResults@1 inputs: diff --git a/anndata/_core/aligned_mapping.py b/anndata/_core/aligned_mapping.py index 37557a212..4fdcb0b29 100644 --- a/anndata/_core/aligned_mapping.py +++ b/anndata/_core/aligned_mapping.py @@ -19,7 +19,7 @@ from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning from anndata.compat import AwkArray -from ..utils import deprecated, dim_len, ensure_df_homogeneous +from ..utils import deprecated, dim_len, ensure_df_homogeneous, warn_once from .access import ElementRef from .index import _subset from .views import as_view, view_update @@ -61,19 +61,12 @@ def _ipython_key_completions_(self) -> list[str]: def _validate_value(self, val: V, key: str) -> V: """Raises an error if value is invalid""" if isinstance(val, AwkArray): - warnings.warn( + warn_once( "Support for Awkward Arrays is currently experimental. " "Behavior may change in the future. Please report any issues you may encounter!", ExperimentalFeatureWarning, # stacklevel=3, ) - # Prevent from showing up every time an awkward array is used - # You'd think `once` works, but it doesn't at the repl and in notebooks - warnings.filterwarnings( - "ignore", - category=ExperimentalFeatureWarning, - message="Support for Awkward Arrays is currently experimental.*", - ) for i, axis in enumerate(self.axes): if self.parent.shape[axis] != dim_len(val, i): right_shape = tuple(self.parent.shape[a] for a in self.axes) @@ -131,7 +124,7 @@ def _view(self, parent: AnnData, subset_idx: I): """Returns a subset copy-on-write view of the object.""" return self._view_class(self, parent, subset_idx) - @deprecated("dict(obj)") + @deprecated("dict(obj)", FutureWarning) def as_dict(self) -> dict: return dict(self) diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index 944fc66a4..3fc67dd92 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -39,7 +39,7 @@ _move_adj_mtx, ) from ..logging import anndata_logger as logger -from ..utils import convert_to_dict, dim_len, ensure_df_homogeneous +from ..utils import convert_to_dict, deprecated, dim_len, ensure_df_homogeneous from .access import ElementRef from .aligned_mapping import ( AxisArrays, @@ -875,23 +875,21 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index: value = pd.Index(value) if not isinstance(value.name, (str, type(None))): value.name = None - # fmt: off if ( - not isinstance(value, pd.RangeIndex) + len(value) > 0 + and not isinstance(value, pd.RangeIndex) and infer_dtype(value) not in ("string", "bytes") ): sample = list(value[: min(len(value), 5)]) - warnings.warn(dedent( + msg = dedent( f""" AnnData expects .{attr}.index to contain strings, but got values like: {sample} Inferred to be: {infer_dtype(value)} """ - ), # noqa - stacklevel=2, ) - # fmt: on + warnings.warn(msg, stacklevel=2) return value def _set_dim_index(self, value: pd.Index, attr: str): @@ -1303,6 +1301,7 @@ def _inplace_subset_var(self, index: Index1D): Same as `adata = adata[:, index]`, but inplace. """ adata_subset = self[:, index].copy() + self._init_as_actual(adata_subset) def _inplace_subset_obs(self, index: Index1D): @@ -1312,6 +1311,7 @@ def _inplace_subset_obs(self, index: Index1D): Same as `adata = adata[index, :]`, but inplace. """ adata_subset = self[index].copy() + self._init_as_actual(adata_subset) # TODO: Update, possibly remove @@ -1597,6 +1597,13 @@ def copy(self, filename: PathLike | None = None) -> AnnData: write_h5ad(filename, self) return read_h5ad(filename, backed=mode) + @deprecated( + "anndata.concat", + FutureWarning, + "See the tutorial for concat at: " + "https://anndata.readthedocs.io/en/latest/concatenation.html", + hide=False, + ) def concatenate( self, *adatas: AnnData, @@ -1820,14 +1827,6 @@ def concatenate( """ from .merge import concat, merge_dataframes, merge_outer, merge_same - warnings.warn( - "The AnnData.concatenate method is deprecated in favour of the " - "anndata.concat function. Please use anndata.concat instead.\n\n" - "See the tutorial for concat at: " - "https://anndata.readthedocs.io/en/latest/concatenation.html", - FutureWarning, - ) - if self.isbacked: raise ValueError("Currently, concatenate only works in memory mode.") diff --git a/anndata/_core/merge.py b/anndata/_core/merge.py index 2df90e290..66a8c3459 100644 --- a/anndata/_core/merge.py +++ b/anndata/_core/merge.py @@ -13,11 +13,11 @@ MutableSet, Sequence, ) -from functools import reduce, singledispatch +from functools import partial, reduce, singledispatch from itertools import repeat from operator import and_, or_, sub from typing import Any, Literal, TypeVar -from warnings import filterwarnings, warn +from warnings import warn import numpy as np import pandas as pd @@ -35,7 +35,7 @@ DaskArray, _map_cat_to_str, ) -from ..utils import asarray, dim_len +from ..utils import asarray, dim_len, warn_once from .anndata import AnnData from .index import _subset, make_slice @@ -219,6 +219,7 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]: For catching cases where pandas would convert to object dtype. """ + dfs = list(dfs) # Get shared categorical columns df_dtypes = [dict(df.dtypes) for df in dfs] columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs]) @@ -752,9 +753,9 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ) # TODO: behaviour here should be chosen through a merge strategy df = pd.concat( - unify_dtypes([f(x) for f, x in zip(reindexers, arrays)]), - ignore_index=True, + unify_dtypes(f(x) for f, x in zip(reindexers, arrays)), axis=axis, + ignore_index=True, ) df.index = index return df @@ -819,7 +820,7 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None): ) -def inner_concat_aligned_mapping(mappings, reindexers=None, index=None, axis=0): +def inner_concat_aligned_mapping(mappings, *, reindexers=None, index=None, axis=0): result = {} for k in intersect_keys(mappings): @@ -878,17 +879,12 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): raise NotImplementedError( "Cannot concatenate an AwkwardArray with other array types." ) - warn( - "Outer joins on awkward.Arrays will have different return values in the future." + warn_once( + "Outer joins on awkward.Arrays will have different return values in the future. " "For details, and to offer input, please see:\n\n\t" "https://github.com/scverse/anndata/issues/898", ExperimentalFeatureWarning, ) - filterwarnings( - "ignore", - category=ExperimentalFeatureWarning, - message=r"Outer joins on awkward.Arrays will have different return values.*", - ) # all_keys = union_keys(el.fields for el in els if not_missing(el)) reindexers = [] for el in els: @@ -912,7 +908,7 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0): def outer_concat_aligned_mapping( - mappings, reindexers=None, index=None, fill_value=None, axis=0 + mappings, *, reindexers=None, index=None, axis=0, fill_value=None ): result = {} ns = [m.parent.shape[axis] for m in mappings] @@ -1261,7 +1257,7 @@ def concat( # Annotation for concatenation axis check_combinable_cols([getattr(a, dim).columns for a in adatas], join=join) concat_annot = pd.concat( - unify_dtypes([getattr(a, dim) for a in adatas]), + unify_dtypes(getattr(a, dim) for a in adatas), join=join, ignore_index=True, ) @@ -1277,37 +1273,30 @@ def concat( X = concat_Xs(adatas, reindexers, axis=axis, fill_value=fill_value) if join == "inner": - layers = inner_concat_aligned_mapping( - [a.layers for a in adatas], axis=axis, reindexers=reindexers - ) - concat_mapping = inner_concat_aligned_mapping( - [getattr(a, f"{dim}m") for a in adatas], index=concat_indices - ) - if pairwise: - concat_pairwise = concat_pairwise_mapping( - mappings=[getattr(a, f"{dim}p") for a in adatas], - shapes=[a.shape[axis] for a in adatas], - join_keys=intersect_keys, - ) - else: - concat_pairwise = {} + concat_aligned_mapping = inner_concat_aligned_mapping + join_keys = intersect_keys elif join == "outer": - layers = outer_concat_aligned_mapping( - [a.layers for a in adatas], reindexers, axis=axis, fill_value=fill_value + concat_aligned_mapping = partial( + outer_concat_aligned_mapping, fill_value=fill_value ) - concat_mapping = outer_concat_aligned_mapping( - [getattr(a, f"{dim}m") for a in adatas], - index=concat_indices, - fill_value=fill_value, + join_keys = union_keys + else: + assert False, f"{join=} should have been validated above by pd.concat" + + layers = concat_aligned_mapping( + [a.layers for a in adatas], axis=axis, reindexers=reindexers + ) + concat_mapping = concat_aligned_mapping( + [getattr(a, f"{dim}m") for a in adatas], index=concat_indices + ) + if pairwise: + concat_pairwise = concat_pairwise_mapping( + mappings=[getattr(a, f"{dim}p") for a in adatas], + shapes=[a.shape[axis] for a in adatas], + join_keys=join_keys, ) - if pairwise: - concat_pairwise = concat_pairwise_mapping( - mappings=[getattr(a, f"{dim}p") for a in adatas], - shapes=[a.shape[axis] for a in adatas], - join_keys=union_keys, - ) - else: - concat_pairwise = {} + else: + concat_pairwise = {} # TODO: Reindex lazily, so we don't have to make those copies until we're sure we need the element alt_mapping = merge( diff --git a/anndata/_io/read.py b/anndata/_io/read.py index 68f7fbd27..a50c4b2ef 100644 --- a/anndata/_io/read.py +++ b/anndata/_io/read.py @@ -274,16 +274,16 @@ def read_loom( uns = {} if cleanup: uns_obs = {} - for key in list(obs.keys()): - if len(set(obs[key])) == 1: - uns_obs[f"{key}"] = obs[key][0] + for key in obs.columns: + if len(obs[key].unique()) == 1: + uns_obs[key] = obs[key].iloc[0] del obs[key] if uns_obs: uns["loom-obs"] = uns_obs uns_var = {} - for key in list(var.keys()): - if len(set(var[key])) == 1: - uns_var[f"{key}"] = var[key][0] + for key in var.columns: + if len(var[key].unique()) == 1: + uns_var[key] = var[key].iloc[0] del var[key] if uns_var: uns["loom-var"] = uns_var diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index 964f94811..cd90be473 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -5,7 +5,7 @@ from warnings import warn import h5py -from packaging import version +from packaging.version import Version from anndata.compat import H5Group, ZarrGroup, add_note @@ -13,7 +13,7 @@ # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 -H5PY_V3 = version.parse(h5py.__version__).major >= 3 +H5PY_V3 = Version(h5py.__version__).major >= 3 # ------------------------------------------------------------------------------- # Type conversion diff --git a/anndata/_io/zarr.py b/anndata/_io/zarr.py index 022ee8a1d..00f9766f0 100644 --- a/anndata/_io/zarr.py +++ b/anndata/_io/zarr.py @@ -12,16 +12,10 @@ from anndata._warnings import OldFormatWarning from .._core.anndata import AnnData -from ..compat import ( - _clean_uns, - _from_fixed_length_strings, -) +from ..compat import _clean_uns, _from_fixed_length_strings from ..experimental import read_dispatched, write_dispatched from .specs import read_elem -from .utils import ( - _read_legacy_raw, - report_read_key_on_error, -) +from .utils import _read_legacy_raw, report_read_key_on_error if TYPE_CHECKING: from collections.abc import MutableMapping diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py index 0b0542a0d..a0a77977f 100644 --- a/anndata/compat/__init__.py +++ b/anndata/compat/__init__.py @@ -14,7 +14,7 @@ import h5py import numpy as np import pandas as pd -from packaging.version import parse as _parse_version +from packaging.version import Version from scipy.sparse import issparse, spmatrix from .exceptiongroups import add_note # noqa: F401 @@ -395,7 +395,7 @@ def _safe_transpose(x): def _map_cat_to_str(cat: pd.Categorical) -> pd.Categorical: - if _parse_version(pd.__version__) >= _parse_version("2.0"): + if Version(pd.__version__) >= Version("2.0"): # Argument added in pandas 2.0 return cat.map(str, na_action="ignore") else: diff --git a/anndata/experimental/merge.py b/anndata/experimental/merge.py index 59c0623a8..95b0b215f 100644 --- a/anndata/experimental/merge.py +++ b/anndata/experimental/merge.py @@ -384,7 +384,7 @@ def _write_alt_annot(groups, output_group, alt_dim, alt_indices, merge): def _write_dim_annot(groups, output_group, dim, concat_indices, label, label_col, join): concat_annot = pd.concat( - unify_dtypes([read_elem(g[dim]) for g in groups]), + unify_dtypes(read_elem(g[dim]) for g in groups), join=join, ignore_index=True, ) diff --git a/anndata/experimental/multi_files/_anncollection.py b/anndata/experimental/multi_files/_anncollection.py index acacdc8d3..ca74092ad 100644 --- a/anndata/experimental/multi_files/_anncollection.py +++ b/anndata/experimental/multi_files/_anncollection.py @@ -209,7 +209,7 @@ def __getitem__(self, key, use_convert=True): else: if vidx is not None: idx = np.ix_(*idx) if not isinstance(idx[1], slice) else idx - arrs.append(arr[idx]) + arrs.append(arr.iloc[idx] if isinstance(arr, pd.Series) else arr[idx]) if len(arrs) > 1: _arr = _merge(arrs) diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 993fb91de..9e780c8a8 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -1,13 +1,19 @@ """Tests related to awkward arrays""" from __future__ import annotations +from contextlib import nullcontext + import numpy as np import numpy.testing as npt import pandas as pd import pytest import anndata -from anndata import AnnData, ImplicitModificationWarning, read_h5ad +from anndata import ( + AnnData, + ImplicitModificationWarning, + read_h5ad, +) from anndata.compat import awkward as ak from anndata.tests.helpers import assert_equal, gen_adata, gen_awkward from anndata.utils import dim_len @@ -196,8 +202,8 @@ def reversed(self): ] ), # categorical array - ak.to_categorical(ak.Array([["a", "b", "c"], ["a", "b"]])), - ak.to_categorical(ak.Array([[1, 1, 2], [3, 3]])), + ak.str.to_categorical(ak.Array([["a", "b", "c"], ["a", "b"]])), + ak.str.to_categorical(ak.Array([[1, 1, 2], [3, 3]])), # tyical record type with AIRR data consisting of different dtypes ak.Array( [ @@ -375,10 +381,17 @@ def test_concat_mixed_types(key, arrays, expected, join): to_concat.append(tmp_adata) if isinstance(expected, type) and issubclass(expected, Exception): - with pytest.raises(expected): + ctx = ( + pytest.warns( + FutureWarning, + match=r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", + ) + if any(df.empty for df in arrays if isinstance(df, pd.DataFrame)) + else nullcontext() + ) + with pytest.raises(expected), ctx: anndata.concat(to_concat, axis=axis, join=join) else: - print(to_concat) result_adata = anndata.concat(to_concat, axis=axis, join=join) result = getattr(result_adata, key).get("test", None) assert_equal(expected, result, exact=True) diff --git a/anndata/tests/test_concatenate.py b/anndata/tests/test_concatenate.py index 318e78b3f..17c11ef70 100644 --- a/anndata/tests/test_concatenate.py +++ b/anndata/tests/test_concatenate.py @@ -2,10 +2,11 @@ import warnings from collections.abc import Hashable +from contextlib import nullcontext from copy import deepcopy from functools import partial, singledispatch from itertools import chain, permutations, product -from typing import Any, Callable +from typing import Any, Callable, Literal import numpy as np import pandas as pd @@ -30,6 +31,10 @@ ) from anndata.utils import asarray +mark_legacy_concatenate = pytest.mark.filterwarnings( + r"ignore:.*AnnData\.concatenate is deprecated:FutureWarning" +) + @singledispatch def filled_like(a, fill_value=None): @@ -93,7 +98,7 @@ def fill_val(request): @pytest.fixture(params=[0, 1]) -def axis(request): +def axis(request) -> Literal[0, 1]: return request.param @@ -145,6 +150,7 @@ def test_concat_interface_errors(): concat([]) +@mark_legacy_concatenate @pytest.mark.parametrize( ["concat_func", "backwards_compat"], [ @@ -173,6 +179,7 @@ def test_concatenate_roundtrip(join_type, array_type, concat_func, backwards_com assert_equal(result[orig.obs_names].copy(), orig) +@mark_legacy_concatenate def test_concatenate_dense(): # dense data X1 = np.array([[1, 2, 3], [4, 5, 6]]) @@ -248,6 +255,7 @@ def test_concatenate_dense(): assert np.allclose(var_ma.compressed(), var_ma_ref.compressed()) +@mark_legacy_concatenate def test_concatenate_layers(array_type, join_type): adatas = [] for _ in range(5): @@ -307,6 +315,7 @@ def gen_index(n): ] +@mark_legacy_concatenate def test_concatenate_obsm_inner(obsm_adatas): adata = obsm_adatas[0].concatenate(obsm_adatas[1:], join="inner") @@ -336,6 +345,7 @@ def test_concatenate_obsm_inner(obsm_adatas): pd.testing.assert_frame_equal(true_df, cur_df) +@mark_legacy_concatenate def test_concatenate_obsm_outer(obsm_adatas, fill_val): outer = obsm_adatas[0].concatenate( obsm_adatas[1:], join="outer", fill_value=fill_val @@ -406,6 +416,7 @@ def test_concat_annot_join(obsm_adatas, join_type): ) +@mark_legacy_concatenate def test_concatenate_layers_misaligned(array_type, join_type): adatas = [] for _ in range(5): @@ -419,6 +430,7 @@ def test_concatenate_layers_misaligned(array_type, join_type): assert_equal(merged.X, merged.layers["a"]) +@mark_legacy_concatenate def test_concatenate_layers_outer(array_type, fill_val): # Testing that issue #368 is fixed a = AnnData( @@ -434,6 +446,7 @@ def test_concatenate_layers_outer(array_type, fill_val): ) +@mark_legacy_concatenate def test_concatenate_fill_value(fill_val): def get_obs_els(adata): return { @@ -479,6 +492,7 @@ def get_obs_els(adata): ptr += orig.n_obs +@mark_legacy_concatenate def test_concatenate_dense_duplicates(): X1 = np.array([[1, 2, 3], [4, 5, 6]]) X2 = np.array([[1, 2, 3], [4, 5, 6]]) @@ -530,6 +544,7 @@ def test_concatenate_dense_duplicates(): ] +@mark_legacy_concatenate def test_concatenate_sparse(): # sparse data from scipy.sparse import csr_matrix @@ -575,6 +590,7 @@ def test_concatenate_sparse(): ] +@mark_legacy_concatenate def test_concatenate_mixed(): X1 = sparse.csr_matrix(np.array([[1, 2, 0], [4, 0, 6], [0, 0, 9]])) X2 = sparse.csr_matrix(np.array([[0, 2, 3], [4, 0, 0], [7, 0, 9]])) @@ -610,6 +626,7 @@ def test_concatenate_mixed(): assert isinstance(adata_all.layers["counts"], sparse.csr_matrix) +@mark_legacy_concatenate def test_concatenate_with_raw(): # dense data X1 = np.array([[1, 2, 3], [4, 5, 6]]) @@ -814,7 +831,8 @@ def gen_dim_array(m): # Check values of included elements full_inds = np.arange(w_pairwise.shape[axis]) - groups = getattr(w_pairwise, dim).groupby("orig").indices + obs_var: pd.DataFrame = getattr(w_pairwise, dim) + groups = obs_var.groupby("orig", observed=True).indices for k, inds in groups.items(): orig_arr = getattr(adatas[k], dim_attr)["arr"] full_arr = getattr(w_pairwise, dim_attr)["arr"] @@ -1089,7 +1107,7 @@ def test_concatenate_uns(unss, merge_strategy, result, value_gen): print(merge_strategy, "\n", unss, "\n", result) result, *unss = permute_nested_values([result] + unss, value_gen) adatas = [uns_ad(uns) for uns in unss] - with pytest.warns(FutureWarning, match=r"concatenate method is deprecated"): + with pytest.warns(FutureWarning, match=r"concatenate is deprecated"): merged = AnnData.concatenate(*adatas, uns_merge=merge_strategy).uns assert_equal(merged, result, elem_name="uns") @@ -1314,14 +1332,24 @@ def test_concat_size_0_dim(axis, join_type, merge_strategy, shape): dim = ("obs", "var")[axis] expected_size = expected_shape(a, b, axis=axis, join=join_type) - result = concat( - {"a": a, "b": b}, - axis=axis, - join=join_type, - merge=merge_strategy, - pairwise=True, - index_unique="-", + + ctx_concat_empty = ( + pytest.warns( + FutureWarning, + match=r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", + ) + if shape[axis] == 0 + else nullcontext() ) + with ctx_concat_empty: + result = concat( + {"a": a, "b": b}, + axis=axis, + join=join_type, + merge=merge_strategy, + pairwise=True, + index_unique="-", + ) assert result.shape == expected_size if join_type == "outer": @@ -1370,6 +1398,7 @@ def test_concat_outer_aligned_mapping(elem): check_filled_like(result, elem_name=f"obsm/{elem}") +@mark_legacy_concatenate def test_concatenate_size_0_dim(): # https://github.com/scverse/anndata/issues/526 diff --git a/anndata/tests/test_concatenate_disk.py b/anndata/tests/test_concatenate_disk.py index f9eab9540..659fb98cf 100644 --- a/anndata/tests/test_concatenate_disk.py +++ b/anndata/tests/test_concatenate_disk.py @@ -109,7 +109,7 @@ def test_anndatas_without_reindex( M = 50 sparse_fmt = "csr" adatas = [] - for _ in range(5): + for i in range(5): if axis == 0: M = np.random.randint(1, 100) else: @@ -122,6 +122,10 @@ def test_anndatas_without_reindex( sparse_fmt=sparse_fmt, **GEN_ADATA_OOC_CONCAT_ARGS, ) + if axis == 0: + a.obs_names = f"{i}-" + a.obs_names + else: + a.var_names = f"{i}-" + a.var_names adatas.append(a) assert_eq_concat_on_disk( diff --git a/anndata/tests/test_dask.py b/anndata/tests/test_dask.py index 7bd353f24..56cb0f8c8 100644 --- a/anndata/tests/test_dask.py +++ b/anndata/tests/test_dask.py @@ -107,21 +107,20 @@ def test_dask_distributed_write(adata, tmp_path, diskfmt): pth = tmp_path / f"test_write.{diskfmt}" g = as_group(pth, mode="w") - with dd.LocalCluster(n_workers=1, threads_per_worker=1, processes=False) as cluster: - with dd.Client(cluster): - M, N = adata.X.shape - adata.obsm["a"] = da.random.random((M, 10)) - adata.obsm["b"] = da.random.random((M, 10)) - adata.varm["a"] = da.random.random((N, 10)) - orig = adata - if diskfmt == "h5ad": - with pytest.raises( - ValueError, match="Cannot write dask arrays to hdf5" - ): - write_elem(g, "", orig) - return - write_elem(g, "", orig) - curr = read_elem(g) + with dd.LocalCluster( + n_workers=1, threads_per_worker=1, processes=False + ) as cluster, dd.Client(cluster): + M, N = adata.X.shape + adata.obsm["a"] = da.random.random((M, 10)) + adata.obsm["b"] = da.random.random((M, 10)) + adata.varm["a"] = da.random.random((N, 10)) + orig = adata + if diskfmt == "h5ad": + with pytest.raises(ValueError, match="Cannot write dask arrays to hdf5"): + write_elem(g, "", orig) + return + write_elem(g, "", orig) + curr = read_elem(g) with pytest.raises(Exception): assert_equal(curr.obsm["a"], curr.obsm["b"]) diff --git a/anndata/tests/test_dask_view_mem.py b/anndata/tests/test_dask_view_mem.py index bb758a223..d597d40aa 100644 --- a/anndata/tests/test_dask_view_mem.py +++ b/anndata/tests/test_dask_view_mem.py @@ -1,9 +1,14 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import pytest import anndata as ad +if TYPE_CHECKING: + import pandas as pd + pytest.importorskip("pytest_memray") # ------------------------------------------------------------------------------ @@ -155,5 +160,5 @@ def test_modify_view_mapping_obs_var_memory(attr_name, give_chunks): ) subset = adata[:N, :N] assert subset.is_view - m = getattr(subset, attr_name)["m"] - m[0] = 100 + m: pd.Series = getattr(subset, attr_name)["m"] + m.iloc[0] = 100 diff --git a/anndata/tests/test_deprecations.py b/anndata/tests/test_deprecations.py index a6bcfd478..39176e315 100644 --- a/anndata/tests/test_deprecations.py +++ b/anndata/tests/test_deprecations.py @@ -40,26 +40,24 @@ def test_get_obsvar_array_warn(adata): adata._get_var_array("s1") -# TODO: Why doesn’t this mark work? -# @pytest.mark.filterwarnings("ignore::DeprecationWarning") +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_get_obsvar_array(adata): - with pytest.warns(DeprecationWarning): # Just to hide warnings - assert np.allclose(adata._get_obs_array("a"), adata.obs_vector("a")) - assert np.allclose( - adata._get_obs_array("a", layer="x2"), - adata.obs_vector("a", layer="x2"), - ) - assert np.allclose( - adata._get_obs_array("a", use_raw=True), adata.raw.obs_vector("a") - ) - assert np.allclose(adata._get_var_array("s1"), adata.var_vector("s1")) - assert np.allclose( - adata._get_var_array("s1", layer="x2"), - adata.var_vector("s1", layer="x2"), - ) - assert np.allclose( - adata._get_var_array("s1", use_raw=True), adata.raw.var_vector("s1") - ) + assert np.allclose(adata._get_obs_array("a"), adata.obs_vector("a")) + assert np.allclose( + adata._get_obs_array("a", layer="x2"), + adata.obs_vector("a", layer="x2"), + ) + assert np.allclose( + adata._get_obs_array("a", use_raw=True), adata.raw.obs_vector("a") + ) + assert np.allclose(adata._get_var_array("s1"), adata.var_vector("s1")) + assert np.allclose( + adata._get_var_array("s1", layer="x2"), + adata.var_vector("s1", layer="x2"), + ) + assert np.allclose( + adata._get_var_array("s1", use_raw=True), adata.raw.var_vector("s1") + ) def test_obsvar_vector_Xlayer(adata): diff --git a/anndata/tests/test_hdf5_backing.py b/anndata/tests/test_hdf5_backing.py index 03e65d7b4..94f2af4b0 100644 --- a/anndata/tests/test_hdf5_backing.py +++ b/anndata/tests/test_hdf5_backing.py @@ -304,10 +304,13 @@ def test_backed_modification_sparse(adata, backing_h5ad, sparse_format): assert adata.filename == backing_h5ad assert adata.isbacked - adata.X[0, [0, 2]] = 10 - adata.X[1, [0, 2]] = [11, 12] - with pytest.raises(ValueError): - adata.X[2, 1] = 13 + with pytest.warns( + PendingDeprecationWarning, match=r"__setitem__ will likely be removed" + ): + adata.X[0, [0, 2]] = 10 + adata.X[1, [0, 2]] = [11, 12] + with pytest.raises(ValueError): + adata.X[2, 1] = 13 assert adata.isbacked diff --git a/anndata/tests/test_io_elementwise.py b/anndata/tests/test_io_elementwise.py index 34a42e7ff..08853b6c4 100644 --- a/anndata/tests/test_io_elementwise.py +++ b/anndata/tests/test_io_elementwise.py @@ -298,4 +298,5 @@ def test_read_zarr_from_group(tmp_path, consolidated): read_func = zarr.open with read_func(pth) as z: - assert_equal(ad.read_zarr(z["table/table"]), adata) + expected = ad.read_zarr(z["table/table"]) + assert_equal(adata, expected) diff --git a/anndata/tests/test_io_warnings.py b/anndata/tests/test_io_warnings.py index 1dc341ffd..ac704c249 100644 --- a/anndata/tests/test_io_warnings.py +++ b/anndata/tests/test_io_warnings.py @@ -26,7 +26,6 @@ def test_old_format_warning_not_thrown(tmp_path): with warnings.catch_warnings(record=True) as record: warnings.simplefilter("always", ad.OldFormatWarning) - warnings.simplefilter("ignore", ad.ExperimentalFeatureWarning) ad.read_h5ad(pth) diff --git a/anndata/tests/test_layers.py b/anndata/tests/test_layers.py index 4b6a7f287..34b088976 100644 --- a/anndata/tests/test_layers.py +++ b/anndata/tests/test_layers.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import pytest +from numba.core.errors import NumbaDeprecationWarning from anndata import AnnData, read_h5ad, read_loom from anndata.tests.helpers import gen_typed_df_t2_size @@ -78,7 +79,10 @@ def test_readwrite(backing_h5ad): def test_readwrite_loom(tmp_path): loom_path = tmp_path / "test.loom" adata = AnnData(X=X, layers=dict(L=L.copy())) - adata.write_loom(loom_path) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", NumbaDeprecationWarning) + adata.write_loom(loom_path) adata_read = read_loom(loom_path, X_name="") assert adata.layers.keys() == adata_read.layers.keys() diff --git a/anndata/tests/test_raw.py b/anndata/tests/test_raw.py index 7e4689d60..b51376b9a 100644 --- a/anndata/tests/test_raw.py +++ b/anndata/tests/test_raw.py @@ -81,7 +81,7 @@ def test_raw_of_view(adata_raw: ad.AnnData): def test_raw_rw(adata_raw: ad.AnnData, backing_h5ad): adata_raw.write(backing_h5ad) - adata_read = ad.read(backing_h5ad) + adata_read = ad.read_h5ad(backing_h5ad) assert_equal(adata_read, adata_raw, exact=True) @@ -96,7 +96,7 @@ def test_raw_view_rw(adata_raw: ad.AnnData, backing_h5ad): assert_equal(adata_raw_view, adata_raw) with pytest.warns(ImplicitModificationWarning, match="initializing view as actual"): adata_raw_view.write(backing_h5ad) - adata_read = ad.read(backing_h5ad) + adata_read = ad.read_h5ad(backing_h5ad) assert_equal(adata_read, adata_raw_view, exact=True) diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py index 98de43a61..22b1aaffc 100644 --- a/anndata/tests/test_readwrite.py +++ b/anndata/tests/test_readwrite.py @@ -13,6 +13,7 @@ import pandas as pd import pytest import zarr +from numba.core.errors import NumbaDeprecationWarning from scipy.sparse import csc_matrix, csr_matrix import anndata as ad @@ -88,7 +89,7 @@ def rw(backing_h5ad): M, N = 100, 101 orig = gen_adata((M, N)) orig.write(backing_h5ad) - curr = ad.read(backing_h5ad) + curr = ad.read_h5ad(backing_h5ad) return curr, orig @@ -139,7 +140,7 @@ def test_readwrite_kitchensink(tmp_path, storage, typ, backing_h5ad, dataset_kwa if storage == "h5ad": adata_src.write(backing_h5ad, **dataset_kwargs) - adata_mid = ad.read(backing_h5ad) + adata_mid = ad.read_h5ad(backing_h5ad) adata_mid.write(tmp_path / "mid.h5ad", **dataset_kwargs) adata = ad.read_h5ad(tmp_path / "mid.h5ad") else: @@ -179,7 +180,7 @@ def test_readwrite_maintain_X_dtype(typ, backing_h5ad): adata_src = ad.AnnData(X) adata_src.write(backing_h5ad) - adata = ad.read(backing_h5ad) + adata = ad.read_h5ad(backing_h5ad) assert adata.X.dtype == adata_src.X.dtype @@ -212,7 +213,7 @@ def test_readwrite_h5ad_one_dimension(typ, backing_h5ad): adata_src = ad.AnnData(X, obs=obs_dict, var=var_dict, uns=uns_dict) adata_one = adata_src[:, 0].copy() adata_one.write(backing_h5ad) - adata = ad.read(backing_h5ad) + adata = ad.read_h5ad(backing_h5ad) assert adata.shape == (3, 1) assert_equal(adata, adata_one) @@ -224,7 +225,7 @@ def test_readwrite_backed(typ, backing_h5ad): adata_src.filename = backing_h5ad # change to backed mode adata_src.write() - adata = ad.read(backing_h5ad) + adata = ad.read_h5ad(backing_h5ad) assert isinstance(adata.obs["oanno1"].dtype, pd.CategoricalDtype) assert not isinstance(adata.obs["oanno2"].dtype, pd.CategoricalDtype) assert adata.obs.index.tolist() == ["name1", "name2", "name3"] @@ -324,7 +325,8 @@ def check_compressed(key, value): msg = "\n\t".join(not_compressed) raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}") - assert_equal(adata, ad.read_h5ad(pth)) + expected = ad.read_h5ad(pth) + assert_equal(adata, expected) def test_zarr_compression(tmp_path): @@ -349,7 +351,8 @@ def check_compressed(key, value): msg = "\n\t".join(not_compressed) raise AssertionError(f"These elements were not compressed correctly:\n\t{msg}") - assert_equal(adata, ad.read_zarr(pth)) + expected = ad.read_zarr(pth) + assert_equal(adata, expected) def test_changed_obs_var_names(tmp_path, diskfmt): @@ -388,7 +391,9 @@ def test_readwrite_loom(typ, obsm_mapping, varm_mapping, tmp_path): adata_src.obsm["X_a"] = np.zeros((adata_src.n_obs, 2)) adata_src.varm["X_b"] = np.zeros((adata_src.n_vars, 3)) - adata_src.write_loom(tmp_path / "test.loom", write_obsm_varm=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", NumbaDeprecationWarning) + adata_src.write_loom(tmp_path / "test.loom", write_obsm_varm=True) adata = ad.read_loom( tmp_path / "test.loom", @@ -430,7 +435,7 @@ def test_readloom_deprecations(tmp_path): depr_result = ad.read_loom(loom_pth, obsm_names=obsm_mapping) actual_result = ad.read_loom(loom_pth, obsm_mapping=obsm_mapping) assert_equal(actual_result, depr_result) - with pytest.raises(ValueError, match="ambiguous"): + with pytest.raises(ValueError, match="ambiguous"), pytest.warns(FutureWarning): ad.read_loom(loom_pth, obsm_mapping=obsm_mapping, obsm_names=obsm_mapping) # varm_names -> varm_mapping @@ -439,7 +444,7 @@ def test_readloom_deprecations(tmp_path): depr_result = ad.read_loom(loom_pth, varm_names=varm_mapping) actual_result = ad.read_loom(loom_pth, varm_mapping=varm_mapping) assert_equal(actual_result, depr_result) - with pytest.raises(ValueError, match="ambiguous"): + with pytest.raises(ValueError, match="ambiguous"), pytest.warns(FutureWarning): ad.read_loom(loom_pth, varm_mapping=varm_mapping, varm_names=varm_mapping) # positional -> keyword @@ -728,10 +733,13 @@ def test_scanpy_krumsiek11(tmp_path, diskfmt): filepth = tmp_path / f"test.{diskfmt}" import scanpy as sc - orig = sc.datasets.krumsiek11() + # TODO: this should be fixed in scanpy instead + with pytest.warns(UserWarning, match=r"Observation names are not unique"): + orig = sc.datasets.krumsiek11() del orig.uns["highlights"] # Can’t write int keys getattr(orig, f"write_{diskfmt}")(filepth) - read = getattr(ad, f"read_{diskfmt}")(filepth) + with pytest.warns(UserWarning, match=r"Observation names are not unique"): + read = getattr(ad, f"read_{diskfmt}")(filepth) assert_equal(orig, read, exact=True) diff --git a/anndata/utils.py b/anndata/utils.py index b5fc5c16c..9c700e28b 100644 --- a/anndata/utils.py +++ b/anndata/utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re import warnings from functools import singledispatch, wraps from typing import TYPE_CHECKING, Any @@ -19,6 +20,24 @@ logger = get_logger(__name__) +def import_name(name: str) -> Any: + from importlib import import_module + + parts = name.split(".") + obj = import_module(parts[0]) + for i, name in enumerate(parts[1:]): + try: + obj = import_module(f"{obj.__name__}.{name}") + except ModuleNotFoundError: + break + for name in parts[i + 1 :]: + try: + obj = getattr(obj, name) + except AttributeError: + raise RuntimeError(f"{parts[:i]}, {parts[i+1:]}, {obj} {name}") + return obj + + @singledispatch def asarray(x): """Convert x to a numpy array""" @@ -311,7 +330,19 @@ def convert_dictionary_to_structured_array(source: Mapping[str, Sequence[Any]]): return arr -def deprecated(new_name: str): +def warn_once(msg: str, category: type[Warning], stacklevel: int = 1): + warnings.warn(msg, category, stacklevel=stacklevel) + # Prevent from showing up every time an awkward array is used + # You'd think `'once'` works, but it doesn't at the repl and in notebooks + warnings.filterwarnings("ignore", category=category, message=re.escape(msg)) + + +def deprecated( + new_name: str, + category: type[Warning] = DeprecationWarning, + add_msg: str = "", + hide: bool = True, +): """\ This is a decorator which can be used to mark functions as deprecated. It will result in a warning being emitted @@ -319,20 +350,20 @@ def deprecated(new_name: str): """ def decorator(func): + name = func.__qualname__ + msg = ( + f"Use {new_name} instead of {name}, " + f"{name} is deprecated and will be removed in the future." + ) + if add_msg: + msg += f" {add_msg}" + @wraps(func) def new_func(*args, **kwargs): - # turn off filter - warnings.simplefilter("always", DeprecationWarning) - warnings.warn( - f"Use {new_name} instead of {func.__name__}, " - f"{func.__name__} will be removed in the future.", - category=DeprecationWarning, - stacklevel=2, - ) - warnings.simplefilter("default", DeprecationWarning) # reset filter + warnings.warn(msg, category=category, stacklevel=2) return func(*args, **kwargs) - setattr(new_func, "__deprecated", True) + setattr(new_func, "__deprecated", (category, msg, hide)) return new_func return decorator @@ -345,13 +376,14 @@ class DeprecationMixinMeta(type): """ def __dir__(cls): - def is_deprecated(attr): + def is_hidden(attr) -> bool: if isinstance(attr, property): attr = attr.fget - return getattr(attr, "__deprecated", False) + _, _, hide = getattr(attr, "__deprecated", (None, None, False)) + return hide return [ item for item in type.__dir__(cls) - if not is_deprecated(getattr(cls, item, None)) + if not is_hidden(getattr(cls, item, None)) ] diff --git a/conftest.py b/conftest.py index 1825ef24c..588a82054 100644 --- a/conftest.py +++ b/conftest.py @@ -4,11 +4,14 @@ # TODO: Fix that, e.g. with the `pytest -p anndata.testing._pytest` pattern. from __future__ import annotations +import re +import warnings from typing import TYPE_CHECKING import pytest from anndata.compat import chdir +from anndata.utils import import_name if TYPE_CHECKING: from pathlib import Path @@ -17,9 +20,19 @@ @pytest.fixture -def doctest_env(cache: pytest.Cache, tmp_path: Path) -> None: +def doctest_env( + request: pytest.FixtureRequest, cache: pytest.Cache, tmp_path: Path +) -> None: from scanpy import settings + # request.node.parent is either a DoctestModule or a DoctestTextFile. + # Only DoctestModule has a .obj attribute (the imported module). + if request.node.parent.obj: + func = import_name(request.node.name) + if warning_detail := getattr(func, "__deprecated", None): + cat, msg, _ = warning_detail # type: tuple[type[Warning], str, bool] + warnings.filterwarnings("ignore", category=cat, message=re.escape(msg)) + old_dd, settings.datasetdir = settings.datasetdir, cache.mkdir("scanpy-data") with chdir(tmp_path): yield diff --git a/docs/benchmark-read-write.ipynb b/docs/benchmark-read-write.ipynb index 44356459d..3420ebc9a 100644 --- a/docs/benchmark-read-write.ipynb +++ b/docs/benchmark-read-write.ipynb @@ -103,7 +103,7 @@ ], "source": [ "%%time\n", - "adata = ad.read('test.h5ad')" + "adata = ad.read_h5ad('test.h5ad')" ] }, { diff --git a/docs/concatenation.rst b/docs/concatenation.rst index 17674188d..e8b07272b 100644 --- a/docs/concatenation.rst +++ b/docs/concatenation.rst @@ -33,7 +33,7 @@ Let's start off with an example: If we split this object up by clusters of observations, then stack those subsets we'll obtain the same values – just ordered differently. - >>> groups = pbmc.obs.groupby("louvain").indices + >>> groups = pbmc.obs.groupby("louvain", observed=True).indices >>> pbmc_concat = ad.concat([pbmc[inds] for inds in groups.values()], merge="same") >>> assert np.array_equal(pbmc.X, pbmc_concat[pbmc.obs_names].X) >>> pbmc_concat diff --git a/docs/release-notes/0.10.3.md b/docs/release-notes/0.10.3.md index 7a0b9c163..9f1db07f5 100644 --- a/docs/release-notes/0.10.3.md +++ b/docs/release-notes/0.10.3.md @@ -5,6 +5,12 @@ ```{rubric} Documentation ``` +* Stop showing “Support for Awkward Arrays is currently experimental” warnings when + reading, concatenating, slicing, or transposing AnnData objects {pr}`1182` {user}`flying-sheep` ```{rubric} Performance ``` + +```{rubric} Other updates +``` +* Fail canary CI job when tests raise unexpected warnings. {pr}`1182` {user}`flying-sheep` diff --git a/docs/release-notes/0.6.0.md b/docs/release-notes/0.6.0.md index b2cc1b506..ab4316f64 100644 --- a/docs/release-notes/0.6.0.md +++ b/docs/release-notes/0.6.0.md @@ -26,7 +26,7 @@ ### 0.6.0 {small}`1 May, 2018` - compatibility with Seurat converter -- tremendous speedup for {func}`~anndata.AnnData.concatenate` +- tremendous speedup for {meth}`~anndata.AnnData.concatenate` - bug fix for deep copy of unstructured annotation after slicing - bug fix for reading HDF5 stored single-category annotations - `'outer join'` concatenation: adds zeros for concatenation of sparse data and nans for dense data diff --git a/pyproject.toml b/pyproject.toml index 7bfbe496a..8c2508787 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ test = [ "httpx", # For data downloading "dask[array,distributed]", "awkward>=2.3", + "pyarrow", "pytest_memray", ] gpu = ["cupy"] @@ -104,13 +105,21 @@ version-file = "anndata/_version.py" [tool.coverage.run] source = ["anndata"] -omit = ["setup.py", "versioneer.py", "anndata/_version.py", "**/test_*.py"] +omit = ["anndata/_version.py", "**/test_*.py"] [tool.pytest.ini_options] -addopts = "--doctest-modules" +addopts = [ + "--strict-markers", + "--doctest-modules", + "--ignore=anndata/core.py", # deprecated + "--ignore=anndata/readwrite.py", # deprecated +] +filterwarnings = [ + 'ignore:Support for Awkward Arrays is currently experimental', + 'ignore:Outer joins on awkward\.Arrays', +] python_files = "test_*.py" testpaths = ["anndata", "docs/concatenation.rst"] -filterwarnings = ['ignore:X\.dtype being converted to np.float32:FutureWarning'] # For some reason this effects how logging is shown when tests are run xfail_strict = true markers = ["gpu: mark test to run on GPU"]