From 1562343e7c1740ce4bf6cf6cb96a58ee4813ecf1 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Wed, 20 Dec 2023 17:39:47 +0100 Subject: [PATCH 1/6] Switch to ruff formatting (#1275) --- .gitignore | 1 + .pre-commit-config.yaml | 11 +++----- anndata/_core/merge.py | 12 ++++++--- anndata/_core/sparse_dataset.py | 12 ++++----- .../multi_files/_anncollection.py | 18 ++++++++----- docs/benchmark-read-write.ipynb | 15 ++++++----- pyproject.toml | 25 +++++++++++-------- 7 files changed, 55 insertions(+), 39 deletions(-) diff --git a/.gitignore b/.gitignore index dded609a6..88f0e90c3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ # Caches for compiled and downloaded files __pycache__/ /*cache/ +/node_modules/ /data/ # Distribution / packaging diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 67ab9d1f4..c157b9e19 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,12 @@ repos: - - repo: https://github.com/psf/black - rev: 23.11.0 - hooks: - - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: "v0.1.7" + rev: v0.1.8 hooks: - id: ruff + types_or: [python, pyi, jupyter] args: ["--fix"] + - id: ruff-format + types_or: [python, pyi, jupyter] - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.4 hooks: @@ -26,7 +24,6 @@ repos: - id: detect-private-key - id: no-commit-to-branch args: ["--branch=main"] - - repo: https://github.com/codespell-project/codespell rev: v2.2.6 hooks: diff --git a/anndata/_core/merge.py b/anndata/_core/merge.py index 66a8c3459..4463399bc 100644 --- a/anndata/_core/merge.py +++ b/anndata/_core/merge.py @@ -1108,12 +1108,18 @@ def concat( ... X=sparse.csr_matrix(np.array([[0, 1], [2, 3]])), ... obs=pd.DataFrame({"group": ["a", "b"]}, index=["s1", "s2"]), ... var=pd.DataFrame(index=["var1", "var2"]), - ... varm={"ones": np.ones((2, 5)), "rand": np.random.randn(2, 3), "zeros": np.zeros((2, 5))}, + ... varm={ + ... "ones": np.ones((2, 5)), + ... "rand": np.random.randn(2, 3), + ... "zeros": np.zeros((2, 5)), + ... }, ... uns={"a": 1, "b": 2, "c": {"c.a": 3, "c.b": 4}}, ... ) >>> b = ad.AnnData( ... X=sparse.csr_matrix(np.array([[4, 5, 6], [7, 8, 9]])), - ... obs=pd.DataFrame({"group": ["b", "c"], "measure": [1.2, 4.3]}, index=["s3", "s4"]), + ... obs=pd.DataFrame( + ... {"group": ["b", "c"], "measure": [1.2, 4.3]}, index=["s3", "s4"] + ... ), ... var=pd.DataFrame(index=["var1", "var2", "var3"]), ... varm={"ones": np.ones((3, 5)), "rand": np.random.randn(3, 5)}, ... uns={"a": 1, "b": 3, "c": {"c.b": 4}}, @@ -1147,7 +1153,7 @@ def concat( >>> (inner.obs_names, inner.var_names) # doctest: +NORMALIZE_WHITESPACE (Index(['s1', 's2', 's3', 's4'], dtype='object'), Index(['var1', 'var2'], dtype='object')) - >>> outer = ad.concat([a, b], join="outer") # Joining on union of variables + >>> outer = ad.concat([a, b], join="outer") # Joining on union of variables >>> outer AnnData object with n_obs × n_vars = 4 × 3 obs: 'group', 'measure' diff --git a/anndata/_core/sparse_dataset.py b/anndata/_core/sparse_dataset.py index 12a70a272..e88388a58 100644 --- a/anndata/_core/sparse_dataset.py +++ b/anndata/_core/sparse_dataset.py @@ -499,12 +499,12 @@ def sparse_dataset(group: ZarrGroup | H5Group) -> CSRDataset | CSCDataset: >>> import zarr >>> from anndata.experimental import sparse_dataset - >>> group = zarr.open_group('./my_test_store.zarr') - >>> group['data'] = [10, 20, 30, 40, 50, 60, 70, 80] - >>> group['indices'] = [0, 1, 1, 3, 2, 3, 4, 5] - >>> group['indptr'] = [0, 2, 4, 7, 8] - >>> group.attrs['shape'] = (4, 6) - >>> group.attrs['encoding-type'] = 'csr_matrix' + >>> group = zarr.open_group("./my_test_store.zarr") + >>> group["data"] = [10, 20, 30, 40, 50, 60, 70, 80] + >>> group["indices"] = [0, 1, 1, 3, 2, 3, 4, 5] + >>> group["indptr"] = [0, 2, 4, 7, 8] + >>> group.attrs["shape"] = (4, 6) + >>> group.attrs["encoding-type"] = "csr_matrix" >>> sparse_dataset(group) CSRDataset: backend zarr, shape (4, 6), data_dtype int64 """ diff --git a/anndata/experimental/multi_files/_anncollection.py b/anndata/experimental/multi_files/_anncollection.py index ca74092ad..09533522a 100644 --- a/anndata/experimental/multi_files/_anncollection.py +++ b/anndata/experimental/multi_files/_anncollection.py @@ -493,9 +493,12 @@ def convert(self): :: { - 'X': lambda a: a.toarray() if issparse(a) else a, # densify .X - 'obsm': lambda a: np.asarray(a, dtype='float32'), # change dtype for all keys of .obsm - 'obs': dict(key1 = lambda c: c.astype(str)) # change type only for one key of .obs + # densify .X + "X": lambda a: a.toarray() if issparse(a) else a, + # change dtype for all keys of .obsm + "obsm": lambda a: np.asarray(a, dtype="float32"), + # change type only for one key of .obs + "obs": dict(key1=lambda c: c.astype(str)), } """ return self._convert @@ -817,9 +820,12 @@ def convert(self): :: { - 'X': lambda a: a.toarray() if issparse(a) else a, # densify .X - 'obsm': lambda a: np.asarray(a, dtype='float32'), # change dtype for all keys of .obsm - 'obs': dict(key1 = lambda c: c.astype(str)) # change type only for one key of .obs + # densify .X + "X": lambda a: a.toarray() if issparse(a) else a, + # change dtype for all keys of .obsm + "obsm": lambda a: np.asarray(a, dtype="float32"), + # change type only for one key of .obs + "obs": dict(key1=lambda c: c.astype(str)), } """ return self._convert diff --git a/docs/benchmark-read-write.ipynb b/docs/benchmark-read-write.ipynb index 3420ebc9a..886bfa0f6 100644 --- a/docs/benchmark-read-write.ipynb +++ b/docs/benchmark-read-write.ipynb @@ -20,8 +20,11 @@ "metadata": {}, "outputs": [], "source": [ - "import anndata as ad\n", - "import scanpy as sc" + "from __future__ import annotations\n", + "\n", + "import scanpy as sc\n", + "\n", + "import anndata as ad" ] }, { @@ -84,7 +87,7 @@ ], "source": [ "%%time\n", - "adata.write('test.h5ad')" + "adata.write(\"test.h5ad\")" ] }, { @@ -103,7 +106,7 @@ ], "source": [ "%%time\n", - "adata = ad.read_h5ad('test.h5ad')" + "adata = ad.read_h5ad(\"test.h5ad\")" ] }, { @@ -129,7 +132,7 @@ ], "source": [ "%%time\n", - "adata.write_loom('test.loom')" + "adata.write_loom(\"test.loom\")" ] }, { @@ -156,7 +159,7 @@ ], "source": [ "%%time\n", - "adata = ad.read_loom('test.loom')" + "adata = ad.read_loom(\"test.loom\")" ] } ], diff --git a/pyproject.toml b/pyproject.toml index fc88e76a6..c705aff43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,15 +133,10 @@ testpaths = ["anndata", "docs/concatenation.rst"] xfail_strict = true markers = ["gpu: mark test to run on GPU"] -[tool.ruff] -ignore = [ - # line too long -> we accept long comment lines; black gets rid of long code lines - "E501", - # Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments, - "E731", - # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation - "E741", -] +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] select = [ "E", # Error detected by Pycodestyle "F", # Errors detected by Pyflakes @@ -152,10 +147,18 @@ select = [ "ICN", # Follow import conventions "PTH", # Pathlib instead of os.path ] -[tool.ruff.per-file-ignores] +ignore = [ + # line too long -> we accept long comment lines; formatter gets rid of long code lines + "E501", + # Do not assign a lambda expression, use a def -> AnnData allows lambda expression assignments, + "E731", + # allow I, O, l as variable names -> I is the identity matrix, i, j, k, l is reasonable indexing notation + "E741", +] +[tool.ruff.lint.per-file-ignores] # E721 comparing types, but we specifically are checking that we aren't getting subtypes (views) "anndata/tests/test_readwrite.py" = ["E721"] -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["anndata"] required-imports = ["from __future__ import annotations"] From 63e6b29b8d73bee56f37487e361dd9b8d9d599e2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 13:57:49 +0100 Subject: [PATCH 2/6] [pre-commit.ci] pre-commit autoupdate (#1278) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c157b9e19..f9626c3d4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.1.9 hooks: - id: ruff types_or: [python, pyi, jupyter] @@ -8,7 +8,7 @@ repos: - id: ruff-format types_or: [python, pyi, jupyter] - repo: https://github.com/pre-commit/mirrors-prettier - rev: v4.0.0-alpha.4 + rev: v4.0.0-alpha.8 hooks: - id: prettier - repo: https://github.com/pre-commit/pre-commit-hooks From 45a65f08d0cb5e80a443c6054b59287f94ec48a6 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 4 Jan 2024 14:04:21 +0100 Subject: [PATCH 3/6] Fix IO error reporting (#1273) --- .pre-commit-config.yaml | 2 + anndata/_core/anndata.py | 2 +- anndata/_io/h5ad.py | 19 ++++--- anndata/_io/specs/registry.py | 81 +++++++++++++--------------- anndata/_io/utils.py | 63 ++++++++++++---------- anndata/_io/zarr.py | 2 +- anndata/compat/__init__.py | 17 +++++- anndata/experimental/_dispatch_io.py | 4 +- anndata/tests/test_awkward.py | 13 ++--- anndata/tests/test_io_utils.py | 37 +++++++++---- anndata/tests/test_readwrite.py | 2 +- docs/release-notes/0.10.4.md | 1 + pyproject.toml | 2 +- 13 files changed, 140 insertions(+), 105 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f9626c3d4..2ce52a6d7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,6 +11,8 @@ repos: rev: v4.0.0-alpha.8 hooks: - id: prettier + exclude_types: + - markdown - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: diff --git a/anndata/_core/anndata.py b/anndata/_core/anndata.py index b37e33177..a2b010bdf 100644 --- a/anndata/_core/anndata.py +++ b/anndata/_core/anndata.py @@ -74,7 +74,7 @@ class StorageType(Enum): DaskArray = DaskArray CupyArray = CupyArray CupySparseMatrix = CupySparseMatrix - BackedSparseMAtrix = BaseCompressedSparseDataset + BackedSparseMatrix = BaseCompressedSparseDataset @classmethod def classes(cls): diff --git a/anndata/_io/h5ad.py b/anndata/_io/h5ad.py index 337f13f92..5f31da04a 100644 --- a/anndata/_io/h5ad.py +++ b/anndata/_io/h5ad.py @@ -6,6 +6,7 @@ from types import MappingProxyType from typing import ( TYPE_CHECKING, + Any, Callable, Literal, TypeVar, @@ -112,7 +113,13 @@ def write_h5ad( @report_write_key_on_error @write_spec(IOSpec("array", "0.2.0")) -def write_sparse_as_dense(f, key, value, dataset_kwargs=MappingProxyType({})): +def write_sparse_as_dense( + f: h5py.Group, + key: str, + value: sparse.spmatrix | BaseCompressedSparseDataset, + *, + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), +): real_key = None # Flag for if temporary key was used if key in f: if isinstance(value, BaseCompressedSparseDataset) and ( @@ -269,7 +276,7 @@ def callback(func, elem_name: str, elem, iospec): def _read_raw( f: h5py.File | AnnDataFileManager, as_sparse: Collection[str] = (), - rdasp: Callable[[h5py.Dataset], sparse.spmatrix] = None, + rdasp: Callable[[h5py.Dataset], sparse.spmatrix] | None = None, *, attrs: Collection[str] = ("X", "var", "varm"), ) -> dict: @@ -286,7 +293,7 @@ def _read_raw( @report_read_key_on_error -def read_dataframe_legacy(dataset) -> pd.DataFrame: +def read_dataframe_legacy(dataset: h5py.Dataset) -> pd.DataFrame: """Read pre-anndata 0.7 dataframes.""" warn( f"'{dataset.name}' was written with a very old version of AnnData. " @@ -305,7 +312,7 @@ def read_dataframe_legacy(dataset) -> pd.DataFrame: return df -def read_dataframe(group) -> pd.DataFrame: +def read_dataframe(group: h5py.Group | h5py.Dataset) -> pd.DataFrame: """Backwards compat function""" if not isinstance(group, h5py.Group): return read_dataframe_legacy(group) @@ -352,7 +359,7 @@ def read_dense_as_sparse( raise ValueError(f"Cannot read dense array as type: {sparse_format}") -def read_dense_as_csr(dataset, axis_chunk=6000): +def read_dense_as_csr(dataset: h5py.Dataset, axis_chunk: int = 6000): sub_matrices = [] for idx in idx_chunks_along_axis(dataset.shape, 0, axis_chunk): dense_chunk = dataset[idx] @@ -361,7 +368,7 @@ def read_dense_as_csr(dataset, axis_chunk=6000): return sparse.vstack(sub_matrices, format="csr") -def read_dense_as_csc(dataset, axis_chunk=6000): +def read_dense_as_csc(dataset: h5py.Dataset, axis_chunk: int = 6000): sub_matrices = [] for idx in idx_chunks_along_axis(dataset.shape, 1, axis_chunk): sub_matrix = sparse.csc_matrix(dataset[idx]) diff --git a/anndata/_io/specs/registry.py b/anndata/_io/specs/registry.py index 1f0b137f4..a8357295d 100644 --- a/anndata/_io/specs/registry.py +++ b/anndata/_io/specs/registry.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections.abc import Callable, Iterable, Mapping +from collections.abc import Mapping from dataclasses import dataclass from functools import singledispatch, wraps from types import MappingProxyType @@ -10,12 +10,13 @@ from anndata.compat import _read_attr if TYPE_CHECKING: + from collections.abc import Callable, Generator, Iterable + from anndata._types import GroupStorageType, StorageType + # TODO: This probably should be replaced by a hashable Mapping due to conversion b/w "_" and "-" # TODO: Should filetype be included in the IOSpec if it changes the encoding? Or does the intent that these things be "the same" overrule that? - - @dataclass(frozen=True) class IOSpec: encoding_type: str @@ -25,7 +26,9 @@ class IOSpec: # TODO: Should this subclass from LookupError? class IORegistryError(Exception): @classmethod - def _from_write_parts(cls, dest_type, typ, modifiers) -> IORegistryError: + def _from_write_parts( + cls, dest_type: type, typ: type, modifiers: frozenset[str] + ) -> IORegistryError: msg = f"No method registered for writing {typ} into {dest_type}" if modifiers: msg += f" with {modifiers}" @@ -36,7 +39,7 @@ def _from_read_parts( cls, method: str, registry: Mapping, - src_typ: StorageType, + src_typ: type[StorageType], spec: IOSpec, ) -> IORegistryError: # TODO: Improve error message if type exists, but version does not @@ -50,7 +53,7 @@ def _from_read_parts( def write_spec(spec: IOSpec): def decorator(func: Callable): @wraps(func) - def wrapper(g, k, *args, **kwargs): + def wrapper(g: GroupStorageType, k: str, *args, **kwargs): result = func(g, k, *args, **kwargs) g[k].attrs.setdefault("encoding-type", spec.encoding_type) g[k].attrs.setdefault("encoding-version", spec.encoding_version) @@ -193,12 +196,12 @@ def proc_spec(spec) -> IOSpec: @proc_spec.register(IOSpec) -def proc_spec_spec(spec) -> IOSpec: +def proc_spec_spec(spec: IOSpec) -> IOSpec: return spec @proc_spec.register(Mapping) -def proc_spec_mapping(spec) -> IOSpec: +def proc_spec_mapping(spec: Mapping[str, str]) -> IOSpec: return IOSpec(**{k.replace("-", "_"): v for k, v in spec.items()}) @@ -213,7 +216,9 @@ def get_spec( ) -def _iter_patterns(elem): +def _iter_patterns( + elem, +) -> Generator[tuple[type, type | str] | tuple[type, type, str], None, None]: """Iterates over possible patterns for an element in order of precedence.""" from anndata.compat import DaskArray @@ -236,40 +241,27 @@ def __init__(self, registry: IORegistry, callback: Callable | None = None) -> No def read_elem( self, elem: StorageType, - modifiers: frozenset(str) = frozenset(), + modifiers: frozenset[str] = frozenset(), ) -> Any: """Read an element from a store. See exported function for more details.""" from functools import partial - read_func = self.registry.get_reader( - type(elem), get_spec(elem), frozenset(modifiers) + iospec = get_spec(elem) + read_func = partial( + self.registry.get_reader(type(elem), iospec, modifiers), + _reader=self, ) - read_func = partial(read_func, _reader=self) - if self.callback is not None: - return self.callback(read_func, elem.name, elem, iospec=get_spec(elem)) - else: + if self.callback is None: return read_func(elem) + return self.callback(read_func, elem.name, elem, iospec=iospec) class Writer: - def __init__( - self, - registry: IORegistry, - callback: Callable[ - [ - GroupStorageType, - str, - StorageType, - dict, - ], - None, - ] - | None = None, - ): + def __init__(self, registry: IORegistry, callback: Callable | None = None): self.registry = registry self.callback = callback - def find_writer(self, dest_type, elem, modifiers): + def find_writer(self, dest_type: type, elem, modifiers: frozenset[str]): for pattern in _iter_patterns(elem): if self.registry.has_writer(dest_type, pattern, modifiers): return self.registry.get_writer(dest_type, pattern, modifiers) @@ -281,10 +273,10 @@ def write_elem( self, store: GroupStorageType, k: str, - elem, + elem: Any, *, - dataset_kwargs=MappingProxyType({}), - modifiers=frozenset(), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), + modifiers: frozenset[str] = frozenset(), ): from functools import partial from pathlib import PurePosixPath @@ -313,17 +305,16 @@ def write_elem( _writer=self, ) - if self.callback is not None: - return self.callback( - write_func, - store, - k, - elem, - dataset_kwargs=dataset_kwargs, - iospec=self.registry.get_spec(elem), - ) - else: + if self.callback is None: return write_func(store, k, elem, dataset_kwargs=dataset_kwargs) + return self.callback( + write_func, + store, + k, + elem, + dataset_kwargs=dataset_kwargs, + iospec=self.registry.get_spec(elem), + ) def read_elem(elem: StorageType) -> Any: @@ -346,7 +337,7 @@ def write_elem( k: str, elem: Any, *, - dataset_kwargs: Mapping = MappingProxyType({}), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> None: """ Write an element to a storage group using anndata encoding. diff --git a/anndata/_io/utils.py b/anndata/_io/utils.py index cd90be473..6f46ad7eb 100644 --- a/anndata/_io/utils.py +++ b/anndata/_io/utils.py @@ -1,15 +1,19 @@ from __future__ import annotations from functools import wraps -from typing import Callable, Literal +from typing import TYPE_CHECKING, Callable, Literal, Union, cast from warnings import warn import h5py from packaging.version import Version -from anndata.compat import H5Group, ZarrGroup, add_note - from .._core.sparse_dataset import BaseCompressedSparseDataset +from ..compat import H5Group, ZarrGroup, add_note, pairwise + +if TYPE_CHECKING: + from .._types import StorageType + + Storage = Union[StorageType, BaseCompressedSparseDataset] # For allowing h5py v3 # https://github.com/scverse/anndata/issues/442 @@ -151,31 +155,26 @@ class AnnDataReadError(OSError): pass -def _get_parent(elem): - try: - import zarr - except ImportError: - zarr = None - if zarr and isinstance(elem, (zarr.Group, zarr.Array)): - parent = elem.store # Not sure how to always get a name out of this - elif isinstance(elem, BaseCompressedSparseDataset): - parent = elem.group.file.name - else: - parent = elem.file.name - return parent +def _get_display_path(store: Storage) -> str: + """Return an absolute path of an element (always starts with “/”).""" + if isinstance(store, BaseCompressedSparseDataset): + store = store.group + path = store.name or "??" # can be None + return f'/{path.removeprefix("/")}' -def add_key_note(e: BaseException, elem, key, op=Literal["read", "writ"]) -> None: +def add_key_note( + e: BaseException, store: Storage, path: str, key: str, op: Literal["read", "writ"] +) -> None: if any( f"Error raised while {op}ing key" in note for note in getattr(e, "__notes__", []) ): return - parent = _get_parent(elem) - add_note( - e, - f"Error raised while {op}ing key {key!r} of {type(elem)} to " f"{parent}", - ) + + dir = "to" if op == "writ" else "from" + msg = f"Error raised while {op}ing key {key!r} of {type(store)} {dir} {path}" + add_note(e, msg) def report_read_key_on_error(func): @@ -198,13 +197,17 @@ def func_wrapper(*args, **kwargs): from anndata._io.specs import Reader # Figure out signature (method vs function) by going through args - for elem in args: - if not isinstance(elem, Reader): + for arg in args: + if not isinstance(arg, Reader): + store = cast("Storage", arg) break + else: + raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - add_key_note(e, elem, elem.name, "read") + path, key = _get_display_path(store).rsplit("/", 1) + add_key_note(e, store, path or "/", key, "read") raise return func_wrapper @@ -230,15 +233,17 @@ def func_wrapper(*args, **kwargs): from anndata._io.specs import Writer # Figure out signature (method vs function) by going through args - for i in range(len(args)): - elem = args[i] - key = args[i + 1] - if not isinstance(elem, Writer): + for arg, key in pairwise(args): + if not isinstance(arg, Writer): + store = cast("Storage", arg) break + else: + raise ValueError("No element found in args.") try: return func(*args, **kwargs) except Exception as e: - add_key_note(e, elem, key, "writ") + path = _get_display_path(store) + add_key_note(e, store, path, key, "writ") raise return func_wrapper diff --git a/anndata/_io/zarr.py b/anndata/_io/zarr.py index 00f9766f0..864475848 100644 --- a/anndata/_io/zarr.py +++ b/anndata/_io/zarr.py @@ -133,7 +133,7 @@ def read_dataframe_legacy(dataset: zarr.Array) -> pd.DataFrame: @report_read_key_on_error -def read_dataframe(group) -> pd.DataFrame: +def read_dataframe(group: zarr.Group | zarr.Array) -> pd.DataFrame: # Fast paths if isinstance(group, zarr.Array): return read_dataframe_legacy(group) diff --git a/anndata/compat/__init__.py b/anndata/compat/__init__.py index 24f3875b3..39323d73a 100644 --- a/anndata/compat/__init__.py +++ b/anndata/compat/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys from codecs import decode from collections.abc import Mapping from contextlib import AbstractContextManager @@ -35,9 +36,9 @@ class Empty: ############################# -try: +if sys.version_info >= (3, 11): from contextlib import chdir -except ImportError: # Python < 3.11 +else: @dataclass class chdir(AbstractContextManager): @@ -52,6 +53,18 @@ def __exit__(self, *_exc_info) -> None: os.chdir(self._old_cwd.pop()) +if sys.version_info >= (3, 10): + from itertools import pairwise +else: + + def pairwise(iterable): + from itertools import tee + + a, b = tee(iterable) + next(b, None) + return zip(a, b) + + ############################# # Optional deps ############################# diff --git a/anndata/experimental/_dispatch_io.py b/anndata/experimental/_dispatch_io.py index 4df4d417a..2a399d540 100644 --- a/anndata/experimental/_dispatch_io.py +++ b/anndata/experimental/_dispatch_io.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING, Any, Callable if TYPE_CHECKING: + from collections.abc import Mapping + from anndata._io.specs import IOSpec from anndata._types import GroupStorageType, StorageType @@ -55,7 +57,7 @@ def write_dispatched( None, ], *, - dataset_kwargs=MappingProxyType({}), + dataset_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> None: """ Write elem to store, recursively calling callback at each sub-element. diff --git a/anndata/tests/test_awkward.py b/anndata/tests/test_awkward.py index 9e780c8a8..2e996bfc6 100644 --- a/anndata/tests/test_awkward.py +++ b/anndata/tests/test_awkward.py @@ -1,7 +1,7 @@ """Tests related to awkward arrays""" from __future__ import annotations -from contextlib import nullcontext +import warnings import numpy as np import numpy.testing as npt @@ -381,15 +381,12 @@ def test_concat_mixed_types(key, arrays, expected, join): to_concat.append(tmp_adata) if isinstance(expected, type) and issubclass(expected, Exception): - ctx = ( - pytest.warns( + with pytest.raises(expected), warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", FutureWarning, - match=r"The behavior of DataFrame concatenation with empty or all-NA entries is deprecated", ) - if any(df.empty for df in arrays if isinstance(df, pd.DataFrame)) - else nullcontext() - ) - with pytest.raises(expected), ctx: anndata.concat(to_concat, axis=axis, join=join) else: result_adata = anndata.concat(to_concat, axis=axis, join=join) diff --git a/anndata/tests/test_io_utils.py b/anndata/tests/test_io_utils.py index c70091474..803a4ad72 100644 --- a/anndata/tests/test_io_utils.py +++ b/anndata/tests/test_io_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations -from contextlib import suppress +from contextlib import AbstractContextManager, suppress +from typing import TYPE_CHECKING import h5py import pandas as pd @@ -9,13 +10,15 @@ import anndata as ad from anndata._io.specs.registry import IORegistryError -from anndata._io.utils import ( - report_read_key_on_error, -) +from anndata._io.utils import report_read_key_on_error from anndata.compat import _clean_uns from anndata.experimental import read_elem, write_elem from anndata.tests.helpers import pytest_8_raises +if TYPE_CHECKING: + from collections.abc import Callable + from pathlib import Path + @pytest.fixture(params=["h5ad", "zarr"]) def diskfmt(request): @@ -29,20 +32,32 @@ def diskfmt(request): pytest.param(lambda p: h5py.File(p / "test.h5", mode="a"), id="h5py"), ], ) -def test_key_error(tmp_path, group_fn): +@pytest.mark.parametrize("nested", [True, False], ids=["nested", "root"]) +def test_key_error( + tmp_path, group_fn: Callable[[Path], zarr.Group | h5py.Group], nested: bool +): @report_read_key_on_error def read_attr(_): raise NotImplementedError() group = group_fn(tmp_path) - with group if hasattr(group, "__enter__") else suppress(): + with group if isinstance(group, AbstractContextManager) else suppress(): + if nested: + group = group.create_group("nested") + path = "/nested" + else: + path = "/" group["X"] = [1, 2, 3] group.create_group("group") - with pytest_8_raises(NotImplementedError, match=r"/X"): + with pytest_8_raises( + NotImplementedError, match=rf"reading key 'X'.*from {path}$" + ): read_attr(group["X"]) - with pytest_8_raises(NotImplementedError, match=r"/group"): + with pytest_8_raises( + NotImplementedError, match=rf"reading key 'group'.*from {path}$" + ): read_attr(group["group"]) @@ -53,7 +68,9 @@ def test_write_error_info(diskfmt, tmp_path): # Assuming we don't define a writer for tuples a = ad.AnnData(uns={"a": {"b": {"c": (1, 2, 3)}}}) - with pytest_8_raises(IORegistryError, match=r"Error raised while writing key 'c'"): + with pytest_8_raises( + IORegistryError, match=r"Error raised while writing key 'c'.*to /uns/a/b" + ): write(a) @@ -89,7 +106,7 @@ class Foo: # (?!...) is a negative lookahead # (?s) enables the dot to match newlines # https://stackoverflow.com/a/406408/130164 <- copilot suggested lol - pattern = r"(?s)((?!Error raised while writing key '/?a').)*$" + pattern = r"(?s)^((?!Error raised while writing key '/?a').)*$" with pytest_8_raises(IORegistryError, match=pattern): write_elem(group, "/", {"a": {"b": Foo()}}) diff --git a/anndata/tests/test_readwrite.py b/anndata/tests/test_readwrite.py index 22b1aaffc..67341fe57 100644 --- a/anndata/tests/test_readwrite.py +++ b/anndata/tests/test_readwrite.py @@ -277,7 +277,7 @@ def test_read_full_io_error(tmp_path, name, read, write): store["obs"].attrs["encoding-type"] = "invalid" with pytest_8_raises( IORegistryError, - match=r"raised while reading key '/obs'", + match=r"raised while reading key 'obs'.*from /$", ) as exc_info: read(path) assert re.search( diff --git a/docs/release-notes/0.10.4.md b/docs/release-notes/0.10.4.md index 2739f5c7f..ee1560ebe 100644 --- a/docs/release-notes/0.10.4.md +++ b/docs/release-notes/0.10.4.md @@ -7,6 +7,7 @@ * `adata[:, []]` now returns an `AnnData` object empty on the appropriate dimensions instead of erroring {pr}`1243` {user}`ilan-gold` * `adata.X[mask]` works in newer `numpy` versions when `X` is `backed` {pr}`1255` {user}`ilan-gold` * `adata.X[...]` fixed for `X` as a `BaseCompressedSparseDataset` with `zarr` backend {pr}`1265` {user}`ilan-gold` +* Improve read/write error reporting {pr}`1273` {user}`flying-sheep` ```{rubric} Documentation ``` diff --git a/pyproject.toml b/pyproject.toml index c705aff43..ffb275bde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,7 +80,7 @@ doc = [ ] test = [ "loompy>=3.0.5", - "pytest>=6.0", + "pytest >=6.0, !=8.0.0rc1", # https://github.com/pytest-dev/pytest/issues/11759 "pytest-cov>=2.10", "zarr", "matplotlib", From 4a1f87fd7b363eb7dd60b120051f57ccdd18149e Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 4 Jan 2024 15:10:10 +0100 Subject: [PATCH 4/6] Prepare v0.10.4 release (#1280) --- docs/release-notes/0.10.4.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/release-notes/0.10.4.md b/docs/release-notes/0.10.4.md index ee1560ebe..5588f534a 100644 --- a/docs/release-notes/0.10.4.md +++ b/docs/release-notes/0.10.4.md @@ -1,4 +1,4 @@ -### 0.10.4 {small}`the future` +### 0.10.4 {small}`2024-01-04` ```{rubric} Bugfix ``` @@ -12,6 +12,3 @@ ```{rubric} Documentation ``` * Improve aligned mapping error messages {pr}`1252` {user}`flying-sheep` - -```{rubric} Performance -``` From 2abc89a38c6222e3b782bd23041a0cdfe0af1f4f Mon Sep 17 00:00:00 2001 From: Philipp A Date: Thu, 4 Jan 2024 15:30:52 +0100 Subject: [PATCH 5/6] Start 0.10.5 (#1283) --- docs/release-notes/0.10.5.md | 10 ++++++++++ docs/release-notes/release-latest.md | 3 +++ 2 files changed, 13 insertions(+) create mode 100644 docs/release-notes/0.10.5.md diff --git a/docs/release-notes/0.10.5.md b/docs/release-notes/0.10.5.md new file mode 100644 index 000000000..5c229cd17 --- /dev/null +++ b/docs/release-notes/0.10.5.md @@ -0,0 +1,10 @@ +### 0.10.5 {small}`the future` + +```{rubric} Bugfix +``` + +```{rubric} Documentation +``` + +```{rubric} Performance +``` diff --git a/docs/release-notes/release-latest.md b/docs/release-notes/release-latest.md index 3f0a65d08..9f6682d3f 100644 --- a/docs/release-notes/release-latest.md +++ b/docs/release-notes/release-latest.md @@ -5,6 +5,9 @@ ## Version 0.10 +```{include} /release-notes/0.10.5.md +``` + ```{include} /release-notes/0.10.4.md ``` From 522d7ea288d4d203101aee0847d3df307f10135f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 09:07:00 +0100 Subject: [PATCH 6/6] [pre-commit.ci] pre-commit autoupdate (#1289) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2ce52a6d7..0d365144a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.9 + rev: v0.1.11 hooks: - id: ruff types_or: [python, pyi, jupyter]