diff --git a/src/anndata/_core/anndata.py b/src/anndata/_core/anndata.py index 2d4ba0861..cd07f5f28 100644 --- a/src/anndata/_core/anndata.py +++ b/src/anndata/_core/anndata.py @@ -772,7 +772,7 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index: if ( len(value) > 0 and not isinstance(value, pd.RangeIndex) - and infer_dtype(value) not in ("string", "bytes") + and infer_dtype(value) not in {"string", "bytes"} ): sample = list(value[: min(len(value), 5)]) msg = dedent( diff --git a/src/anndata/_core/storage.py b/src/anndata/_core/storage.py index c9b09ce93..652056f8e 100644 --- a/src/anndata/_core/storage.py +++ b/src/anndata/_core/storage.py @@ -15,6 +15,7 @@ CupyArray, CupySparseMatrix, DaskArray, + H5Array, ZappyArray, ZarrArray, ) @@ -26,19 +27,23 @@ class StorageType(Enum): + # Memory Array = (np.ndarray, "np.ndarray") Masked = (ma.MaskedArray, "numpy.ma.core.MaskedArray") Sparse = (sparse.spmatrix, "scipy.sparse.spmatrix") + AwkArray = (AwkArray, "awkward.Array") + # Backed + HDF5Dataset = (H5Array, "h5py.Dataset") ZarrArray = (ZarrArray, "zarr.Array") ZappyArray = (ZappyArray, "zappy.base.ZappyArray") - DaskArray = (DaskArray, "dask.array.Array") - CupyArray = (CupyArray, "cupy.ndarray") - CupySparseMatrix = (CupySparseMatrix, "cupyx.scipy.sparse.spmatrix") BackedSparseMatrix = ( BaseCompressedSparseDataset, "anndata.experimental.[CSC,CSR]Dataset", ) - AwkArray = (AwkArray, "awkward.Array") + # Distributed + DaskArray = (DaskArray, "dask.array.Array") + CupyArray = (CupyArray, "cupy.ndarray") + CupySparseMatrix = (CupySparseMatrix, "cupyx.scipy.sparse.spmatrix") @property def cls(self): diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 2402ee3fa..f8f7cbc06 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -25,6 +25,7 @@ CupyCSRMatrix, CupySparseMatrix, DaskArray, + ZarrArray, ) from anndata.utils import asarray @@ -413,7 +414,7 @@ def assert_equal_ndarray(a, b, exact=False, elem_name=None): b = asarray(b) if not exact and is_numeric_dtype(a) and is_numeric_dtype(b): assert a.shape == b.shape, format_msg(elem_name) - assert np.allclose(a, b, equal_nan=True), format_msg(elem_name) + np.testing.assert_allclose(a, b, equal_nan=True, err_msg=format_msg(elem_name)) elif ( # Structured dtype not exact and hasattr(a, "dtype") @@ -449,6 +450,7 @@ def assert_equal_cupy_sparse(a, b, exact=False, elem_name=None): @assert_equal.register(h5py.Dataset) +@assert_equal.register(ZarrArray) def assert_equal_h5py_dataset(a, b, exact=False, elem_name=None): a = asarray(a) assert_equal(b, a, exact, elem_name=elem_name) diff --git a/tests/test_backed_dense.py b/tests/test_backed_dense.py new file mode 100644 index 000000000..75d91ee73 --- /dev/null +++ b/tests/test_backed_dense.py @@ -0,0 +1,91 @@ +"""Tests for backing by just sticking zarr/h5py objects into AnnData.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal + +import h5py +import numpy as np +import pytest +import zarr + +from anndata import AnnData +from anndata._io.specs import write_elem +from anndata.tests.helpers import assert_equal + +if TYPE_CHECKING: + from pathlib import Path + + +@pytest.fixture(params=["h5ad", "zarr"]) +def diskfmt(request): + return request.param + + +@pytest.fixture() +def file(tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]) -> h5py.File | zarr.Group: + path = tmp_path / f"test.{diskfmt}" + if diskfmt == "zarr": + return zarr.open_group(path, "a") + if diskfmt == "h5ad": + return h5py.File(path, "a") + pytest.fail(f"Unknown diskfmt: {diskfmt}") + + +@pytest.mark.parametrize("assign", ["init", "assign"]) +@pytest.mark.parametrize("attr", ["X", "obsm", "varm", "layers"]) +def test_create_delete( + diskfmt: Literal["h5ad", "zarr"], + file: h5py.File | zarr.Group, + assign: Literal["init", "assign"], + attr: Literal["X", "obsm", "varm", "layers"], +): + x = np.random.randn(10, 10) + write_elem(file, "a", x) + + # initialize (and if applicable, assign) + if assign == "init": + kw = ( + dict(X=file["a"]) + if attr == "X" + else {attr: dict(a=file["a"]), "shape": x.shape} + ) + adata = AnnData(**kw) + elif assign == "assign": + adata = AnnData(shape=x.shape) + if attr == "X": + adata.X = file["a"] + else: + getattr(adata, attr)["a"] = file["a"] + else: + pytest.fail(f"Unexpected assign: {assign}") + + # check equality + if attr == "X": + # TODO: should that be inverted, e.g. when the Dataset’s path matches the backed mode path? + assert not adata.isbacked + backed_array = adata.X + else: + backed_array = getattr(adata, attr)["a"] + assert isinstance(backed_array, zarr.Array if diskfmt == "zarr" else h5py.Dataset) + assert_equal(backed_array, x) + + # check that there’s no error deleting it either + if attr == "X": + del adata.X + else: + del getattr(adata, attr)["a"] + + +def test_assign_x_subset(file: h5py.File | zarr.Group): + x = np.ones((10, 10)) + write_elem(file, "a", x) + + adata = AnnData(file["a"]) + + view = adata[3:7, 6:8] + view.X = np.zeros((4, 2)) + + expected = x.copy() + expected[3:7, 6:8] = np.zeros((4, 2)) + assert_equal(adata.X, expected) diff --git a/tests/test_hdf5_backing.py b/tests/test_backed_hdf5.py similarity index 98% rename from tests/test_hdf5_backing.py rename to tests/test_backed_hdf5.py index b1c7029b7..f86634fa9 100644 --- a/tests/test_hdf5_backing.py +++ b/tests/test_backed_hdf5.py @@ -1,3 +1,5 @@ +"""Tests for backing using the `.file` and `.isbacked` attributes.""" + from __future__ import annotations from pathlib import Path @@ -18,6 +20,8 @@ from anndata.utils import asarray subset_func2 = subset_func + + # ------------------------------------------------------------------------------- # Some test data # ------------------------------------------------------------------------------- @@ -99,6 +103,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense): # this is very similar to the views test +@pytest.mark.filterwarnings("ignore::anndata.ImplicitModificationWarning") def test_backing(adata, tmp_path, backing_h5ad): assert not adata.isbacked