Skip to content

Commit

Permalink
Backport PR #1537: Fix assigning and initializing with h5py datasets (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
flying-sheep authored Jun 27, 2024
1 parent 656cd64 commit 6f26e9a
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/anndata/_core/anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
if (
len(value) > 0
and not isinstance(value, pd.RangeIndex)
and infer_dtype(value) not in ("string", "bytes")
and infer_dtype(value) not in {"string", "bytes"}
):
sample = list(value[: min(len(value), 5)])
msg = dedent(
Expand Down
13 changes: 9 additions & 4 deletions src/anndata/_core/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CupyArray,
CupySparseMatrix,
DaskArray,
H5Array,
ZappyArray,
ZarrArray,
)
Expand All @@ -26,19 +27,23 @@


class StorageType(Enum):
# Memory
Array = (np.ndarray, "np.ndarray")
Masked = (ma.MaskedArray, "numpy.ma.core.MaskedArray")
Sparse = (sparse.spmatrix, "scipy.sparse.spmatrix")
AwkArray = (AwkArray, "awkward.Array")
# Backed
HDF5Dataset = (H5Array, "h5py.Dataset")
ZarrArray = (ZarrArray, "zarr.Array")
ZappyArray = (ZappyArray, "zappy.base.ZappyArray")
DaskArray = (DaskArray, "dask.array.Array")
CupyArray = (CupyArray, "cupy.ndarray")
CupySparseMatrix = (CupySparseMatrix, "cupyx.scipy.sparse.spmatrix")
BackedSparseMatrix = (
BaseCompressedSparseDataset,
"anndata.experimental.[CSC,CSR]Dataset",
)
AwkArray = (AwkArray, "awkward.Array")
# Distributed
DaskArray = (DaskArray, "dask.array.Array")
CupyArray = (CupyArray, "cupy.ndarray")
CupySparseMatrix = (CupySparseMatrix, "cupyx.scipy.sparse.spmatrix")

@property
def cls(self):
Expand Down
4 changes: 3 additions & 1 deletion src/anndata/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
CupyCSRMatrix,
CupySparseMatrix,
DaskArray,
ZarrArray,
)
from anndata.utils import asarray

Expand Down Expand Up @@ -413,7 +414,7 @@ def assert_equal_ndarray(a, b, exact=False, elem_name=None):
b = asarray(b)
if not exact and is_numeric_dtype(a) and is_numeric_dtype(b):
assert a.shape == b.shape, format_msg(elem_name)
assert np.allclose(a, b, equal_nan=True), format_msg(elem_name)
np.testing.assert_allclose(a, b, equal_nan=True, err_msg=format_msg(elem_name))
elif ( # Structured dtype
not exact
and hasattr(a, "dtype")
Expand Down Expand Up @@ -449,6 +450,7 @@ def assert_equal_cupy_sparse(a, b, exact=False, elem_name=None):


@assert_equal.register(h5py.Dataset)
@assert_equal.register(ZarrArray)
def assert_equal_h5py_dataset(a, b, exact=False, elem_name=None):
a = asarray(a)
assert_equal(b, a, exact, elem_name=elem_name)
Expand Down
91 changes: 91 additions & 0 deletions tests/test_backed_dense.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""Tests for backing by just sticking zarr/h5py objects into AnnData."""

from __future__ import annotations

from typing import TYPE_CHECKING, Literal

import h5py
import numpy as np
import pytest
import zarr

from anndata import AnnData
from anndata._io.specs import write_elem
from anndata.tests.helpers import assert_equal

if TYPE_CHECKING:
    from collections.abc import Generator
    from pathlib import Path


@pytest.fixture(params=["h5ad", "zarr"])
def diskfmt(request):
    """Run each dependent test once per on-disk format (``"h5ad"`` and ``"zarr"``)."""
    fmt = request.param
    return fmt


@pytest.fixture()
def file(
    tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]
) -> Generator[h5py.File | zarr.Group, None, None]:
    """Provide an empty, writable storage root of the requested format.

    Parameters
    ----------
    tmp_path
        Per-test temporary directory supplied by pytest.
    diskfmt
        On-disk format to create: ``"zarr"`` opens a zarr group,
        ``"h5ad"`` opens an HDF5 file.

    Yields
    ------
    The opened ``zarr.Group`` or ``h5py.File``, located at
    ``tmp_path / f"test.{diskfmt}"`` and opened in append mode.
    """
    path = tmp_path / f"test.{diskfmt}"
    if diskfmt == "zarr":
        yield zarr.open_group(path, "a")
    elif diskfmt == "h5ad":
        # Close the HDF5 handle on teardown so the file is not left open
        # after the test has finished with it.
        with h5py.File(path, "a") as h5_file:
            yield h5_file
    else:
        pytest.fail(f"Unknown diskfmt: {diskfmt}")


@pytest.mark.parametrize("assign", ["init", "assign"])
@pytest.mark.parametrize("attr", ["X", "obsm", "varm", "layers"])
def test_create_delete(
    diskfmt: Literal["h5ad", "zarr"],
    file: h5py.File | zarr.Group,
    assign: Literal["init", "assign"],
    attr: Literal["X", "obsm", "varm", "layers"],
):
    """Put an on-disk array into an AnnData slot, compare it, then delete it.

    The array is written to ``file`` under the key ``"a"`` and handed to
    AnnData either through the constructor (``assign == "init"``) or by
    assignment after construction (``assign == "assign"``). ``attr`` selects
    the slot: ``X`` itself, or the entry ``"a"`` of ``obsm``/``varm``/``layers``.
    """
    x = np.random.randn(10, 10)
    write_elem(file, "a", x)
    targets_x = attr == "X"

    # Build the AnnData object, placing the stored array in the chosen slot.
    if assign == "init":
        if targets_x:
            adata = AnnData(X=file["a"])
        else:
            adata = AnnData(**{attr: {"a": file["a"]}, "shape": x.shape})
    elif assign == "assign":
        adata = AnnData(shape=x.shape)
        if targets_x:
            adata.X = file["a"]
        else:
            getattr(adata, attr)["a"] = file["a"]
    else:
        pytest.fail(f"Unexpected assign: {assign}")

    # The slot must still hold the on-disk array type and equal the source data.
    if targets_x:
        # TODO: should that be inverted, e.g. when the Dataset's path matches the backed mode path?
        assert not adata.isbacked
        backed_array = adata.X
    else:
        backed_array = getattr(adata, attr)["a"]
    expected_type = zarr.Array if diskfmt == "zarr" else h5py.Dataset
    assert isinstance(backed_array, expected_type)
    assert_equal(backed_array, x)

    # Deleting the slot again must not raise.
    if targets_x:
        del adata.X
    else:
        del getattr(adata, attr)["a"]


def test_assign_x_subset(file: h5py.File | zarr.Group):
    """Check that assigning ``X`` on a view updates that region of the parent's ``X``."""
    ones = np.ones((10, 10))
    write_elem(file, "a", ones)
    adata = AnnData(file["a"])

    rows, cols = slice(3, 7), slice(6, 8)
    patch = np.zeros((4, 2))

    # Overwrite a rectangular block through a view of the AnnData object.
    subset = adata[rows, cols]
    subset.X = patch

    # The same block is zeroed in an in-memory copy to form the expectation.
    expected = ones.copy()
    expected[rows, cols] = patch
    assert_equal(adata.X, expected)
5 changes: 5 additions & 0 deletions tests/test_hdf5_backing.py → tests/test_backed_hdf5.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Tests for backing using the `.file` and `.isbacked` attributes."""

from __future__ import annotations

from pathlib import Path
Expand All @@ -18,6 +20,8 @@
from anndata.utils import asarray

subset_func2 = subset_func


# -------------------------------------------------------------------------------
# Some test data
# -------------------------------------------------------------------------------
Expand Down Expand Up @@ -99,6 +103,7 @@ def test_read_write_X(tmp_path, mtx_format, backed_mode, as_dense):


# this is very similar to the views test
@pytest.mark.filterwarnings("ignore::anndata.ImplicitModificationWarning")
def test_backing(adata, tmp_path, backing_h5ad):
assert not adata.isbacked

Expand Down

0 comments on commit 6f26e9a

Please sign in to comment.