From cb2154e5ab418fd06456bcc8ae13e88e7eee67c0 Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 19 Dec 2024 08:53:29 -0800 Subject: [PATCH] Backport PR #1806: Fix backed sparse matrix compat with scipy 1.15 (#1809) Co-authored-by: Philipp A --- docs/release-notes/1806.bugfix.md | 1 + src/anndata/_core/sparse_dataset.py | 22 ++++++++++++++++++---- src/anndata/compat/__init__.py | 2 +- tests/test_backed_sparse.py | 17 ++++++++++++++--- tests/test_concatenate.py | 2 +- 5 files changed, 35 insertions(+), 9 deletions(-) create mode 100644 docs/release-notes/1806.bugfix.md diff --git a/docs/release-notes/1806.bugfix.md b/docs/release-notes/1806.bugfix.md new file mode 100644 index 000000000..c9948f825 --- /dev/null +++ b/docs/release-notes/1806.bugfix.md @@ -0,0 +1 @@ +Add {mod}`scipy` 1.15 compatibility {user}`flying-sheep` diff --git a/src/anndata/_core/sparse_dataset.py b/src/anndata/_core/sparse_dataset.py index 6099a1765..d59c8bafd 100644 --- a/src/anndata/_core/sparse_dataset.py +++ b/src/anndata/_core/sparse_dataset.py @@ -23,7 +23,9 @@ import h5py import numpy as np +import scipy import scipy.sparse as ss +from packaging.version import Version from scipy.sparse import _sparsetools from .. 
import abc @@ -39,11 +41,14 @@ from .._types import GroupStorageType from ..compat import H5Array - from .index import Index + from .index import Index, Index1D else: from scipy.sparse import spmatrix as _cs_matrix +SCIPY_1_15 = Version(scipy.__version__) >= Version("1.15rc0") + + class BackedFormat(NamedTuple): format: Literal["csr", "csc"] backed_type: type[BackedSparseMatrix] @@ -353,7 +358,9 @@ def _get_group_format(group: GroupStorageType) -> str: # Check for the overridden few methods above in our BackedSparseMatrix subclasses -def is_sparse_indexing_overridden(format: Literal["csr", "csc"], row, col): +def is_sparse_indexing_overridden( + format: Literal["csr", "csc"], row: Index1D, col: Index1D +): major_indexer, minor_indexer = (row, col) if format == "csr" else (col, row) return isinstance(minor_indexer, slice) and ( (isinstance(major_indexer, int | np.integer)) @@ -362,6 +369,13 @@ def is_sparse_indexing_overridden(format: Literal["csr", "csc"], row, col): ) +def validate_indices( + mtx: BackedSparseMatrix, indices: tuple[Index1D, Index1D] +) -> tuple[Index1D, Index1D]: + res = mtx._validate_indices(indices) + return res[0] if SCIPY_1_15 else res + + class BaseCompressedSparseDataset(abc._AbstractCSDataset, ABC): _group: GroupStorageType @@ -424,8 +438,8 @@ def __getitem__( indices = self._normalize_index(index) row, col = indices mtx = self._to_backed() - row_sp_matrix_validated, col_sp_matrix_validated = mtx._validate_indices( - (row, col) + row_sp_matrix_validated, col_sp_matrix_validated = validate_indices( + mtx, indices ) # Handle masked indexing along major axis diff --git a/src/anndata/compat/__init__.py b/src/anndata/compat/__init__.py index 255ffa548..981e5a04b 100644 --- a/src/anndata/compat/__init__.py +++ b/src/anndata/compat/__init__.py @@ -47,7 +47,7 @@ class Empty: pass -Index1D = slice | int | str | np.int64 | np.ndarray +Index1D = slice | int | str | np.int64 | np.ndarray | pd.Series IndexRest = Index1D | EllipsisType Index = ( 
IndexRest diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 03155d0a3..499aeadbd 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -615,6 +615,13 @@ def test_backed_sizeof( assert csr_mem.__sizeof__() > csc_disk.__sizeof__() +sparray_scipy_bug_marks = ( + [pytest.mark.skip(reason="scipy bug causes view to be allocated")] + if CAN_USE_SPARSE_ARRAY + else [] +) + + @pytest.mark.parametrize( "group_fn", [ @@ -622,10 +629,14 @@ def test_backed_sizeof( pytest.param(lambda p: h5py.File(p / "test.h5", mode="a"), id="h5py"), ], ) -@pytest.mark.parametrize("sparse_class", [sparse.csr_matrix, sparse.csr_array]) +@pytest.mark.parametrize( + "sparse_class", + [ + sparse.csr_matrix, + pytest.param(sparse.csr_array, marks=[*sparray_scipy_bug_marks]), + ], +) def test_append_overflow_check(group_fn, sparse_class, tmpdir): - if CAN_USE_SPARSE_ARRAY and issubclass(sparse_class, SpArray): - pytest.skip("scipy bug causes view to be allocated") group = group_fn(tmpdir) typemax_int32 = np.iinfo(np.int32).max orig_mtx = sparse_class(np.ones((1, 1), dtype=bool)) diff --git a/tests/test_concatenate.py b/tests/test_concatenate.py index 2a2e16a5a..3dd5a361e 100644 --- a/tests/test_concatenate.py +++ b/tests/test_concatenate.py @@ -1444,7 +1444,7 @@ def test_concat_outer_aligned_mapping(elem): del b.obsm[elem] concated = concat({"a": a, "b": b}, join="outer", label="group") - result = concated.obsm[elem][concated.obs["group"] == "b"] + result = concated[concated.obs["group"] == "b"].obsm[elem] check_filled_like(result, elem_name=f"obsm/{elem}")