Commit bebace0

Merge branch 'main' into repeated-cols

ivirshup committed Jan 26, 2024
2 parents b2deb2a + 299ca97

Showing 12 changed files with 112 additions and 57 deletions.
1 change: 1 addition & 0 deletions .azure-pipelines.yml
@@ -1,5 +1,6 @@
 trigger:
 - main
+- "*.*.x"
 
 variables:
   PIP_CACHE_DIR: $(Pipeline.Workspace)/.pip
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -2,7 +2,7 @@ name: Benchmark
 
 on:
   push:
-    branches: [main]
+    branches: [main, "[0-9]+.[0-9]+.x"]
   pull_request:
     branches: [main]
 
2 changes: 1 addition & 1 deletion .github/workflows/test-gpu.yml
@@ -2,7 +2,7 @@ name: AWS GPU
 
 on:
   push:
-    branches: [main]
+    branches: [main, "[0-9]+.[0-9]+.x"]
   pull_request:
     types:
       - labeled
17 changes: 4 additions & 13 deletions anndata/_core/index.py
@@ -42,7 +42,7 @@ def _normalize_index(
     | np.integer
     | int
     | str
-    | Sequence[int | np.integer]
+    | Sequence[bool | int | np.integer]
     | np.ndarray
     | pd.Index,
     index: pd.Index,
@@ -80,18 +80,9 @@ def name_idx(i):
             indexer = indexer.toarray()
             indexer = np.ravel(indexer)
         if not isinstance(indexer, (np.ndarray, pd.Index)):
-            dtype = "int"
-            if (
-                all(isinstance(x, str) for x in indexer) and len(indexer) > 0
-            ):  # if not all, but any, then dtype=int will cause an error
-                dtype = "object"
-            try:
-                indexer = np.array(indexer, dtype=dtype)
-            except ValueError as e:
-                if str(e).startswith("invalid literal for"):
-                    msg = "Mixed type list indexers not supported."
-                    raise ValueError(msg) from e
-                raise e
+            indexer = np.array(indexer)
+            if len(indexer) == 0:
+                indexer = indexer.astype(int)
         if issubclass(indexer.dtype.type, (np.integer, np.floating)):
             return indexer  # Might not work for range indexes
         elif issubclass(indexer.dtype.type, np.bool_):
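
The replacement lets NumPy infer the indexer's dtype, which fixes the 0.10.4 regression where `dtype="int"` silently coerced a boolean list to integer positions, and casts only the empty case to `int` (since `np.array([])` infers `float64`, which would fail the dtype checks that follow). A side effect is that a mixed list such as `[1, "cell_c"]` is now converted by NumPy (typically to a string array) instead of raising `Mixed type list indexers not supported`, which is why `test_subset_errors` is commented out in `anndata/tests/test_views.py` below. A standalone sketch of the new logic, using a hypothetical helper name rather than anndata internals:

```python
import numpy as np

def normalize_list_indexer(indexer: list) -> np.ndarray:
    # Mirror of the new branch above: let NumPy infer the dtype.
    arr = np.array(indexer)
    if len(arr) == 0:
        # np.array([]) infers float64; cast so it works as a positional index.
        arr = arr.astype(int)
    return arr

assert normalize_list_indexer([True, False, True]).dtype == np.bool_  # kept as a mask
assert normalize_list_indexer([0, 2]).dtype.kind == "i"               # integer positions
assert normalize_list_indexer([]).dtype.kind == "i"                   # empty list -> int
```
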
14 changes: 10 additions & 4 deletions anndata/_core/sparse_dataset.py
@@ -180,6 +180,8 @@ def _get_sliceXslice(self, row: slice, col: slice) -> ss.csr_matrix:
 
     def _get_arrayXslice(self, row: Sequence[int], col: slice) -> ss.csr_matrix:
         idxs = np.asarray(row)
+        if len(idxs) == 0:
+            return ss.csr_matrix((0, self.shape[1]))
         if idxs.dtype == bool:
             idxs = np.where(idxs)
         return ss.csr_matrix(
@@ -214,6 +216,8 @@ def _get_sliceXslice(self, row: slice, col: slice) -> ss.csc_matrix:
 
     def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix:
         idxs = np.asarray(col)
+        if len(idxs) == 0:
+            return ss.csc_matrix((self.shape[0], 0))
         if idxs.dtype == bool:
             idxs = np.where(idxs)
         return ss.csc_matrix(
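
These early returns give backed sparse matrices the same result as scipy's in-memory behavior for empty selections, without reading anything from disk. A quick standalone check of the intended equivalence (plain scipy, no backed classes involved):

```python
import numpy as np
import scipy.sparse as ss

X = ss.random(50, 50, format="csr", density=0.1)
empty = np.array([], dtype=int)

# An empty row selection on an in-memory CSR matrix has shape (0, 50); the
# early return above constructs that shape directly instead of slicing.
assert X[empty].shape == (0, 50)
assert ss.csr_matrix((0, X.shape[1])).shape == (0, 50)
```
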
@@ -290,10 +294,12 @@ def mean_slice_length(slices):
         return floor((slices[-1].stop - slices[0].start) / len(slices))
 
     # heuristic for whether slicing should be optimized
-    if mean_slice_length(slices) <= 7:
-        return get_compressed_vectors(mtx, np.where(mask)[0])
-    else:
-        return get_compressed_vectors_for_slices(mtx, slices)
+    if len(slices) > 0:
+        if mean_slice_length(slices) <= 7:
+            return get_compressed_vectors(mtx, np.where(mask)[0])
+        else:
+            return get_compressed_vectors_for_slices(mtx, slices)
+    return [], [], [0]
 
 
 def get_format(data: ss.spmatrix) -> str:
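
The new guard matters because an all-`False` mask produces no slices at all, so `mean_slice_length` would hit `slices[-1]` on an empty list and raise `IndexError`; `return [], [], [0]` reads as an empty (data, indices, indptr) triple. A sketch of the mask-to-slices idea, using a hypothetical `mask_to_slices` helper (anndata's actual helper may differ):

```python
import numpy as np

def mask_to_slices(mask: np.ndarray) -> list[slice]:
    # Hypothetical helper: collapse runs of True into contiguous slices.
    (idxs,) = np.where(mask)
    slices: list[slice] = []
    for i in idxs:
        if slices and slices[-1].stop == i:
            slices[-1] = slice(slices[-1].start, i + 1)
        else:
            slices.append(slice(i, i + 1))
    return slices

print(mask_to_slices(np.array([True, True, False, True])))  # two runs: [0, 2) and [3, 4)
print(mask_to_slices(np.zeros(4, dtype=bool)))              # [] -> needs the len() guard
```
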
48 changes: 33 additions & 15 deletions anndata/tests/helpers.py
@@ -12,7 +12,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-import zarr
 from pandas.api.types import is_numeric_dtype
 from scipy import sparse
 
@@ -284,6 +283,10 @@ def array_bool_subset(index, min_size=2):
     return b
 
 
+def list_bool_subset(index, min_size=2):
+    return array_bool_subset(index, min_size=min_size).tolist()
+
+
 def matrix_bool_subset(index, min_size=2):
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PendingDeprecationWarning)
@@ -321,6 +324,10 @@ def array_int_subset(index, min_size=2):
     )
 
 
+def list_int_subset(index, min_size=2):
+    return array_int_subset(index, min_size=min_size).tolist()
+
+
 def slice_subset(index, min_size=2):
     while True:
         points = np.random.choice(np.arange(len(index) + 1), size=2, replace=False)
@@ -340,7 +347,9 @@ def single_subset(index):
     slice_subset,
     single_subset,
     array_int_subset,
+    list_int_subset,
     array_bool_subset,
+    list_bool_subset,
     matrix_bool_subset,
     spmatrix_bool_subset,
 ]
@@ -749,21 +758,30 @@ def shares_memory_sparse(x, y):
     ),
 ]
 
+try:
+    import zarr
+
-class AccessTrackingStore(zarr.DirectoryStore):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self._access_count = {}
+    class AccessTrackingStore(zarr.DirectoryStore):
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self._access_count = {}
 
-    def __getitem__(self, key):
-        for tracked in self._access_count:
-            if tracked in key:
-                self._access_count[tracked] += 1
-        return super().__getitem__(key)
+        def __getitem__(self, key):
+            for tracked in self._access_count:
+                if tracked in key:
+                    self._access_count[tracked] += 1
+            return super().__getitem__(key)
 
-    def get_access_count(self, key):
-        return self._access_count[key]
+        def get_access_count(self, key):
+            return self._access_count[key]
 
-    def set_key_trackers(self, keys_to_track):
-        for k in keys_to_track:
-            self._access_count[k] = 0
+        def set_key_trackers(self, keys_to_track):
+            for k in keys_to_track:
+                self._access_count[k] = 0
+
+except ImportError:
+
+    class AccessTrackingStore:
+        def __init__(self, *_args, **_kwargs) -> None:
+            raise ImportError(
+                "zarr must be imported to create an `AccessTrackingStore` instance."
+            )
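
Wrapping the class in `try`/`except ImportError` lets `anndata.tests.helpers` be imported without zarr installed (the scanpy CI case), while failing loudly only when a store is actually constructed. A usage sketch, assuming zarr v2 is available and using a hypothetical `example.zarr` path:

```python
import numpy as np
import zarr

from anndata.tests.helpers import AccessTrackingStore

store = AccessTrackingStore("example.zarr")
zarr.save_group(store, X=np.ones((10, 10)))

# Count reads of any key containing "X" (chunk and metadata keys alike).
store.set_key_trackers(["X"])
_ = zarr.open(store)["X"][...]
print(store.get_access_count("X"))
```
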
25 changes: 24 additions & 1 deletion anndata/tests/test_backed_sparse.py
@@ -27,6 +27,10 @@ def diskfmt(request):
     return request.param
 
 
+M = 50
+N = 50
+
+
 @pytest.fixture(scope="function")
 def ondisk_equivalent_adata(
     tmp_path: Path, diskfmt: Literal["h5ad", "zarr"]
@@ -37,7 +41,7 @@ def ondisk_equivalent_adata(
 
     write = lambda x, pth, **kwargs: getattr(x, f"write_{diskfmt}")(pth, **kwargs)
 
-    csr_mem = ad.AnnData(X=sparse.random(50, 50, format="csr", density=0.1))
+    csr_mem = ad.AnnData(X=sparse.random(M, N, format="csr", density=0.1))
     csc_mem = ad.AnnData(X=csr_mem.X.tocsc())
     dense_mem = ad.AnnData(X=csr_mem.X.toarray())
 
@@ -77,6 +81,25 @@ def callback(func, elem_name, elem, iospec):
     return csr_mem, csr_disk, csc_disk, dense_disk
 
 
+@pytest.mark.parametrize(
+    "empty_mask", [[], np.zeros(M, dtype=bool)], ids=["empty_list", "empty_bool_mask"]
+)
+def test_empty_backed_indexing(
+    ondisk_equivalent_adata: tuple[AnnData, AnnData, AnnData, AnnData],
+    empty_mask,
+):
+    csr_mem, csr_disk, csc_disk, _ = ondisk_equivalent_adata
+
+    assert_equal(csr_mem.X[empty_mask], csr_disk.X[empty_mask])
+    assert_equal(csr_mem.X[:, empty_mask], csc_disk.X[:, empty_mask])
+
+    # The following do not work because of https://github.com/scipy/scipy/issues/19919
+    # Our implementation returns a (0,0) sized matrix but scipy does (1,0).
+
+    # assert_equal(csr_mem.X[empty_mask, empty_mask], csr_disk.X[empty_mask, empty_mask])
+    # assert_equal(csr_mem.X[empty_mask, empty_mask], csc_disk.X[empty_mask, empty_mask])
+
+
 def test_backed_indexing(
     ondisk_equivalent_adata: tuple[AnnData, AnnData, AnnData, AnnData],
     subset_func,
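
For reference, the scipy behavior that blocks the commented-out assertions can be reproduced directly on scipy versions affected by scipy/scipy#19919:

```python
import numpy as np
import scipy.sparse as ss

X = ss.random(50, 50, format="csr", density=0.1)
mask = np.zeros(50, dtype=bool)

# Affected scipy versions report (1, 0) here instead of the expected (0, 0),
# which is why the dual-axis assertions above stay commented out.
print(X[mask, mask].shape)
```
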
4 changes: 2 additions & 2 deletions anndata/tests/test_hdf5_backing.py
@@ -193,8 +193,8 @@ def test_backed_raw_subset(tmp_path, array_type, subset_func, subset_func2):
     var_idx = subset_func2(mem_adata.var_names)
     if (
         array_type is asarray
-        and isinstance(obs_idx, (np.ndarray, sparse.spmatrix))
-        and isinstance(var_idx, (np.ndarray, sparse.spmatrix))
+        and isinstance(obs_idx, (list, np.ndarray, sparse.spmatrix))
+        and isinstance(var_idx, (list, np.ndarray, sparse.spmatrix))
     ):
         pytest.xfail(
             "Fancy indexing does not work with multiple arrays on a h5py.Dataset"
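
The xfail now also covers plain lists because, with the indexing fix, lists are valid indexers and hit the same h5py limitation as arrays: h5py datasets allow fancy indexing along only one axis per selection. A minimal reproduction of the underlying restriction, writing to a throwaway `fancy.h5` file:

```python
import h5py
import numpy as np

with h5py.File("fancy.h5", "w") as f:
    dset = f.create_dataset("X", data=np.arange(16).reshape(4, 4))
    try:
        dset[[0, 1], [0, 2]]  # fancy indexing on both axes at once
    except TypeError as e:
        print(e)  # h5py permits only one indexing vector per selection
```
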
38 changes: 19 additions & 19 deletions anndata/tests/test_views.py
@@ -703,22 +703,22 @@ def test_empty_list_subset():
     assert subset.varm["sparse"].shape == (0, 100)
 
 
-@pytest.mark.parametrize("dim", ["obs", "var"])
-@pytest.mark.parametrize(
-    ("idx", "pat"),
-    [
-        pytest.param(
-            [1, "cell_c"], r"Mixed type list indexers not supported", id="mixed"
-        ),
-        pytest.param(
-            [[1, 2], [2]], r"setting an array element with a sequence", id="nested"
-        ),
-    ],
-)
-def test_subset_errors(dim, idx, pat):
-    orig = gen_adata((10, 10))
-    with pytest.raises(ValueError, match=pat):
-        if dim == "obs":
-            orig[idx, :].X
-        elif dim == "var":
-            orig[:, idx].X
+# @pytest.mark.parametrize("dim", ["obs", "var"])
+# @pytest.mark.parametrize(
+#     ("idx", "pat"),
+#     [
+#         pytest.param(
+#             [1, "cell_c"], r"Mixed type list indexers not supported", id="mixed"
+#         ),
+#         pytest.param(
+#             [[1, 2], [2]], r"setting an array element with a sequence", id="nested"
+#         ),
+#     ],
+# )
+# def test_subset_errors(dim, idx, pat):
+#     orig = gen_adata((10, 10))
+#     with pytest.raises(ValueError, match=pat):
+#         if dim == "obs":
+#             orig[idx, :].X
+#         elif dim == "var":
+#             orig[:, idx].X
3 changes: 2 additions & 1 deletion docs/release-notes/0.10.5.md
@@ -1,11 +1,12 @@
-### 0.10.5 {small}`the future`
+### 0.10.5 {small}`2024-01-25`
 
 ```{rubric} Bugfix
 ```
 
 * Fix outer concatenation along variables when only a subset of objects had an entry in layers {pr}`1291` {user}`ivirshup`
 * Fix comparison of >2d arrays in `uns` during concatenation {pr}`1300` {user}`ivirshup`
 * Fix IO with awkward array version 2.5.2 {pr}`1328` {user}`ivirshup`
+* Fix bug (introduced in 0.10.4) where indexing an AnnData with `list[bool]` would return the wrong result {pr}`1332` {user}`ivirshup`
 
 ```{rubric} Documentation
 ```
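
The `list[bool]` fix from {pr}`1332` is easiest to see end to end; a small sketch of the corrected behavior on a 4 × 3 toy AnnData:

```python
import numpy as np
import anndata as ad

adata = ad.AnnData(np.arange(12, dtype=np.float32).reshape(4, 3))
mask = [True, False, True, False]

# 0.10.4 coerced the list to ints ([1, 0, 1, 0]) and indexed by position;
# 0.10.5 treats it as a boolean mask again, matching the ndarray case.
assert adata[mask].shape == (2, 3)
assert np.array_equal(adata[mask].X, adata.X[np.array(mask)])
```
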
12 changes: 12 additions & 0 deletions docs/release-notes/0.10.6.md
@@ -0,0 +1,12 @@
+### 0.10.6 {small}`the future`
+
+```{rubric} Bugfix
+```
+
+* Defer import of zarr in test helpers, as scanpy CI job relies on them {pr}`1343` {user}`ilan-gold`
+
+```{rubric} Documentation
+```
+
+```{rubric} Performance
+```
3 changes: 3 additions & 0 deletions docs/release-notes/release-latest.md
@@ -5,6 +5,9 @@
 
 ## Version 0.10
 
+```{include} /release-notes/0.10.6.md
+```
+
 ```{include} /release-notes/0.10.5.md
 ```
 
