Merge branch 'main' into mindeps
ivirshup committed Jan 16, 2024
2 parents b3cf020 + 73dabaa commit 58888e8
Showing 55 changed files with 1,254 additions and 477 deletions.
9 changes: 7 additions & 2 deletions .azure-pipelines.yml
@@ -55,12 +55,17 @@ jobs:
- script: |
pytest
displayName: "PyTest"
condition: eq(variables['RUN_COVERAGE'], 'no')
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))
- script: |
pytest --cov --cov-report=xml --cov-context=test
displayName: "PyTest (coverage)"
condition: eq(variables['RUN_COVERAGE'], 'yes')
condition: and(eq(variables['RUN_COVERAGE'], 'yes'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))
- script: |
pytest --strict-warnings
displayName: "PyTest (treat warnings as errors)"
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'yes'))
- task: PublishCodeCoverageResults@1
inputs:
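The new `--strict-warnings` job assumes a matching pytest option on the test-suite side; its implementation is not part of this commit, so the following is only a minimal sketch of how such a flag could be wired up in a `conftest.py` (the option name comes from the pipeline above, everything else is assumed):

```python
# Hypothetical conftest.py sketch for a --strict-warnings flag; illustrative only.
import warnings

import pytest


def pytest_addoption(parser):
    parser.addoption(
        "--strict-warnings",
        action="store_true",
        default=False,
        help="Escalate warnings raised during tests to errors.",
    )


@pytest.fixture(autouse=True)
def _strict_warnings(request):
    if not request.config.getoption("--strict-warnings"):
        yield
        return
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # turn every warning into an error for this test
        yield
```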
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.yml
@@ -56,6 +56,6 @@ body:
```python
>>> import anndata, session_info; session_info.show(html=False, dependencies=True)
```
render: python
render: python
validations:
required: true
5 changes: 4 additions & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,8 @@
blank_issues_enabled: false
blank_issues_enabled: true
contact_links:
- name: Scverse Community Forum
url: https://discourse.scverse.org/
about: If you have questions about “How to do X”, please ask them here.
- name: Blank issue
url: https://github.com/scverse/anndata/issues/new
about: For things that don't quite fit elsewhere. Please note that other templates should be used in most cases – this is mainly for use by the developers.
21 changes: 21 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,21 @@
name: Publish Python Package

on:
release:
types: [published]

jobs:
publish:
runs-on: ubuntu-latest
environment: pypi
permissions:
id-token: write # to authenticate as Trusted Publisher to pypi.org
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.x"
cache: "pip"
- run: pip install build
- run: python -m build
- uses: pypa/gh-action-pypi-publish@release/v1
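This workflow builds the distribution with `python -m build` and uploads it through PyPI's Trusted Publishing (hence the `pypi` environment and `id-token: write`, with no stored API token). A rough local counterpart of the build step, for checking a release candidate before tagging, might look like this; `twine` is an assumption here, not something the workflow itself installs:

```python
# Rough local counterpart of the workflow's build step (assumes `build` and `twine` are installed).
import glob
import subprocess
import sys

# Same as the workflow: pip install build && python -m build
subprocess.run([sys.executable, "-m", "build"], check=True)

# Extra, optional check of the generated sdist/wheel metadata.
subprocess.run([sys.executable, "-m", "twine", "check", *glob.glob("dist/*")], check=True)
```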
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@
# Caches for compiled and downloaded files
__pycache__/
/*cache/
/node_modules/
/data/

# Distribution / packaging
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -1,20 +1,21 @@
repos:
- repo: https://github.com/psf/black
rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.0.292"
rev: v0.1.13
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
args: ["--fix"]
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.3
rev: v4.0.0-alpha.8
hooks:
- id: prettier
exclude_types:
- markdown
language_version: 21.5.0
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
@@ -26,7 +27,6 @@ repos:
- id: detect-private-key
- id: no-commit-to-branch
args: ["--branch=main"]

- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
60 changes: 30 additions & 30 deletions anndata/_core/aligned_mapping.py
@@ -8,6 +8,7 @@
from typing import (
TYPE_CHECKING,
ClassVar,
Literal,
TypeVar,
Union,
)
@@ -19,7 +20,7 @@
from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
from anndata.compat import AwkArray

from ..utils import deprecated, dim_len, ensure_df_homogeneous
from ..utils import deprecated, dim_len, ensure_df_homogeneous, warn_once
from .access import ElementRef
from .index import _subset
from .views import as_view, view_update
@@ -61,35 +62,31 @@ def _ipython_key_completions_(self) -> list[str]:
def _validate_value(self, val: V, key: str) -> V:
"""Raises an error if value is invalid"""
if isinstance(val, AwkArray):
warnings.warn(
warn_once(
"Support for Awkward Arrays is currently experimental. "
"Behavior may change in the future. Please report any issues you may encounter!",
ExperimentalFeatureWarning,
# stacklevel=3,
)
# Prevent from showing up every time an awkward array is used
# You'd think `once` works, but it doesn't at the repl and in notebooks
warnings.filterwarnings(
"ignore",
category=ExperimentalFeatureWarning,
message="Support for Awkward Arrays is currently experimental.*",
)
for i, axis in enumerate(self.axes):
if self.parent.shape[axis] != dim_len(val, i):
right_shape = tuple(self.parent.shape[a] for a in self.axes)
actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes))
if actual_shape[i] is None and isinstance(val, AwkArray):
raise ValueError(
f"The AwkwardArray is of variable length in dimension {i}.",
f"Try ak.to_regular(array, {i}) before including the array in AnnData",
)
else:
raise ValueError(
f"Value passed for key {key!r} is of incorrect shape. "
f"Values of {self.attrname} must match dimensions "
f"{self.axes} of parent. Value had shape {actual_shape} while "
f"it should have had {right_shape}."
)
if self.parent.shape[axis] == dim_len(val, i):
continue
right_shape = tuple(self.parent.shape[a] for a in self.axes)
actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes))
if actual_shape[i] is None and isinstance(val, AwkArray):
dim = ("obs", "var")[i]
msg = (
f"The AwkwardArray is of variable length in dimension {dim}.",
f"Try ak.to_regular(array, {i}) before including the array in AnnData",
)
else:
dims = tuple(("obs", "var")[ax] for ax in self.axes)
msg = (
f"Value passed for key {key!r} is of incorrect shape. "
f"Values of {self.attrname} must match dimensions {dims} of parent. "
f"Value had shape {actual_shape} while it should have had {right_shape}."
)
raise ValueError(msg)

if not self._allow_df and isinstance(val, pd.DataFrame):
name = self.attrname.title().rstrip("s")
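For illustration, the reworked branch above now names the parent dimension (`obs`/`var`) instead of a bare axis index; a mismatched assignment surfaces roughly like this (hypothetical snippet, message paraphrased from the template above):

```python
# Illustrative: a wrong-shaped obsm value is rejected with the dimension named, not numbered.
import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((3, 2)))
adata.obsm["X_pca"] = np.zeros((4, 2))
# ValueError: Value passed for key 'X_pca' is of incorrect shape. Values of obsm must match
# dimensions ('obs',) of parent. Value had shape (4,) while it should have had (3,).
```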
@@ -104,7 +101,7 @@ def attrname(self) -> str:

@property
@abstractmethod
def axes(self) -> tuple[int, ...]:
def axes(self) -> tuple[Literal[0, 1], ...]:
"""Which axes of the parent is this aligned to?"""
pass

@@ -131,7 +128,7 @@ def _view(self, parent: AnnData, subset_idx: I):
"""Returns a subset copy-on-write view of the object."""
return self._view_class(self, parent, subset_idx)

@deprecated("dict(obj)")
@deprecated("dict(obj)", FutureWarning)
def as_dict(self) -> dict:
return dict(self)

@@ -166,7 +163,10 @@ def __setitem__(self, key: str, value: V):
new_mapping[key] = value

def __delitem__(self, key: str):
_ = key in self # Make sure it exists before bothering with a copy
if key not in self:
raise KeyError(
"'{key!r}' not found in view of {self.attrname}"
) # Make sure it exists before bothering with a copy
warnings.warn(
f"Removing element `.{self.attrname}['{key}']` of view, "
"initializing view as actual.",
@@ -226,7 +226,7 @@ def attrname(self) -> str:
return f"{self.dim}m"

@property
def axes(self) -> tuple[int]:
def axes(self) -> tuple[Literal[0, 1]]:
"""Axes of the parent this is aligned to"""
return (self._axis,)

@@ -260,7 +260,7 @@ def _validate_value(self, val: V, key: str) -> V:
try:
pd.testing.assert_index_equal(val.index, self.dim_names)
except AssertionError as e:
msg = f"value.index does not match parent’s axis {self.axes[0]} names:\n{e}"
msg = f"value.index does not match parent’s {self.dim} names:\n{e}"
raise ValueError(msg) from None
else:
msg = "Index.equals and pd.testing.assert_index_equal disagree"
@@ -361,7 +361,7 @@ def attrname(self) -> str:
return f"{self.dim}p"

@property
def axes(self) -> tuple[int, int]:
def axes(self) -> tuple[Literal[0], Literal[0]] | tuple[Literal[1], Literal[1]]:
"""Axes of the parent this is aligned to"""
return self._axis, self._axis

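The `warn_once` helper imported at the top of this file replaces the inline `warnings.warn` + `warnings.filterwarnings` pattern removed above; its implementation is not shown in this diff, so the sketch below is only a guess at its shape, reconstructed from the code it replaces:

```python
# Hypothetical sketch of a warn_once helper (the real one lives in anndata's utils module
# and is not part of this diff). It mirrors the warn-then-ignore pattern removed above.
import re
import warnings


def warn_once(msg: str, category: type[Warning], stacklevel: int = 1) -> None:
    warnings.warn(msg, category, stacklevel=stacklevel)
    # The "once" filter is unreliable at the REPL and in notebooks, so explicitly
    # ignore further occurrences of this exact message.
    warnings.filterwarnings("ignore", category=category, message=re.escape(msg))
```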
68 changes: 38 additions & 30 deletions anndata/_core/anndata.py
@@ -25,7 +25,7 @@
from numpy import ma
from pandas.api.types import infer_dtype, is_string_dtype
from scipy import sparse
from scipy.sparse import csr_matrix, issparse
from scipy.sparse import issparse

from anndata._warnings import ImplicitModificationWarning

@@ -39,7 +39,7 @@
_move_adj_mtx,
)
from ..logging import anndata_logger as logger
from ..utils import convert_to_dict, dim_len, ensure_df_homogeneous
from ..utils import convert_to_dict, deprecated, dim_len, ensure_df_homogeneous
from .access import ElementRef
from .aligned_mapping import (
AxisArrays,
@@ -74,7 +74,7 @@ class StorageType(Enum):
DaskArray = DaskArray
CupyArray = CupyArray
CupySparseMatrix = CupySparseMatrix
BackedSparseMAtrix = BaseCompressedSparseDataset
BackedSparseMatrix = BaseCompressedSparseDataset

@classmethod
def classes(cls):
@@ -592,28 +592,37 @@ def _init_as_actual(
# layers
self._layers = Layers(self, layers)

def __sizeof__(self, show_stratified=None) -> int:
def get_size(X):
if issparse(X):
X_csr = csr_matrix(X)
return X_csr.data.nbytes + X_csr.indptr.nbytes + X_csr.indices.nbytes
def __sizeof__(self, show_stratified=None, with_disk: bool = False) -> int:
def get_size(X) -> int:
def cs_to_bytes(X) -> int:
return int(X.data.nbytes + X.indptr.nbytes + X.indices.nbytes)

if isinstance(X, h5py.Dataset) and with_disk:
return int(np.array(X.shape).prod() * X.dtype.itemsize)
elif isinstance(X, BaseCompressedSparseDataset) and with_disk:
return cs_to_bytes(X._to_backed())
elif isinstance(X, (sparse.csr_matrix, sparse.csc_matrix)):
return cs_to_bytes(X)
else:
return X.__sizeof__()

size = 0
attrs = list(["_X", "_obs", "_var"])
attrs_multi = list(["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"])
sizes = {}
attrs = ["X", "_obs", "_var"]
attrs_multi = ["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"]
for attr in attrs + attrs_multi:
if attr in attrs_multi:
keys = getattr(self, attr).keys()
s = sum([get_size(getattr(self, attr)[k]) for k in keys])
s = sum(get_size(getattr(self, attr)[k]) for k in keys)
else:
s = get_size(getattr(self, attr))
if s > 0 and show_stratified:
str_attr = attr.replace("_", ".") + " " * (7 - len(attr))
print(f"Size of {str_attr}: {'%3.2f' % (s / (1024 ** 2))} MB")
size += s
return size
from tqdm import tqdm

print(
f"Size of {attr.replace('_', '.'):<7}: {tqdm.format_sizeof(s, 'B')}"
)
sizes[attr] = s
return sum(sizes.values())

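A brief usage sketch of the extended `__sizeof__` above (the per-attribute breakdown is formatted with `tqdm.format_sizeof`, so `tqdm` needs to be importable when `show_stratified` is set; the data below is illustrative):

```python
# Illustrative: in-memory size, optional per-attribute breakdown, optional on-disk accounting.
import sys

import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((1000, 50), dtype=np.float32))

print(sys.getsizeof(adata))             # goes through __sizeof__() with the defaults
adata.__sizeof__(show_stratified=True)  # also prints one "Size of ..." line per non-empty attribute
# with_disk=True additionally counts h5py datasets / backed sparse matrices for backed objects.
```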
def _gen_repr(self, n_obs, n_vars) -> str:
if self.isbacked:
@@ -875,23 +884,21 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
value = pd.Index(value)
if not isinstance(value.name, (str, type(None))):
value.name = None
# fmt: off
if (
not isinstance(value, pd.RangeIndex)
len(value) > 0
and not isinstance(value, pd.RangeIndex)
and infer_dtype(value) not in ("string", "bytes")
):
sample = list(value[: min(len(value), 5)])
warnings.warn(dedent(
msg = dedent(
f"""
AnnData expects .{attr}.index to contain strings, but got values like:
{sample}
Inferred to be: {infer_dtype(value)}
"""
), # noqa
stacklevel=2,
)
# fmt: on
warnings.warn(msg, stacklevel=2)
return value

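The added `len(value) > 0` guard means an empty index no longer triggers the warning; for a non-empty, non-string index the behavior is unchanged, e.g. (illustrative):

```python
# Illustrative: integer obs names still raise the "expects ... strings" warning.
import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((3, 2)))
adata.obs_names = [0, 1, 2]  # UserWarning: AnnData expects .obs.index to contain strings ...
```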
def _set_dim_index(self, value: pd.Index, attr: str):
@@ -1303,6 +1310,7 @@ def _inplace_subset_var(self, index: Index1D):
Same as `adata = adata[:, index]`, but inplace.
"""
adata_subset = self[:, index].copy()

self._init_as_actual(adata_subset)

def _inplace_subset_obs(self, index: Index1D):
@@ -1312,6 +1320,7 @@ def _inplace_subset_obs(self, index: Index1D):
Same as `adata = adata[index, :]`, but inplace.
"""
adata_subset = self[index].copy()

self._init_as_actual(adata_subset)

# TODO: Update, possibly remove
@@ -1597,6 +1606,13 @@ def copy(self, filename: PathLike | None = None) -> AnnData:
write_h5ad(filename, self)
return read_h5ad(filename, backed=mode)

@deprecated(
"anndata.concat",
FutureWarning,
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
hide=False,
)
def concatenate(
self,
*adatas: AnnData,
@@ -1820,14 +1836,6 @@ def concatenate(
"""
from .merge import concat, merge_dataframes, merge_outer, merge_same

warnings.warn(
"The AnnData.concatenate method is deprecated in favour of the "
"anndata.concat function. Please use anndata.concat instead.\n\n"
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
FutureWarning,
)

if self.isbacked:
raise ValueError("Currently, concatenate only works in memory mode.")

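With `AnnData.concatenate` now deprecated via the decorator added above, a minimal migration to `anndata.concat` looks like this:

```python
# Minimal migration from the deprecated method to the module-level anndata.concat.
import numpy as np
import anndata as ad

a = ad.AnnData(np.ones((2, 3)))
b = ad.AnnData(np.zeros((4, 3)))

# Before (now emits FutureWarning):
# merged = a.concatenate(b, join="outer")

# After:
merged = ad.concat([a, b], join="outer", label="batch", index_unique="-")
```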