Merge branch 'main' into mindeps
ivirshup committed Jan 16, 2024
2 parents b3cf020 + 73dabaa commit 58888e8
Showing 55 changed files with 1,254 additions and 477 deletions.
9 changes: 7 additions & 2 deletions .azure-pipelines.yml
@@ -55,12 +55,17 @@ jobs:
- script: |
pytest
displayName: "PyTest"
condition: eq(variables['RUN_COVERAGE'], 'no')
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))
- script: |
pytest --cov --cov-report=xml --cov-context=test
displayName: "PyTest (coverage)"
condition: eq(variables['RUN_COVERAGE'], 'yes')
condition: and(eq(variables['RUN_COVERAGE'], 'yes'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))
- script: |
pytest --strict-warnings
displayName: "PyTest (treat warnings as errors)"
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'yes'))
- task: PublishCodeCoverageResults@1
inputs:
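The new `--strict-warnings` job assumes a matching pytest option on the test-suite side; its implementation is not part of this commit, so the following is only a minimal sketch of how such a flag could be wired up in a `conftest.py` (the option name comes from the pipeline above, everything else is assumed):

```python
# Hypothetical conftest.py sketch for a --strict-warnings flag; illustrative only.
import warnings

import pytest


def pytest_addoption(parser):
    parser.addoption(
        "--strict-warnings",
        action="store_true",
        default=False,
        help="Escalate warnings raised during tests to errors.",
    )


@pytest.fixture(autouse=True)
def _strict_warnings(request):
    if not request.config.getoption("--strict-warnings"):
        yield
        return
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # turn every warning into an error for this test
        yield
```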
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.yml
@@ -56,6 +56,6 @@ body:
```python
>>> import anndata, session_info; session_info.show(html=False, dependencies=True)
```
render: python
render: python
validations:
required: true
5 changes: 4 additions & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,8 @@
blank_issues_enabled: false
blank_issues_enabled: true
contact_links:
- name: Scverse Community Forum
url: https://discourse.scverse.org/
about: If you have questions about “How to do X”, please ask them here.
- name: Blank issue
url: https://github.com/scverse/anndata/issues/new
about: For things that don't quite fit elsewhere. Please note that other templates should be used in most cases – this is mainly for use by the developers.
21 changes: 21 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,21 @@
name: Publish Python Package

on:
release:
types: [published]

jobs:
publish:
runs-on: ubuntu-latest
environment: pypi
permissions:
id-token: write # to authenticate as Trusted Publisher to pypi.org
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.x"
cache: "pip"
- run: pip install build
- run: python -m build
- uses: pypa/gh-action-pypi-publish@release/v1
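This workflow builds the distribution with `python -m build` and uploads it through PyPI's Trusted Publishing (hence the `pypi` environment and `id-token: write`, with no stored API token). A rough local counterpart of the build step, for checking a release candidate before tagging, might look like this; `twine` is an assumption here, not something the workflow itself installs:

```python
# Rough local counterpart of the workflow's build step (assumes `build` and `twine` are installed).
import glob
import subprocess
import sys

# Same as the workflow: pip install build && python -m build
subprocess.run([sys.executable, "-m", "build"], check=True)

# Extra, optional check of the generated sdist/wheel metadata.
subprocess.run([sys.executable, "-m", "twine", "check", *glob.glob("dist/*")], check=True)
```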
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@
# Caches for compiled and downloaded files
__pycache__/
/*cache/
/node_modules/
/data/

# Distribution / packaging
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -1,20 +1,21 @@
repos:
- repo: https://github.com/psf/black
rev: 23.9.1
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: "v0.0.292"
rev: v0.1.13
hooks:
- id: ruff
types_or: [python, pyi, jupyter]
args: ["--fix"]
- id: ruff-format
types_or: [python, pyi, jupyter]
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v3.0.3
rev: v4.0.0-alpha.8
hooks:
- id: prettier
exclude_types:
- markdown
language_version: 21.5.0
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
@@ -26,7 +27,6 @@ repos:
- id: detect-private-key
- id: no-commit-to-branch
args: ["--branch=main"]

- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
hooks:
60 changes: 30 additions & 30 deletions anndata/_core/aligned_mapping.py
@@ -8,6 +8,7 @@
from typing import (
TYPE_CHECKING,
ClassVar,
Literal,
TypeVar,
Union,
)
@@ -19,7 +20,7 @@
from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
from anndata.compat import AwkArray

from ..utils import deprecated, dim_len, ensure_df_homogeneous
from ..utils import deprecated, dim_len, ensure_df_homogeneous, warn_once
from .access import ElementRef
from .index import _subset
from .views import as_view, view_update
@@ -61,35 +62,31 @@ def _ipython_key_completions_(self) -> list[str]:
def _validate_value(self, val: V, key: str) -> V:
"""Raises an error if value is invalid"""
if isinstance(val, AwkArray):
warnings.warn(
warn_once(
"Support for Awkward Arrays is currently experimental. "
"Behavior may change in the future. Please report any issues you may encounter!",
ExperimentalFeatureWarning,
# stacklevel=3,
)
# Prevent from showing up every time an awkward array is used
# You'd think `once` works, but it doesn't at the repl and in notebooks
warnings.filterwarnings(
"ignore",
category=ExperimentalFeatureWarning,
message="Support for Awkward Arrays is currently experimental.*",
)
for i, axis in enumerate(self.axes):
if self.parent.shape[axis] != dim_len(val, i):
right_shape = tuple(self.parent.shape[a] for a in self.axes)
actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes))
if actual_shape[i] is None and isinstance(val, AwkArray):
raise ValueError(
f"The AwkwardArray is of variable length in dimension {i}.",
f"Try ak.to_regular(array, {i}) before including the array in AnnData",
)
else:
raise ValueError(
f"Value passed for key {key!r} is of incorrect shape. "
f"Values of {self.attrname} must match dimensions "
f"{self.axes} of parent. Value had shape {actual_shape} while "
f"it should have had {right_shape}."
)
if self.parent.shape[axis] == dim_len(val, i):
continue
right_shape = tuple(self.parent.shape[a] for a in self.axes)
actual_shape = tuple(dim_len(val, a) for a, _ in enumerate(self.axes))
if actual_shape[i] is None and isinstance(val, AwkArray):
dim = ("obs", "var")[i]
msg = (
f"The AwkwardArray is of variable length in dimension {dim}.",
f"Try ak.to_regular(array, {i}) before including the array in AnnData",
)
else:
dims = tuple(("obs", "var")[ax] for ax in self.axes)
msg = (
f"Value passed for key {key!r} is of incorrect shape. "
f"Values of {self.attrname} must match dimensions {dims} of parent. "
f"Value had shape {actual_shape} while it should have had {right_shape}."
)
raise ValueError(msg)

if not self._allow_df and isinstance(val, pd.DataFrame):
name = self.attrname.title().rstrip("s")
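For illustration, the reworked branch above now names the parent dimension (`obs`/`var`) instead of a bare axis index; a mismatched assignment surfaces roughly like this (hypothetical snippet, message paraphrased from the template above):

```python
# Illustrative: a wrong-shaped obsm value is rejected with the dimension named, not numbered.
import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((3, 2)))
adata.obsm["X_pca"] = np.zeros((4, 2))
# ValueError: Value passed for key 'X_pca' is of incorrect shape. Values of obsm must match
# dimensions ('obs',) of parent. Value had shape (4,) while it should have had (3,).
```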
@@ -104,7 +101,7 @@ def attrname(self) -> str:

@property
@abstractmethod
def axes(self) -> tuple[int, ...]:
def axes(self) -> tuple[Literal[0, 1], ...]:
"""Which axes of the parent is this aligned to?"""
pass

@@ -131,7 +128,7 @@ def _view(self, parent: AnnData, subset_idx: I):
"""Returns a subset copy-on-write view of the object."""
return self._view_class(self, parent, subset_idx)

@deprecated("dict(obj)")
@deprecated("dict(obj)", FutureWarning)
def as_dict(self) -> dict:
return dict(self)

@@ -166,7 +163,10 @@ def __setitem__(self, key: str, value: V):
new_mapping[key] = value

def __delitem__(self, key: str):
_ = key in self # Make sure it exists before bothering with a copy
if key not in self:
raise KeyError(
"'{key!r}' not found in view of {self.attrname}"
) # Make sure it exists before bothering with a copy
warnings.warn(
f"Removing element `.{self.attrname}['{key}']` of view, "
"initializing view as actual.",
@@ -226,7 +226,7 @@ def attrname(self) -> str:
return f"{self.dim}m"

@property
def axes(self) -> tuple[int]:
def axes(self) -> tuple[Literal[0, 1]]:
"""Axes of the parent this is aligned to"""
return (self._axis,)

@@ -260,7 +260,7 @@ def _validate_value(self, val: V, key: str) -> V:
try:
pd.testing.assert_index_equal(val.index, self.dim_names)
except AssertionError as e:
msg = f"value.index does not match parent’s axis {self.axes[0]} names:\n{e}"
msg = f"value.index does not match parent’s {self.dim} names:\n{e}"
raise ValueError(msg) from None
else:
msg = "Index.equals and pd.testing.assert_index_equal disagree"
@@ -361,7 +361,7 @@ def attrname(self) -> str:
return f"{self.dim}p"

@property
def axes(self) -> tuple[int, int]:
def axes(self) -> tuple[Literal[0], Literal[0]] | tuple[Literal[1], Literal[1]]:
"""Axes of the parent this is aligned to"""
return self._axis, self._axis

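The `warn_once` helper imported at the top of this file replaces the inline `warnings.warn` + `warnings.filterwarnings` pattern removed above; its implementation is not shown in this diff, so the sketch below is only a guess at its shape, reconstructed from the code it replaces:

```python
# Hypothetical sketch of a warn_once helper (the real one lives in anndata's utils module
# and is not part of this diff). It mirrors the warn-then-ignore pattern removed above.
import re
import warnings


def warn_once(msg: str, category: type[Warning], stacklevel: int = 1) -> None:
    warnings.warn(msg, category, stacklevel=stacklevel)
    # The "once" filter is unreliable at the REPL and in notebooks, so explicitly
    # ignore further occurrences of this exact message.
    warnings.filterwarnings("ignore", category=category, message=re.escape(msg))
```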
68 changes: 38 additions & 30 deletions anndata/_core/anndata.py
@@ -25,7 +25,7 @@
from numpy import ma
from pandas.api.types import infer_dtype, is_string_dtype
from scipy import sparse
from scipy.sparse import csr_matrix, issparse
from scipy.sparse import issparse

from anndata._warnings import ImplicitModificationWarning

@@ -39,7 +39,7 @@
_move_adj_mtx,
)
from ..logging import anndata_logger as logger
from ..utils import convert_to_dict, dim_len, ensure_df_homogeneous
from ..utils import convert_to_dict, deprecated, dim_len, ensure_df_homogeneous
from .access import ElementRef
from .aligned_mapping import (
AxisArrays,
@@ -74,7 +74,7 @@ class StorageType(Enum):
DaskArray = DaskArray
CupyArray = CupyArray
CupySparseMatrix = CupySparseMatrix
BackedSparseMAtrix = BaseCompressedSparseDataset
BackedSparseMatrix = BaseCompressedSparseDataset

@classmethod
def classes(cls):
@@ -592,28 +592,37 @@ def _init_as_actual(
# layers
self._layers = Layers(self, layers)

def __sizeof__(self, show_stratified=None) -> int:
def get_size(X):
if issparse(X):
X_csr = csr_matrix(X)
return X_csr.data.nbytes + X_csr.indptr.nbytes + X_csr.indices.nbytes
def __sizeof__(self, show_stratified=None, with_disk: bool = False) -> int:
def get_size(X) -> int:
def cs_to_bytes(X) -> int:
return int(X.data.nbytes + X.indptr.nbytes + X.indices.nbytes)

if isinstance(X, h5py.Dataset) and with_disk:
return int(np.array(X.shape).prod() * X.dtype.itemsize)
elif isinstance(X, BaseCompressedSparseDataset) and with_disk:
return cs_to_bytes(X._to_backed())
elif isinstance(X, (sparse.csr_matrix, sparse.csc_matrix)):
return cs_to_bytes(X)
else:
return X.__sizeof__()

size = 0
attrs = list(["_X", "_obs", "_var"])
attrs_multi = list(["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"])
sizes = {}
attrs = ["X", "_obs", "_var"]
attrs_multi = ["_uns", "_obsm", "_varm", "varp", "_obsp", "_layers"]
for attr in attrs + attrs_multi:
if attr in attrs_multi:
keys = getattr(self, attr).keys()
s = sum([get_size(getattr(self, attr)[k]) for k in keys])
s = sum(get_size(getattr(self, attr)[k]) for k in keys)
else:
s = get_size(getattr(self, attr))
if s > 0 and show_stratified:
str_attr = attr.replace("_", ".") + " " * (7 - len(attr))
print(f"Size of {str_attr}: {'%3.2f' % (s / (1024 ** 2))} MB")
size += s
return size
from tqdm import tqdm

print(
f"Size of {attr.replace('_', '.'):<7}: {tqdm.format_sizeof(s, 'B')}"
)
sizes[attr] = s
return sum(sizes.values())

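A brief usage sketch of the extended `__sizeof__` above (the per-attribute breakdown is formatted with `tqdm.format_sizeof`, so `tqdm` needs to be importable when `show_stratified` is set; the data below is illustrative):

```python
# Illustrative: in-memory size, optional per-attribute breakdown, optional on-disk accounting.
import sys

import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((1000, 50), dtype=np.float32))

print(sys.getsizeof(adata))             # goes through __sizeof__() with the defaults
adata.__sizeof__(show_stratified=True)  # also prints one "Size of ..." line per non-empty attribute
# with_disk=True additionally counts h5py datasets / backed sparse matrices for backed objects.
```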
def _gen_repr(self, n_obs, n_vars) -> str:
if self.isbacked:
@@ -875,23 +884,21 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
value = pd.Index(value)
if not isinstance(value.name, (str, type(None))):
value.name = None
# fmt: off
if (
not isinstance(value, pd.RangeIndex)
len(value) > 0
and not isinstance(value, pd.RangeIndex)
and infer_dtype(value) not in ("string", "bytes")
):
sample = list(value[: min(len(value), 5)])
warnings.warn(dedent(
msg = dedent(
f"""
AnnData expects .{attr}.index to contain strings, but got values like:
{sample}
Inferred to be: {infer_dtype(value)}
"""
), # noqa
stacklevel=2,
)
# fmt: on
warnings.warn(msg, stacklevel=2)
return value

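The added `len(value) > 0` guard means an empty index no longer triggers the warning; for a non-empty, non-string index the behavior is unchanged, e.g. (illustrative):

```python
# Illustrative: integer obs names still raise the "expects ... strings" warning.
import numpy as np
import anndata as ad

adata = ad.AnnData(np.zeros((3, 2)))
adata.obs_names = [0, 1, 2]  # UserWarning: AnnData expects .obs.index to contain strings ...
```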
def _set_dim_index(self, value: pd.Index, attr: str):
@@ -1303,6 +1310,7 @@ def _inplace_subset_var(self, index: Index1D):
Same as `adata = adata[:, index]`, but inplace.
"""
adata_subset = self[:, index].copy()

self._init_as_actual(adata_subset)

def _inplace_subset_obs(self, index: Index1D):
@@ -1312,6 +1320,7 @@ def _inplace_subset_obs(self, index: Index1D):
Same as `adata = adata[index, :]`, but inplace.
"""
adata_subset = self[index].copy()

self._init_as_actual(adata_subset)

# TODO: Update, possibly remove
@@ -1597,6 +1606,13 @@ def copy(self, filename: PathLike | None = None) -> AnnData:
write_h5ad(filename, self)
return read_h5ad(filename, backed=mode)

@deprecated(
"anndata.concat",
FutureWarning,
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
hide=False,
)
def concatenate(
self,
*adatas: AnnData,
@@ -1820,14 +1836,6 @@ def concatenate(
"""
from .merge import concat, merge_dataframes, merge_outer, merge_same

warnings.warn(
"The AnnData.concatenate method is deprecated in favour of the "
"anndata.concat function. Please use anndata.concat instead.\n\n"
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
FutureWarning,
)

if self.isbacked:
raise ValueError("Currently, concatenate only works in memory mode.")

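With `AnnData.concatenate` now deprecated via the decorator added above, a minimal migration to `anndata.concat` looks like this:

```python
# Minimal migration from the deprecated method to the module-level anndata.concat.
import numpy as np
import anndata as ad

a = ad.AnnData(np.ones((2, 3)))
b = ad.AnnData(np.zeros((4, 3)))

# Before (now emits FutureWarning):
# merged = a.concatenate(b, join="outer")

# After:
merged = ad.concat([a, b], join="outer", label="batch", index_unique="-")
```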