Treat warnings as errors in tests #1182

Merged · 47 commits · Oct 24, 2023

Commits
4545ef4
WIP fix warnings
flying-sheep Oct 10, 2023
82e5971
All done
flying-sheep Oct 10, 2023
944c21a
better na_action value
flying-sheep Oct 10, 2023
19f39e6
typo
flying-sheep Oct 10, 2023
049d096
Merge branch 'main' into fix-warnings
flying-sheep Oct 10, 2023
9321d38
WIP
flying-sheep Oct 10, 2023
0607419
only doctest left
flying-sheep Oct 10, 2023
37aa221
Merge branch 'main' into fix-warnings
flying-sheep Oct 10, 2023
525b8fa
done
flying-sheep Oct 10, 2023
0adaa2c
maybe this works
flying-sheep Oct 10, 2023
c7a121b
extract pruning from unify_dtypes
flying-sheep Oct 10, 2023
c06845c
circumvent other ImplicitModificationWarning
flying-sheep Oct 10, 2023
28ee111
Merge branch 'main' into fix-warnings
flying-sheep Oct 10, 2023
706afb8
whoops
flying-sheep Oct 10, 2023
fcac190
can’t use regexes
flying-sheep Oct 10, 2023
a55bb60
Update pyproject.toml
flying-sheep Oct 10, 2023
ec9d9ae
Merge branch 'main' into fix-warnings
flying-sheep Oct 12, 2023
825ab5f
undo concat_with_unified_dtypes
flying-sheep Oct 12, 2023
78c6f41
make into list
flying-sheep Oct 12, 2023
2fd024c
just ignore
flying-sheep Oct 12, 2023
c29d4d5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 12, 2023
edde4bd
Catch empty df concat warnings
flying-sheep Oct 12, 2023
63f9dbb
don’t hide loompy warning
flying-sheep Oct 12, 2023
f3c74b6
Ignore loompy errors just in tests
flying-sheep Oct 12, 2023
8eb9981
add comment for assert false
flying-sheep Oct 12, 2023
63aacb1
fix test_concat_size_0_dim
flying-sheep Oct 12, 2023
8035fd9
add release notes
flying-sheep Oct 12, 2023
391931e
Remaining warnings
flying-sheep Oct 12, 2023
361aa05
Whew
flying-sheep Oct 12, 2023
8ae08d3
Filter deprecations from doctests
flying-sheep Oct 13, 2023
18acba4
fix docs
flying-sheep Oct 13, 2023
7a672e0
Fix regex
flying-sheep Oct 13, 2023
4e97365
make hiding configurable
flying-sheep Oct 13, 2023
1e46cc0
Fix doctest collection
flying-sheep Oct 13, 2023
da81e66
Fix remaining catch regex
flying-sheep Oct 13, 2023
0585a97
oops
flying-sheep Oct 13, 2023
eec3d5b
Ignore globally
flying-sheep Oct 19, 2023
89d0b45
Discard changes to anndata/tests/test_x.py
flying-sheep Oct 19, 2023
a2feac2
ugh
flying-sheep Oct 19, 2023
f9adde7
make comment into issue
flying-sheep Oct 19, 2023
8424103
Apply suggestions from code review
flying-sheep Oct 19, 2023
6dd8d6b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2023
7bebcb3
Update anndata/tests/helpers.py
flying-sheep Oct 19, 2023
2705119
Update pyproject.toml
flying-sheep Oct 19, 2023
90cdb2f
done
flying-sheep Oct 19, 2023
4a0792d
remove len condition
flying-sheep Oct 24, 2023
37f5a03
it was necessary after all
flying-sheep Oct 24, 2023
21 changes: 19 additions & 2 deletions .azure-pipelines.yml
@@ -55,12 +55,29 @@ jobs:
- script: |
pytest
displayName: "PyTest"
condition: eq(variables['RUN_COVERAGE'], 'no')
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))

- script: |
pytest --cov --cov-report=xml --cov-context=test
displayName: "PyTest (coverage)"
condition: eq(variables['RUN_COVERAGE'], 'yes')
condition: and(eq(variables['RUN_COVERAGE'], 'yes'), eq(variables['PRERELEASE_DEPENDENCIES'], 'no'))

# TODO: fix all the exceptions here
# TODO: Centralize, see https://github.com/scverse/anndata/issues/1204
- script: >
pytest
-W error
-W 'ignore:Support for Awkward Arrays is currently experimental'
-W 'ignore:Outer joins on awkward.Arrays'
-W 'default:Setting element:UserWarning'
-W 'default:Trying to modify attribute:UserWarning'
-W 'default:Transforming to str index:UserWarning'
-W 'default:Observation names are not unique. To make them unique:UserWarning'
-W 'default:Variable names are not unique. To make them unique:UserWarning'
-W 'default::scipy.sparse._base.SparseEfficiencyWarning'
-W 'default::dask.array.core.PerformanceWarning'
displayName: "PyTest (treat warnings as errors)"
condition: and(eq(variables['RUN_COVERAGE'], 'no'), eq(variables['PRERELEASE_DEPENDENCIES'], 'yes'))

- task: PublishCodeCoverageResults@1
inputs:
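For reference, pytest's `-W` option takes the same `action:message:category` filter specs as Python's own `-W` flag, so the step above turns every warning into a test failure and then selectively relaxes the known, not-yet-fixed ones. A minimal in-process equivalent of two of the specs (illustrative only, not part of this diff):

```python
import warnings

# -W error: escalate every warning into an exception
warnings.filterwarnings("error")

# -W 'default:Setting element:UserWarning': print matching warnings
# normally instead of raising; `message` is matched against the start
# of the warning text
warnings.filterwarnings("default", message="Setting element", category=UserWarning)
```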
13 changes: 3 additions & 10 deletions anndata/_core/aligned_mapping.py
@@ -19,7 +19,7 @@
from anndata._warnings import ExperimentalFeatureWarning, ImplicitModificationWarning
from anndata.compat import AwkArray

from ..utils import deprecated, dim_len, ensure_df_homogeneous
from ..utils import deprecated, dim_len, ensure_df_homogeneous, warn_once
from .access import ElementRef
from .index import _subset
from .views import as_view, view_update
@@ -61,19 +61,12 @@ def _ipython_key_completions_(self) -> list[str]:
def _validate_value(self, val: V, key: str) -> V:
"""Raises an error if value is invalid"""
if isinstance(val, AwkArray):
warnings.warn(
warn_once(
"Support for Awkward Arrays is currently experimental. "
"Behavior may change in the future. Please report any issues you may encounter!",
ExperimentalFeatureWarning,
# stacklevel=3,
)
# Prevent from showing up every time an awkward array is used
# You'd think `once` works, but it doesn't at the repl and in notebooks
warnings.filterwarnings(
"ignore",
category=ExperimentalFeatureWarning,
message="Support for Awkward Arrays is currently experimental.*",
)
for i, axis in enumerate(self.axes):
if self.parent.shape[axis] != dim_len(val, i):
right_shape = tuple(self.parent.shape[a] for a in self.axes)
@@ -131,7 +124,7 @@ def _view(self, parent: AnnData, subset_idx: I):
"""Returns a subset copy-on-write view of the object."""
return self._view_class(self, parent, subset_idx)

@deprecated("dict(obj)")
@deprecated("dict(obj)", FutureWarning)
def as_dict(self) -> dict:
return dict(self)

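The `warn_once` helper imported above replaces the emit-then-`filterwarnings` pair that used to be inlined here. Its implementation lives in `anndata/utils.py` and is not shown in this diff; a plausible sketch of the pattern, assuming a literal-message match:

```python
import re
import warnings


def warn_once(msg: str, category: type[Warning], stacklevel: int = 1) -> None:
    """Emit a warning, then suppress identical repeats.

    The stdlib's "once" filter action keys on per-module warning
    registries and misfires at the REPL and in notebooks, so this
    emits eagerly and installs an explicit ignore filter instead.
    """
    warnings.warn(msg, category, stacklevel=stacklevel + 1)
    # filterwarnings compiles `message` as a regex, so escape it
    # (cf. the "can't use regexes" commit above)
    warnings.filterwarnings("ignore", category=category, message=re.escape(msg))
```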
29 changes: 14 additions & 15 deletions anndata/_core/anndata.py
@@ -39,7 +39,7 @@
_move_adj_mtx,
)
from ..logging import anndata_logger as logger
from ..utils import convert_to_dict, dim_len, ensure_df_homogeneous
from ..utils import convert_to_dict, deprecated, dim_len, ensure_df_homogeneous
from .access import ElementRef
from .aligned_mapping import (
AxisArrays,
@@ -875,23 +875,21 @@ def _prep_dim_index(self, value, attr: str) -> pd.Index:
value = pd.Index(value)
if not isinstance(value.name, (str, type(None))):
value.name = None
# fmt: off
if (
not isinstance(value, pd.RangeIndex)
len(value) > 0
and not isinstance(value, pd.RangeIndex)
and infer_dtype(value) not in ("string", "bytes")
):
sample = list(value[: min(len(value), 5)])
warnings.warn(dedent(
msg = dedent(
f"""
AnnData expects .{attr}.index to contain strings, but got values like:
{sample}

Inferred to be: {infer_dtype(value)}
"""
), # noqa
stacklevel=2,
)
# fmt: on
warnings.warn(msg, stacklevel=2)
return value

def _set_dim_index(self, value: pd.Index, attr: str):
@@ -1303,6 +1301,7 @@ def _inplace_subset_var(self, index: Index1D):
Same as `adata = adata[:, index]`, but inplace.
"""
adata_subset = self[:, index].copy()

self._init_as_actual(adata_subset)

def _inplace_subset_obs(self, index: Index1D):
@@ -1312,6 +1311,7 @@ def _inplace_subset_obs(self, index: Index1D):
Same as `adata = adata[index, :]`, but inplace.
"""
adata_subset = self[index].copy()

self._init_as_actual(adata_subset)

# TODO: Update, possibly remove
@@ -1597,6 +1597,13 @@ def copy(self, filename: PathLike | None = None) -> AnnData:
write_h5ad(filename, self)
return read_h5ad(filename, backed=mode)

@deprecated(
"anndata.concat",
FutureWarning,
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
hide=False,
)
def concatenate(
self,
*adatas: AnnData,
@@ -1820,14 +1827,6 @@ def concatenate(
"""
from .merge import concat, merge_dataframes, merge_outer, merge_same

warnings.warn(
"The AnnData.concatenate method is deprecated in favour of the "
"anndata.concat function. Please use anndata.concat instead.\n\n"
"See the tutorial for concat at: "
"https://anndata.readthedocs.io/en/latest/concatenation.html",
FutureWarning,
)

if self.isbacked:
raise ValueError("Currently, concatenate only works in memory mode.")

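Moving the deprecation into the `@deprecated` decorator (also applied to `as_dict` in `aligned_mapping.py` above) keeps the warning out of the method body. The decorator itself is not part of this diff; judging only from the call sites, it might look roughly like this sketch, where the `hide` handling is an outright guess:

```python
import warnings
from functools import wraps


def deprecated(new_name, category=DeprecationWarning, add_msg="", *, hide=True):
    """Warn that the decorated callable is deprecated in favour of `new_name`."""

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            msg = f"Use {new_name} instead of {func.__name__}."
            if add_msg:
                msg += f" {add_msg}"
            warnings.warn(msg, category, stacklevel=2)
            return func(*args, **kwargs)

        # Hypothetical: the real helper presumably uses `hide` to keep
        # deprecated members out of rendered API docs; concatenate
        # passes hide=False so it stays documented.
        wrapper.__deprecated__ = hide
        return wrapper

    return decorator
```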
75 changes: 32 additions & 43 deletions anndata/_core/merge.py
@@ -13,11 +13,11 @@
MutableSet,
Sequence,
)
from functools import reduce, singledispatch
from functools import partial, reduce, singledispatch
from itertools import repeat
from operator import and_, or_, sub
from typing import Any, Literal, TypeVar
from warnings import filterwarnings, warn
from warnings import warn

import numpy as np
import pandas as pd
@@ -35,7 +35,7 @@
DaskArray,
_map_cat_to_str,
)
from ..utils import asarray, dim_len
from ..utils import asarray, dim_len, warn_once
from .anndata import AnnData
from .index import _subset, make_slice

@@ -219,6 +219,7 @@ def unify_dtypes(dfs: Iterable[pd.DataFrame]) -> list[pd.DataFrame]:

For catching cases where pandas would convert to object dtype.
"""
dfs = list(dfs)
# Get shared categorical columns
df_dtypes = [dict(df.dtypes) for df in dfs]
columns = reduce(lambda x, y: x.union(y), [df.columns for df in dfs])
@@ -752,9 +753,9 @@ def concat_arrays(arrays, reindexers, axis=0, index=None, fill_value=None):
)
# TODO: behaviour here should be chosen through a merge strategy
df = pd.concat(
unify_dtypes([f(x) for f, x in zip(reindexers, arrays)]),
ignore_index=True,
unify_dtypes(f(x) for f, x in zip(reindexers, arrays)),
axis=axis,
ignore_index=True,
)
df.index = index
return df
@@ -819,7 +820,7 @@ def concat_arrays(
)


def inner_concat_aligned_mapping(mappings, reindexers=None, index=None, axis=0):
def inner_concat_aligned_mapping(mappings, *, reindexers=None, index=None, axis=0):
result = {}

for k in intersect_keys(mappings):
@@ -878,17 +879,12 @@ def gen_outer_reindexers(els, shapes, new_index: pd.Index, *, axis=0):
raise NotImplementedError(
"Cannot concatenate an AwkwardArray with other array types."
)
warn(
"Outer joins on awkward.Arrays will have different return values in the future."
warn_once(
"Outer joins on awkward.Arrays will have different return values in the future. "
"For details, and to offer input, please see:\n\n\t"
"https://github.com/scverse/anndata/issues/898",
ExperimentalFeatureWarning,
)
filterwarnings(
"ignore",
category=ExperimentalFeatureWarning,
message=r"Outer joins on awkward.Arrays will have different return values.*",
)
# all_keys = union_keys(el.fields for el in els if not_missing(el))
reindexers = []
for el in els:
@@ -912,7 +908,7 @@


def outer_concat_aligned_mapping(
mappings, reindexers=None, index=None, fill_value=None, axis=0
mappings, *, reindexers=None, index=None, axis=0, fill_value=None
):
result = {}
ns = [m.parent.shape[axis] for m in mappings]
@@ -1261,7 +1257,7 @@ def concat(
# Annotation for concatenation axis
check_combinable_cols([getattr(a, dim).columns for a in adatas], join=join)
concat_annot = pd.concat(
unify_dtypes([getattr(a, dim) for a in adatas]),
unify_dtypes(getattr(a, dim) for a in adatas),
join=join,
ignore_index=True,
)
@@ -1277,37 +1273,30 @@
X = concat_Xs(adatas, reindexers, axis=axis, fill_value=fill_value)

if join == "inner":
layers = inner_concat_aligned_mapping(
[a.layers for a in adatas], axis=axis, reindexers=reindexers
)
concat_mapping = inner_concat_aligned_mapping(
[getattr(a, f"{dim}m") for a in adatas], index=concat_indices
)
if pairwise:
concat_pairwise = concat_pairwise_mapping(
mappings=[getattr(a, f"{dim}p") for a in adatas],
shapes=[a.shape[axis] for a in adatas],
join_keys=intersect_keys,
)
else:
concat_pairwise = {}
concat_aligned_mapping = inner_concat_aligned_mapping
join_keys = intersect_keys
elif join == "outer":
layers = outer_concat_aligned_mapping(
[a.layers for a in adatas], reindexers, axis=axis, fill_value=fill_value
concat_aligned_mapping = partial(
outer_concat_aligned_mapping, fill_value=fill_value
)
concat_mapping = outer_concat_aligned_mapping(
[getattr(a, f"{dim}m") for a in adatas],
index=concat_indices,
fill_value=fill_value,
join_keys = union_keys
else:
assert False, f"{join=} should have been validated above by pd.concat"

layers = concat_aligned_mapping(
[a.layers for a in adatas], axis=axis, reindexers=reindexers
)
concat_mapping = concat_aligned_mapping(
[getattr(a, f"{dim}m") for a in adatas], index=concat_indices
)
if pairwise:
concat_pairwise = concat_pairwise_mapping(
mappings=[getattr(a, f"{dim}p") for a in adatas],
shapes=[a.shape[axis] for a in adatas],
join_keys=join_keys,
)
if pairwise:
concat_pairwise = concat_pairwise_mapping(
mappings=[getattr(a, f"{dim}p") for a in adatas],
shapes=[a.shape[axis] for a in adatas],
join_keys=union_keys,
)
else:
concat_pairwise = {}
else:
concat_pairwise = {}

# TODO: Reindex lazily, so we don't have to make those copies until we're sure we need the element
alt_mapping = merge(
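The restructured `concat` body above removes the duplicated inner/outer branches: the join mode now only picks a strategy function, with `functools.partial` baking in the outer-only `fill_value`, and a single call site serves both modes. A toy illustration of the pattern, with stand-in functions rather than anndata's real ones:

```python
from functools import partial


def inner_concat(items, *, axis=0):
    """Toy stand-in: keep only keys present in every mapping."""
    keys = set.intersection(*(set(d) for d in items))
    return {k: [d[k] for d in items] for k in keys}


def outer_concat(items, *, axis=0, fill_value=None):
    """Toy stand-in: keep all keys, padding gaps with fill_value."""
    keys = set.union(*(set(d) for d in items))
    return {k: [d.get(k, fill_value) for d in items] for k in keys}


def concat(items, join="inner", fill_value=None, axis=0):
    if join == "inner":
        strategy = inner_concat
    elif join == "outer":
        # partial() gives both strategies the same signature,
        # so one call site below serves both join modes
        strategy = partial(outer_concat, fill_value=fill_value)
    else:
        raise ValueError(f"{join=} is not a valid join mode")
    return strategy(items, axis=axis)


print(concat([{"a": 1, "b": 2}, {"a": 3}], join="outer", fill_value=0))
# {'a': [1, 3], 'b': [2, 0]} (key order may vary)
```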
12 changes: 6 additions & 6 deletions anndata/_io/read.py
@@ -274,16 +274,16 @@ def read_loom(
uns = {}
if cleanup:
uns_obs = {}
for key in list(obs.keys()):
if len(set(obs[key])) == 1:
uns_obs[f"{key}"] = obs[key][0]
for key in obs.columns:
if len(obs[key].unique()) == 1:
uns_obs[key] = obs[key].iloc[0]
del obs[key]
if uns_obs:
uns["loom-obs"] = uns_obs
uns_var = {}
for key in list(var.keys()):
if len(set(var[key])) == 1:
uns_var[f"{key}"] = var[key][0]
for key in var.columns:
if len(var[key].unique()) == 1:
uns_var[key] = var[key].iloc[0]
del var[key]
if uns_var:
uns["loom-var"] = uns_var
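The `read_loom` cleanup swaps `set(col)` and `col[0]` for `col.unique()` and `col.iloc[0]`. Plain `col[0]` relies on the positional fallback of `Series.__getitem__`, which pandas has deprecated for non-integer indexes; for example, with made-up data:

```python
import pandas as pd

col = pd.Series(["x", "x"], index=["cell1", "cell2"])

# col[0] would hit the deprecated treat-keys-as-positions fallback
# (FutureWarning); .iloc is explicitly positional and warning-free
assert len(col.unique()) == 1
assert col.iloc[0] == "x"
```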
4 changes: 2 additions & 2 deletions anndata/_io/utils.py
@@ -5,15 +5,15 @@
from warnings import warn

import h5py
from packaging import version
from packaging.version import Version

from anndata.compat import H5Group, ZarrGroup, add_note

from .._core.sparse_dataset import BaseCompressedSparseDataset

# For allowing h5py v3
# https://github.com/scverse/anndata/issues/442
H5PY_V3 = version.parse(h5py.__version__).major >= 3
H5PY_V3 = Version(h5py.__version__).major >= 3

# -------------------------------------------------------------------------------
# Type conversion
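Constructing `Version` directly, instead of going through `version.parse`, avoids `parse`'s historical fallback to `LegacyVersion` for non-PEP 440 strings and makes the import explicit. The comparison used above behaves as expected:

```python
from packaging.version import Version

assert Version("3.9.0").major == 3  # the h5py v3 check above
assert Version("1.26.4") < Version("2.0")
```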
10 changes: 2 additions & 8 deletions anndata/_io/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,10 @@
from anndata._warnings import OldFormatWarning

from .._core.anndata import AnnData
from ..compat import (
_clean_uns,
_from_fixed_length_strings,
)
from ..compat import _clean_uns, _from_fixed_length_strings
from ..experimental import read_dispatched, write_dispatched
from .specs import read_elem
from .utils import (
_read_legacy_raw,
report_read_key_on_error,
)
from .utils import _read_legacy_raw, report_read_key_on_error

if TYPE_CHECKING:
from collections.abc import MutableMapping
4 changes: 2 additions & 2 deletions anndata/compat/__init__.py
@@ -14,7 +14,7 @@
import h5py
import numpy as np
import pandas as pd
from packaging.version import parse as _parse_version
from packaging.version import Version
from scipy.sparse import issparse, spmatrix

from .exceptiongroups import add_note # noqa: F401
@@ -395,7 +395,7 @@ def _safe_transpose(x):


def _map_cat_to_str(cat: pd.Categorical) -> pd.Categorical:
if _parse_version(pd.__version__) >= _parse_version("2.0"):
if Version(pd.__version__) >= Version("2.0"):
# Argument added in pandas 2.0
return cat.map(str, na_action="ignore")
else:
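The version gate exists because, per the code comment, `Categorical.map` only gained `na_action` in pandas 2.0; `na_action="ignore"` (the "better na_action value" commit above) keeps missing categories missing rather than mapping them through `str`. A small illustration:

```python
import pandas as pd

cat = pd.Categorical(["a", "b", None])

# na_action="ignore" maps only the non-missing categories through str,
# so NaN stays NaN instead of becoming the literal string "nan"
mapped = cat.map(str, na_action="ignore")
print(mapped)  # ['a', 'b', NaN] with string categories
```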
2 changes: 1 addition & 1 deletion anndata/experimental/merge.py
@@ -384,7 +384,7 @@ def _write_alt_annot(groups, output_group, alt_dim, alt_indices, merge):

def _write_dim_annot(groups, output_group, dim, concat_indices, label, label_col, join):
concat_annot = pd.concat(
unify_dtypes([read_elem(g[dim]) for g in groups]),
unify_dtypes(read_elem(g[dim]) for g in groups),
join=join,
ignore_index=True,
)
2 changes: 1 addition & 1 deletion anndata/experimental/multi_files/_anncollection.py
@@ -209,7 +209,7 @@ def __getitem__(self, key, use_convert=True):
else:
if vidx is not None:
idx = np.ix_(*idx) if not isinstance(idx[1], slice) else idx
arrs.append(arr[idx])
arrs.append(arr.iloc[idx] if isinstance(arr, pd.Series) else arr[idx])

if len(arrs) > 1:
_arr = _merge(arrs)
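The one-line `_anncollection.py` fix makes positional intent explicit when the stored column is a `pandas.Series`, where plain `arr[idx]` means label lookup (or the deprecated positional fallback). Illustration with made-up values:

```python
import numpy as np
import pandas as pd

arr = pd.Series([0.1, 0.2, 0.3], index=["c1", "c2", "c3"])
idx = np.array([0, 2])

# arr[idx] would rely on the deprecated treat-keys-as-positions
# fallback; .iloc selects by position explicitly
subset = arr.iloc[idx]
print(list(subset))  # [0.1, 0.3]
```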