diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 9b7cac7353..71637016a7 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -1,8 +1,8 @@ name: Bug report description: Scanpy doesn’t do what it should? Please help us fix it! #title: ... +type: Bug labels: -- Bug 🐛 - Triage 🩺 #assignees: [] body:
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1505f196f5..a0c4b12e00 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: true +blank_issues_enabled: false contact_links: - name: Scanpy Community Forum url: https://discourse.scverse.org/
diff --git a/.github/ISSUE_TEMPLATE/enhancement-request.yml b/.github/ISSUE_TEMPLATE/enhancement-request.yml index 209ee6805a..9e511c592c 100644 --- a/.github/ISSUE_TEMPLATE/enhancement-request.yml +++ b/.github/ISSUE_TEMPLATE/enhancement-request.yml @@ -1,8 +1,8 @@ name: Enhancement request description: Anything you’d like to see in scanpy? #title: ... +type: Enhancement labels: -- Enhancement ✨ - Triage 🩺 #assignees: [] body: @@ -14,6 +14,7 @@ body: - 'Additional function parameters / changed functionality / changed defaults?' - 'New analysis tool: A simple analysis tool you have been using and are missing in `sc.tools`?' - 'New plotting function: A kind of plot you would like to see in `sc.pl`?' + - 'Improved documentation or error message?' - 'Other?' validations: required: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c8088c28f3..6c91285096 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.4 + rev: v0.8.2 hooks: - id: ruff types_or: [python, pyi, jupyter]
diff --git a/ci/scripts/min-deps.py b/ci/scripts/min-deps.py index 18af6ce151..0d49d151ef 100755 --- a/ci/scripts/min-deps.py +++ b/ci/scripts/min-deps.py @@ -12,6 +12,7 @@ import sys from collections import deque from contextlib import ExitStack +from functools import cached_property from pathlib import Path from typing import TYPE_CHECKING @@ -25,6 +26,8 @@ if TYPE_CHECKING: from collections.abc import Generator, Iterable, Sequence + from collections.abc import Set as AbstractSet + from typing import Any, Self def min_dep(req: Requirement) -> Requirement: @@ -77,48 +80,86 @@ def extract_min_deps( class Args(argparse.Namespace): - path: Path + """\ + Parse a pyproject.toml file and output a list of minimum dependencies. + Output is optimized for `[uv] pip install` (see `-o`/`--output` for details).
+ """ + + _path: Path output: Path | None - extras: list[str] + _extras: list[str] + _all_extras: bool + + @classmethod + def parse(cls, argv: Sequence[str] | None = None) -> Self: + return cls.parser().parse_args(argv, cls()) + + @classmethod + def parser(cls) -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="min-deps", + description=cls.__doc__, + usage="pip install `python min-deps.py pyproject.toml`", + ) + parser.add_argument( + "_path", + metavar="pyproject.toml", + type=Path, + help="Path to pyproject.toml to parse minimum dependencies from", + ) + parser.add_argument( + "--extras", + dest="_extras", + metavar="EXTRA", + type=str, + nargs="*", + default=(), + help="extras to install", + ) + parser.add_argument( + "--all-extras", + dest="_all_extras", + action="store_true", + help="get all extras", + ) + parser.add_argument( + *("--output", "-o"), + metavar="FILE", + type=Path, + default=None, + help=( + "output file (default: stdout). " + "Without this option, output is space-separated for direct passing to `pip install`. " + "With this option, output written to a file newline-separated file usable as `requirements.txt` or `constraints.txt`." + ), + ) + return parser + + @cached_property + def pyproject(self) -> dict[str, Any]: + return tomllib.loads(self._path.read_text()) + + @cached_property + def extras(self) -> AbstractSet[str]: + if self._extras: + if self._all_extras: + sys.exit("Cannot specify both --extras and --all-extras") + return dict.fromkeys(self._extras).keys() + if not self._all_extras: + return set() + return self.pyproject["project"]["optional-dependencies"].keys() def main(argv: Sequence[str] | None = None) -> None: - parser = argparse.ArgumentParser( - prog="min-deps", - description=( - "Parse a pyproject.toml file and output a list of minimum dependencies. " - "Output is optimized for `[uv] pip install` (see `-o`/`--output` for details)." - ), - usage="pip install `python min-deps.py pyproject.toml`", - ) - parser.add_argument( - "path", type=Path, help="pyproject.toml to parse minimum dependencies from" - ) - parser.add_argument( - "--extras", type=str, nargs="*", default=(), help="extras to install" - ) - parser.add_argument( - *("--output", "-o"), - type=Path, - default=None, - help=( - "output file (default: stdout). " - "Without this option, output is space-separated for direct passing to `pip install`. " - "With this option, output written to a file newline-separated file usable as `requirements.txt` or `constraints.txt`." 
- ), - ) - - args = parser.parse_args(argv, Args()) - - pyproject = tomllib.loads(args.path.read_text()) + args = Args.parse(argv) - project_name = pyproject["project"]["name"] + project_name = args.pyproject["project"]["name"] deps = [ - *map(Requirement, pyproject["project"]["dependencies"]), + *map(Requirement, args.pyproject["project"]["dependencies"]), *(Requirement(f"{project_name}[{extra}]") for extra in args.extras), ] - min_deps = extract_min_deps(deps, pyproject=pyproject) + min_deps = extract_min_deps(deps, pyproject=args.pyproject) sep = "\n" if args.output else " " with ExitStack() as stack: diff --git a/ci/scripts/towncrier_automation.py b/ci/scripts/towncrier_automation.py index c532883036..10a8b0c9dc 100755 --- a/ci/scripts/towncrier_automation.py +++ b/ci/scripts/towncrier_automation.py @@ -92,7 +92,11 @@ def main(argv: Sequence[str] | None = None) -> None: f"--base={base_branch}", f"--title={pr_title}", f"--body={pr_description}", - *(["--label=no milestone"] if base_branch == "main" else []), + *( + ["--label=no milestone", "--label=Development Process 🚀"] + if base_branch == "main" + else [] + ), *(["--dry-run"] if args.dry_run else []), ], check=True, diff --git a/docs/api/deprecated.md b/docs/api/deprecated.md index 4511f4b3a7..d09c1af405 100644 --- a/docs/api/deprecated.md +++ b/docs/api/deprecated.md @@ -11,4 +11,5 @@ pp.filter_genes_dispersion pp.normalize_per_cell + pp.subsample ``` diff --git a/docs/api/preprocessing.md b/docs/api/preprocessing.md index 4b17567a6b..36e732a6dc 100644 --- a/docs/api/preprocessing.md +++ b/docs/api/preprocessing.md @@ -31,7 +31,7 @@ For visual quality control, see {func}`~scanpy.pl.highest_expr_genes` and pp.normalize_total pp.regress_out pp.scale - pp.subsample + pp.sample pp.downsample_counts ``` diff --git a/docs/conf.py b/docs/conf.py index 2c79aa8d82..e17aa9df0f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -142,6 +142,8 @@ rapids_singlecell=("https://rapids-singlecell.readthedocs.io/en/latest/", None), scipy=("https://docs.scipy.org/doc/scipy/", None), seaborn=("https://seaborn.pydata.org/", None), + session_info2=("https://session-info2.readthedocs.io/en/stable/", None), + squidpy=("https://squidpy.readthedocs.io/en/stable/", None), sklearn=("https://scikit-learn.org/stable/", None), ) diff --git a/docs/release-notes/1.11.0.md b/docs/release-notes/1.11.0.md new file mode 100644 index 0000000000..c7258ea271 --- /dev/null +++ b/docs/release-notes/1.11.0.md @@ -0,0 +1,38 @@ +(v1.11.0)= +### 1.11.0rc1 {small}`2024-12-20` + +### Features + +- {func}`~scanpy.pp.sample` supports both upsampling and downsampling of observations and variables. {func}`~scanpy.pp.subsample` is now deprecated. {smaller}`G Eraslan & P Angerer` ({pr}`943`) +- Add `layer` argument to {func}`scanpy.tl.score_genes` and {func}`scanpy.tl.score_genes_cell_cycle` {smaller}`L Zappia` ({pr}`2921`) +- Prevent `raw` conflict with `layer` in {func}`~scanpy.tl.score_genes` {smaller}`S Dicks` ({pr}`3155`) +- Add support for `median` as an aggregation function to the `Aggregation` class in `scanpy.get._aggregated.py`. 
This allows for median-based aggregation of data (e.g., pseudobulk), complementing existing methods like mean- and sum-based aggregation {smaller}`M Dehkordi (Farhad)` ({pr}`3180`)
+- Add `key_added` argument to {func}`~scanpy.pp.pca`, {func}`~scanpy.tl.tsne` and {func}`~scanpy.tl.umap` {smaller}`P Angerer` ({pr}`3184`)
+- Support running {func}`scanpy.pp.pca` on sparse Dask arrays with the `'covariance_eigh'` solver {smaller}`P Angerer` ({pr}`3263`)
+- Use upstreamed {class}`~sklearn.decomposition.PCA` implementation for {class}`~scipy.sparse.csr_array` and {class}`~scipy.sparse.csr_matrix` (see {ref}`sklearn:changes_1_4`) {smaller}`P Angerer` ({pr}`3267`)
+- Add explicit support to {func}`scanpy.pp.pca` for `svd_solver='covariance_eigh'` {smaller}`P Angerer` ({pr}`3296`)
+- Add support for {class}`dask.array.Array` to {func}`scanpy.pp.calculate_qc_metrics` {smaller}`I Gold` ({pr}`3307`)
+- Support `layer` parameter in {func}`scanpy.pl.highest_expr_genes` {smaller}`P Angerer` ({pr}`3324`)
+- Run numba functions single-threaded when called from inside of a ThreadPool {smaller}`P Angerer` ({pr}`3335`)
+- Switch {func}`~scanpy.logging.print_header` and {func}`~scanpy.logging.print_versions` to {mod}`session_info2` {smaller}`P Angerer` ({pr}`3384`)
+- Add sampling probabilities/mask parameter `p` to {func}`~scanpy.pp.sample` {smaller}`P Angerer` ({pr}`3410`)
+
+### Performance
+
+- Speed up {func}`~scanpy.pp.regress_out` {smaller}`P Ashish, P Angerer & S Dicks` ({pr}`3284`)
+
+### Documentation
+
+- Improve {func}`~scanpy.external.pp.harmony_integrate` docs {smaller}`D Kühl` ({pr}`3362`)
+- Raise {exc}`FutureWarning` when calling deprecated {mod}`scanpy.pp` functions {smaller}`P Angerer` ({pr}`3380`)
+- | Deprecate … | in favor of … |
+ | --- | --- |
+ | {func}`scanpy.read_visium` | {func}`squidpy.read.visium` |
+ | {func}`scanpy.datasets.visium_sge` | {func}`squidpy.datasets.visium` |
+ | {func}`scanpy.pl.spatial` | {func}`squidpy.pl.spatial_scatter` |
+
+ {smaller}`P Angerer` ({pr}`3407`)
+
+### Bug fixes
+
+- Upper-bound {mod}`sklearn` `<1.6.0` due to {issue}`dask/dask-ml#1002` {smaller}`Ilan Gold` ({pr}`3393`)
diff --git a/docs/release-notes/1.5.0.md b/docs/release-notes/1.5.0.md index 922e758723..956ceb9493 100644 --- a/docs/release-notes/1.5.0.md +++ b/docs/release-notes/1.5.0.md @@ -5,7 +5,7 @@ The `1.5.0` release adds a lot of new functionality, much of which takes advanta #### Spatial data support -- Basic analysis {doc}`/tutorials/spatial/basic-analysis` and integration with single cell data {doc}`/tutorials/spatial/integration-scanorama` {smaller}`G Palla` +- Tutorials for basic analysis and integration with single cell data {smaller}`G Palla` - {func}`~scanpy.read_visium` read 10x Visium data {pr}`1034` {smaller}`G Palla, P Angerer, I Virshup` - {func}`~scanpy.datasets.visium_sge` load Visium data directly from 10x Genomics {pr}`1013` {smaller}`M Mirkazemi, G Palla, P Angerer` - {func}`~scanpy.pl.spatial` plot spatial data {pr}`1012` {smaller}`G Palla, P Angerer`
diff --git a/docs/release-notes/2921.feature.md b/docs/release-notes/2921.feature.md deleted file mode 100644 index e3c964abb2..0000000000 --- a/docs/release-notes/2921.feature.md +++ /dev/null @@ -1 +0,0 @@ -Add `layer` argument to {func}`scanpy.tl.score_genes` and {func}`scanpy.tl.score_genes_cell_cycle` {smaller}`L Zappia`
diff --git a/docs/release-notes/3155.feature.md b/docs/release-notes/3155.feature.md deleted file mode 100644 index 770c504348..0000000000 --- a/docs/release-notes/3155.feature.md +++ /dev/null @@ -1
+0,0 @@ -Prevent `raw` conflict with `layer` in {func}`~scanpy.tl.score_genes` {smaller}`S Dicks` diff --git a/docs/release-notes/3180.feature.md b/docs/release-notes/3180.feature.md deleted file mode 100644 index ab73dfe18e..0000000000 --- a/docs/release-notes/3180.feature.md +++ /dev/null @@ -1 +0,0 @@ -Add support for `median` as an aggregation function to the `Aggregation` class in `scanpy.get._aggregated.py`. This allows for median-based aggregation of data (e.g., pseudobulk), complementing existing methods like mean- and sum-based aggregation {smaller}`M Dehkordi (Farhad)` diff --git a/docs/release-notes/3184.feature.md b/docs/release-notes/3184.feature.md deleted file mode 100644 index 3cc976b141..0000000000 --- a/docs/release-notes/3184.feature.md +++ /dev/null @@ -1 +0,0 @@ -Add `key_added` argument to {func}`~scanpy.pp.pca`, {func}`~scanpy.tl.tsne` and {func}`~scanpy.tl.umap` {smaller}`P Angerer` diff --git a/docs/release-notes/3263.feature.md b/docs/release-notes/3263.feature.md deleted file mode 100644 index 8e924e1799..0000000000 --- a/docs/release-notes/3263.feature.md +++ /dev/null @@ -1 +0,0 @@ -Support running {func}`scanpy.pp.pca` on sparse Dask arrays with the `'covariance_eigh'` solver {smaller}`P Angerer` diff --git a/docs/release-notes/3267.feature.md b/docs/release-notes/3267.feature.md deleted file mode 100644 index 6ea7fb20a2..0000000000 --- a/docs/release-notes/3267.feature.md +++ /dev/null @@ -1 +0,0 @@ -Use upstreamed {class}`~sklearn.decomposition.PCA` implementation for {class}`~scipy.sparse.csr_array` and {class}`~scipy.sparse.csr_matrix` (see {ref}`sklearn:changes_1_4`) {smaller}`P Angerer` diff --git a/docs/release-notes/3284.performance.md b/docs/release-notes/3284.performance.md deleted file mode 100644 index 31c95245ff..0000000000 --- a/docs/release-notes/3284.performance.md +++ /dev/null @@ -1 +0,0 @@ -* Speed up {func}`~scanpy.pp.regress_out` {smaller}`P Ashish, P Angerer & S Dicks` diff --git a/docs/release-notes/3296.feature.md b/docs/release-notes/3296.feature.md deleted file mode 100644 index 74b89945dd..0000000000 --- a/docs/release-notes/3296.feature.md +++ /dev/null @@ -1 +0,0 @@ -Add explicit support to {func}`scanpy.pp.pca` for `svd_solver='covariance_eigh'` {smaller}`P Angerer` diff --git a/docs/release-notes/3307.feature.md b/docs/release-notes/3307.feature.md deleted file mode 100644 index 1505befb40..0000000000 --- a/docs/release-notes/3307.feature.md +++ /dev/null @@ -1 +0,0 @@ -Add support {class}`dask.array.Array` to {func}`scanpy.pp.calculate_qc_metrics` {smaller}`I Gold` diff --git a/docs/release-notes/3324.feature.md b/docs/release-notes/3324.feature.md deleted file mode 100644 index 03d14dceb6..0000000000 --- a/docs/release-notes/3324.feature.md +++ /dev/null @@ -1 +0,0 @@ -Support `layer` parameter in {func}`scanpy.pl.highest_expr_genes` {smaller}`P Angerer` diff --git a/docs/release-notes/3335.feature.md b/docs/release-notes/3335.feature.md deleted file mode 100644 index 77a1723a8e..0000000000 --- a/docs/release-notes/3335.feature.md +++ /dev/null @@ -1 +0,0 @@ -Run numba functions single-threaded when called from inside of a ThreadPool {smaller}`P Angerer` diff --git a/docs/release-notes/3362.doc.md b/docs/release-notes/3362.doc.md deleted file mode 100644 index 1dae77b3e2..0000000000 --- a/docs/release-notes/3362.doc.md +++ /dev/null @@ -1 +0,0 @@ -Improve {func}`~scanpy.external.pp.harmony_integrate` docs {smaller}`D Kühl` diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index ee57056a6d..b20ee2b762 100644 --- 
a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -37,19 +37,6 @@ trajectories/index ## Spatial data
-```{seealso}
-For more up-to-date tutorials on working with spatial data, see:
-
-* [SquidPy tutorials](https://squidpy.readthedocs.io/en/stable/notebooks/tutorials/index.html)
-* [SpatialData tutorials](https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks.html)
-* [Scverse ecosystem spatial tutorials](https://scverse.org/learn/)
-```
-
-```{toctree}
-:maxdepth: 2
-
-spatial/index
-```
## Experimental @@ -64,3 +51,12 @@ experimental/index A number of older tutorials can be found at: * The [`scanpy_usage`](https://github.com/scverse/scanpy_usage) repository
+
+```{seealso}
+Scanpy used to have tutorials for its (now deprecated) spatial data functionality.
+For up-to-date tutorials on working with spatial data, see:
+
+* SquidPy {doc}`squidpy:notebooks/tutorials/index`
+* [SpatialData tutorials](https://spatialdata.scverse.org/en/latest/tutorials/notebooks/notebooks.html)
+* [Scverse ecosystem spatial tutorials](https://scverse.org/learn/)
+```
diff --git a/docs/tutorials/spatial/basic-analysis.ipynb b/docs/tutorials/spatial/basic-analysis.ipynb deleted file mode 120000 index 66d9e48121..0000000000 --- a/docs/tutorials/spatial/basic-analysis.ipynb +++ /dev/null @@ -1 +0,0 @@ -../../../notebooks/spatial/basic-analysis.ipynb \ No newline at end of file
diff --git a/docs/tutorials/spatial/index.md b/docs/tutorials/spatial/index.md deleted file mode 100644 index 801b901e53..0000000000 --- a/docs/tutorials/spatial/index.md +++ /dev/null @@ -1,8 +0,0 @@ -## Spatial - -```{toctree} -:maxdepth: 1 - -basic-analysis -integration-scanorama -```
diff --git a/docs/tutorials/spatial/integration-scanorama.ipynb b/docs/tutorials/spatial/integration-scanorama.ipynb deleted file mode 120000 index 5143681577..0000000000 --- a/docs/tutorials/spatial/integration-scanorama.ipynb +++ /dev/null @@ -1 +0,0 @@ -../../../notebooks/spatial/integration-scanorama.ipynb \ No newline at end of file
diff --git a/hatch.toml b/hatch.toml index ad5db60976..3163d5d82d 100644 --- a/hatch.toml +++ b/hatch.toml @@ -22,7 +22,7 @@ overrides.matrix.deps.env-vars = [ { if = ["min"], key = "UV_CONSTRAINT", value = "ci/scanpy-min-deps.txt" }, ] overrides.matrix.deps.pre-install-commands = [ - { if = ["min"], value = "uv run ci/scripts/min-deps.py pyproject.toml -o ci/scanpy-min-deps.txt" }, + { if = ["min"], value = "uv run ci/scripts/min-deps.py pyproject.toml --all-extras -o ci/scanpy-min-deps.txt" }, ] overrides.matrix.deps.python = [ { if = ["min"], value = "3.10" },
diff --git a/pyproject.toml b/pyproject.toml index cfb7ffd28a..8e23afb14b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,25 +47,26 @@ classifiers = [ ] dependencies = [ "anndata>=0.8", - "numpy>=1.23", + "numpy>=1.24", "matplotlib>=3.6", "pandas >=1.5", "scipy>=1.8", "seaborn>=0.13", "h5py>=3.7", "tqdm", - "scikit-learn>=1.1", + "scikit-learn>=1.1,<1.6.0", "statsmodels>=0.13", "patsy!=1.0.0", # https://github.com/pydata/patsy/issues/215 "networkx>=2.7", "natsort", "joblib", - "numba>=0.56", + "numba>=0.57", "umap-learn>=0.5,!=0.5.0", "pynndescent>=0.5", "packaging>=21.3", - "session-info", + "session-info2", "legacy-api-wrap>=1.4", # for positional API deprecations + "typing-extensions; python_version < '3.13'", ] dynamic = ["version"]
diff --git a/src/scanpy/_compat.py b/src/scanpy/_compat.py index dca6c84c4e..9ea7780b0d 100644 --- a/src/scanpy/_compat.py +++ b/src/scanpy/_compat.py @@ -4,7 +4,7 @@ import sys import warnings from
dataclasses import dataclass, field -from functools import cache, partial, wraps +from functools import WRAPPER_ASSIGNMENTS, cache, partial, wraps from importlib.util import find_spec from pathlib import Path from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, cast, overload @@ -48,6 +48,10 @@ class ZappyArray: "fullname", "pkg_metadata", "pkg_version", + "old_positionals", + "deprecated", + "njit", + "_numba_threading_layer", ] @@ -102,6 +106,28 @@ def old_positionals(*old_positionals: str): return lambda func: func +if sys.version_info >= (3, 11): + + @wraps(BaseException.add_note) + def add_note(exc: BaseException, note: str) -> None: + exc.add_note(note) +else: + + def add_note(exc: BaseException, note: str) -> None: + if not hasattr(exc, "__notes__"): + exc.__notes__ = [] + exc.__notes__.append(note) + + +if sys.version_info >= (3, 13): + from warnings import deprecated as _deprecated +else: + from typing_extensions import deprecated as _deprecated + + +deprecated = partial(_deprecated, category=FutureWarning) + + @overload def njit(fn: Callable[P, R], /) -> Callable[P, R]: ... @overload @@ -198,3 +224,42 @@ def _numba_threading_layer() -> Layer: f" ({available=}, {numba.config.THREADING_LAYER_PRIORITY=})" ) raise ValueError(msg) + + +def _legacy_numpy_gen( + random_state: _LegacyRandom | None = None, +) -> np.random.Generator: + """Return a random generator that behaves like the legacy one.""" + + if random_state is not None: + if isinstance(random_state, np.random.RandomState): + np.random.set_state(random_state.get_state(legacy=False)) + return _FakeRandomGen(random_state) + np.random.seed(random_state) + return _FakeRandomGen(np.random.RandomState(np.random.get_bit_generator())) + + +class _FakeRandomGen(np.random.Generator): + _state: np.random.RandomState + + def __init__(self, random_state: np.random.RandomState) -> None: + self._state = random_state + + @classmethod + def _delegate(cls) -> None: + for name, meth in np.random.Generator.__dict__.items(): + if name.startswith("_") or not callable(meth): + continue + + def mk_wrapper(name: str): + # Old pytest versions try to run the doctests + @wraps(meth, assigned=set(WRAPPER_ASSIGNMENTS) - {"__doc__"}) + def wrapper(self: _FakeRandomGen, *args, **kwargs): + return getattr(self._state, name)(*args, **kwargs) + + return wrapper + + setattr(cls, name, mk_wrapper(name)) + + +_FakeRandomGen._delegate() diff --git a/src/scanpy/_settings.py b/src/scanpy/_settings.py index 54b51b6420..5543689ef7 100644 --- a/src/scanpy/_settings.py +++ b/src/scanpy/_settings.py @@ -82,9 +82,7 @@ def _type_check(var: Any, varname: str, types: type | tuple[type, ...]): possible_types_str = types.__name__ else: type_names = [t.__name__ for t in types] - possible_types_str = "{} or {}".format( - ", ".join(type_names[:-1]), type_names[-1] - ) + possible_types_str = f"{', '.join(type_names[:-1])} or {type_names[-1]}" raise TypeError(f"{varname} must be of type {possible_types_str}") diff --git a/src/scanpy/datasets/_datasets.py b/src/scanpy/datasets/_datasets.py index df510b3209..8859de4d74 100644 --- a/src/scanpy/datasets/_datasets.py +++ b/src/scanpy/datasets/_datasets.py @@ -9,7 +9,7 @@ from anndata import AnnData from .. 
import _utils -from .._compat import old_positionals +from .._compat import deprecated, old_positionals from .._settings import settings from .._utils._doctests import doctest_internet, doctest_needs from ..readwrite import read, read_visium @@ -509,6 +509,7 @@ def _download_visium_dataset( return sample_dir +@deprecated("Use `squidpy.datasets.visium` instead.") @doctest_internet @check_datasetdir_exists def visium_sge( @@ -519,6 +520,9 @@ def visium_sge( """\ Processed Visium Spatial Gene Expression data from 10x Genomics’ database. + .. deprecated:: 1.11.0 + Use :func:`squidpy.datasets.visium` instead. + The database_ can be browsed online to find the ``sample_id`` you want. .. _database: https://support.10xgenomics.com/spatial-gene-expression/datasets diff --git a/src/scanpy/external/exporting.py b/src/scanpy/external/exporting.py index c1d7fa93b4..9364b7d368 100644 --- a/src/scanpy/external/exporting.py +++ b/src/scanpy/external/exporting.py @@ -345,8 +345,8 @@ def _write_color_tracks(ctracks, fname): def _frac_to_hex(frac): - rgb = tuple(np.array(np.array(plt.cm.jet(frac)[:3]) * 255, dtype=int)) - return "#{:02x}{:02x}{:02x}".format(*rgb) + r, g, b = tuple(np.array(np.array(plt.cm.jet(frac)[:3]) * 255, dtype=int)) + return f"#{r:02x}{g:02x}{b:02x}" def _get_color_stats_genes(color_stats, E, gene_list): diff --git a/src/scanpy/external/tl/_phenograph.py b/src/scanpy/external/tl/_phenograph.py index 8cecfa7276..24e10bcb85 100644 --- a/src/scanpy/external/tl/_phenograph.py +++ b/src/scanpy/external/tl/_phenograph.py @@ -244,8 +244,8 @@ def phenograph( comm_key = ( f"pheno_{clustering_algo}" if clustering_algo in ["louvain", "leiden"] else "" ) - ig_key = "pheno_{}_ig".format("jaccard" if jaccard else "gaussian") - q_key = "pheno_{}_q".format("jaccard" if jaccard else "gaussian") + ig_key = f"pheno_{'jaccard' if jaccard else 'gaussian'}_ig" + q_key = f"pheno_{'jaccard' if jaccard else 'gaussian'}_q" communities, graph, Q = phenograph.cluster( data=data, diff --git a/src/scanpy/get/_aggregated.py b/src/scanpy/get/_aggregated.py index 13ca54b5c4..53a18bb47c 100644 --- a/src/scanpy/get/_aggregated.py +++ b/src/scanpy/get/_aggregated.py @@ -263,8 +263,7 @@ def aggregate( if axis is None: axis = 1 if varm else 0 axis, axis_name = _resolve_axis(axis) - if mask is not None: - mask = _check_mask(adata, mask, axis_name) + mask = _check_mask(adata, mask, axis_name) data = adata.X if sum(p is not None for p in [varm, obsm, layer]) > 1: raise TypeError("Please only provide one (or none) of varm, obsm, or layer") diff --git a/src/scanpy/get/get.py b/src/scanpy/get/get.py index f3172ed45e..c36ddde8f8 100644 --- a/src/scanpy/get/get.py +++ b/src/scanpy/get/get.py @@ -2,11 +2,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypeVar import numpy as np import pandas as pd from anndata import AnnData +from numpy.typing import NDArray from packaging.version import Version from scipy.sparse import spmatrix @@ -16,7 +17,11 @@ from anndata._core.sparse_dataset import BaseCompressedSparseDataset from anndata._core.views import ArrayView - from numpy.typing import NDArray + from scipy.sparse import csc_matrix, csr_matrix + + from .._compat import DaskArray + + CSMatrix = csr_matrix | csc_matrix # -------------------------------------------------------------------------------- # Plotting data helpers @@ -485,11 +490,16 @@ def _set_obs_rep( raise AssertionError(msg) +M = TypeVar("M", bound=NDArray[np.bool_] | NDArray[np.floating] | pd.Series | None) + + 
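Review note: the `M` TypeVar above and the generalized `_check_mask` below let a single argument carry either a boolean mask or float sampling probabilities. A minimal sketch of the intended contract, assuming this patch is applied (`_check_mask` is a private helper, so the import is purely illustrative):

```python
import numpy as np
from anndata import AnnData

from scanpy.get import _check_mask  # private helper; import path assumed from this patch

adata = AnnData(np.ones((4, 2), dtype=np.float32))
bool_mask = np.array([True, False, True, False])
probs = np.array([0.5, 0.0, 0.25, 0.25])

_check_mask(adata, bool_mask, "obs")  # boolean masks validate as before
_check_mask(adata, probs, "obs", allow_probabilities=True)  # floats need the opt-in
# _check_mask(adata, probs, "obs") would raise ValueError("Mask array must be boolean.")
```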
def _check_mask(
- data: AnnData | np.ndarray,
- mask: NDArray[np.bool_] | str,
+ data: AnnData | np.ndarray | CSMatrix | DaskArray,
+ mask: str | M,
dim: Literal["obs", "var"],
-) -> NDArray[np.bool_]: # Could also be a series, but should be one or the other
+ *,
+ allow_probabilities: bool = False,
+) -> M: # Could also be a series, but should be one or the other
""" Validate mask argument Params ------ data Annotated data matrix or numpy array. mask
- The mask. Either an appropriatley sized boolean array, or name of a column which will be used to mask.
+ Mask (or probabilities if `allow_probabilities=True`).
+ Either an appropriately sized array, or name of a column.
dim The dimension being masked.
+ allow_probabilities
+ Whether to allow probabilities as `mask`.
"""
+ if mask is None:
+ return mask
+ desc = "mask/probabilities" if allow_probabilities else "mask"
+
if isinstance(mask, str):
if not isinstance(data, AnnData):
- msg = "Cannot refer to mask with string without providing anndata object as argument"
+ msg = f"Cannot refer to {desc} with string without providing anndata object as argument"
raise ValueError(msg)
annot: pd.DataFrame = getattr(data, dim)
if mask not in annot.columns:
msg = (
f"Did not find `adata.{dim}[{mask!r}]`. "
- f"Either add the mask first to `adata.{dim}`"
- "or consider using the mask argument with a boolean array."
+ f"Either add the {desc} first to `adata.{dim}` "
+ f"or consider using the {desc} argument with an array."
)
raise ValueError(msg)
mask_array = annot[mask].to_numpy()
else:
if len(mask) != data.shape[0 if dim == "obs" else 1]:
- raise ValueError("The shape of the mask do not match the data.")
+ msg = f"The shape of the {desc} does not match the data."
+ raise ValueError(msg)
mask_array = mask
- if not pd.api.types.is_bool_dtype(mask_array.dtype):
- raise ValueError("Mask array must be boolean.")
+ is_bool = pd.api.types.is_bool_dtype(mask_array.dtype)
+ if not allow_probabilities and not is_bool:
+ msg = "Mask array must be boolean."
+ raise ValueError(msg)
+ elif allow_probabilities and not (
+ is_bool or pd.api.types.is_float_dtype(mask_array.dtype)
+ ):
+ msg = f"{desc} array must be boolean or floating point."
+ raise ValueError(msg)
return mask_array
diff --git a/src/scanpy/logging.py b/src/scanpy/logging.py index 168c3b5405..3aa0ca494c 100644 --- a/src/scanpy/logging.py +++ b/src/scanpy/logging.py @@ -4,17 +4,20 @@ import logging import sys -import warnings from datetime import datetime, timedelta, timezone from functools import partial, update_wrapper from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, overload import anndata.logging +from ._compat import deprecated + if TYPE_CHECKING: from typing import IO + from session_info2 import SessionInfo + from ._settings import ScanpyConfig @@ -127,33 +130,11 @@ def format(self, record: logging.LogRecord): get_memory_usage = anndata.logging.get_memory_usage
-_DEPENDENCIES_NUMERICS = [
- "anndata", # anndata actually shouldn't, but as long as it's in development
- "umap",
- "numpy",
- "scipy",
- "pandas",
- ("sklearn", "scikit-learn"),
- "statsmodels",
- "igraph",
- "louvain",
- "leidenalg",
- "pynndescent",
-]
-
-
-def _versions_dependencies(dependencies):
- # this is not the same as the requirements!
- for mod in dependencies: - mod_name, dist_name = mod if isinstance(mod, tuple) else (mod, mod) - try: - imp = __import__(mod_name) - yield dist_name, imp.__version__ - except (ImportError, AttributeError): - pass - - -def print_header(*, file=None): +@overload +def print_header(*, file: None = None) -> SessionInfo: ... +@overload +def print_header(*, file: IO[str]) -> None: ... +def print_header(*, file: IO[str] | None = None): """\ Versions that might influence the numerical results. Matplotlib and Seaborn are excluded from this. @@ -163,50 +144,27 @@ def print_header(*, file=None): file Optional path for dependency output. """ + from session_info2 import session_info - modules = ["scanpy"] + _DEPENDENCIES_NUMERICS - print( - " ".join(f"{mod}=={ver}" for mod, ver in _versions_dependencies(modules)), - file=file or sys.stdout, - ) + sinfo = session_info(os=True, cpu=True, gpu=True, dependencies=True) + + if file is not None: + print(sinfo, file=file) + return + + return sinfo -def print_versions(*, file: IO[str] | None = None): +@deprecated("Use `print_header` instead") +def print_versions() -> SessionInfo: """\ - Print versions of imported packages, OS, and jupyter environment. + Alias for `print_header`. - For more options (including rich output) use `session_info.show` directly. + .. deprecated:: 1.11.0 - Parameters - ---------- - file - Optional path for output. + Use :func:`print_header` instead. """ - import session_info - - if file is not None: - from contextlib import redirect_stdout - - warnings.warn( - "Passing argument 'file' to print_versions is deprecated, and will be " - "removed in a future version.", - FutureWarning, - ) - with redirect_stdout(file): - print_versions() - else: - session_info.show( - dependencies=True, - html=False, - excludes=[ - "builtins", - "stdlib_list", - "importlib_metadata", - # Special module present if test coverage being calculated - # https://gitlab.com/joelostblom/session_info/-/issues/10 - "$coverage", - ], - ) + return print_header() def print_version_and_date(*, file=None): @@ -235,7 +193,7 @@ def _copy_docs_and_signature(fn): def error( msg: str, *, - time: datetime = None, + time: datetime | None = None, deep: str | None = None, extra: dict | None = None, ) -> datetime: diff --git a/src/scanpy/plotting/_preprocessing.py b/src/scanpy/plotting/_preprocessing.py index e6c7808be1..b51688082e 100644 --- a/src/scanpy/plotting/_preprocessing.py +++ b/src/scanpy/plotting/_preprocessing.py @@ -6,7 +6,7 @@ from matplotlib import pyplot as plt from matplotlib import rcParams -from .._compat import old_positionals +from .._compat import deprecated, old_positionals from .._settings import settings from . import _utils @@ -103,6 +103,7 @@ def highly_variable_genes( # backwards compat +@deprecated("Use sc.pl.highly_variable_genes instead") @old_positionals("log", "show", "save") def filter_genes_dispersion( result: np.recarray, diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py index ff14a19989..e67e6e2ece 100644 --- a/src/scanpy/plotting/_tools/paga.py +++ b/src/scanpy/plotting/_tools/paga.py @@ -702,11 +702,11 @@ def _paga_graph( and isinstance(node_labels, str) and node_labels != adata.uns["paga"]["groups"] ): - raise ValueError( - "Provide a list of group labels for the PAGA groups {}, not {}.".format( - adata.uns["paga"]["groups"], node_labels - ) + msg = ( + "Provide a list of group labels for the PAGA groups " + f"{adata.uns['paga']['groups']}, not {node_labels}." 
) + raise ValueError(msg) groups_key = adata.uns["paga"]["groups"] if node_labels is None: node_labels = adata.obs[groups_key].cat.categories diff --git a/src/scanpy/plotting/_tools/scatterplots.py b/src/scanpy/plotting/_tools/scatterplots.py index 4ce39f7211..b54897678f 100644 --- a/src/scanpy/plotting/_tools/scatterplots.py +++ b/src/scanpy/plotting/_tools/scatterplots.py @@ -28,6 +28,7 @@ from packaging.version import Version from ... import logging as logg +from ..._compat import deprecated from ..._settings import settings from ..._utils import ( Empty, # noqa: TCH001 @@ -149,8 +150,7 @@ def embedding( # Checking the mask format and if used together with groups if groups is not None and mask_obs is not None: raise ValueError("Groups and mask arguments are incompatible.") - if mask_obs is not None: - mask_obs = _check_mask(adata, mask_obs, "obs") + mask_obs = _check_mask(adata, mask_obs, "obs") # Figure out if we're using raw if use_raw is None: @@ -919,6 +919,7 @@ def pca( return axs +@deprecated("Use `squidpy.pl.spatial_scatter` instead.") @_wraps_plot_scatter @_doc_params( adata_color_etc=doc_adata_color_etc, @@ -948,6 +949,9 @@ def spatial( """\ Scatter plot in spatial coordinates. + .. deprecated:: 1.11.0 + Use :func:`squidpy.pl.spatial_scatter` instead. + This function allows overlaying data on top of images. Use the parameter `img_key` to see the image in the background And the parameter `library_id` to select the image. @@ -994,8 +998,6 @@ def spatial( -------- :func:`scanpy.datasets.visium_sge` Example visium data. - :doc:`/tutorials/spatial/basic-analysis` - Tutorial on spatial analysis. """ # get default image params if available library_id, spatial_data = _check_spatial_data(adata.uns, library_id) diff --git a/src/scanpy/preprocessing/__init__.py b/src/scanpy/preprocessing/__init__.py index 8c396d8640..4307cbb6c9 100644 --- a/src/scanpy/preprocessing/__init__.py +++ b/src/scanpy/preprocessing/__init__.py @@ -3,6 +3,7 @@ from ..neighbors import neighbors from ._combat import combat from ._deprecated.highly_variable_genes import filter_genes_dispersion +from ._deprecated.sampling import subsample from ._highly_variable_genes import highly_variable_genes from ._normalization import normalize_total from ._pca import pca @@ -17,8 +18,8 @@ log1p, normalize_per_cell, regress_out, + sample, sqrt, - subsample, ) __all__ = [ @@ -40,6 +41,7 @@ "log1p", "normalize_per_cell", "regress_out", + "sample", "scale", "sqrt", "subsample", diff --git a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py index f2c3ce971b..27e8f1f846 100644 --- a/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py +++ b/src/scanpy/preprocessing/_deprecated/highly_variable_genes.py @@ -9,7 +9,7 @@ from scipy.sparse import issparse from ... import logging as logg -from ..._compat import old_positionals +from ..._compat import deprecated, old_positionals from .._distributed import materialize_as_ndarray from .._utils import _get_mean_var @@ -19,6 +19,7 @@ from scipy.sparse import spmatrix +@deprecated("Use sc.pp.highly_variable_genes instead") @old_positionals( "flavor", "min_disp", @@ -48,18 +49,17 @@ def filter_genes_dispersion( """\ Extract highly variable genes :cite:p:`Satija2015,Zheng2017`. - .. warning:: - .. deprecated:: 1.3.6 - Use :func:`~scanpy.pp.highly_variable_genes` - instead. The new function is equivalent to the present - function, except that + .. 
deprecated:: 1.3.6 - * the new function always expects logarithmized data - * `subset=False` in the new function, it suffices to - merely annotate the genes, tools like `pp.pca` will - detect the annotation - * you can now call: `sc.pl.highly_variable_genes(adata)` - * `copy` is replaced by `inplace` + Use :func:`~scanpy.pp.highly_variable_genes` instead. + The new function is equivalent to the present function, except that + + * the new function always expects logarithmized data + * `subset=False` in the new function, it suffices to + merely annotate the genes, tools like `pp.pca` will + detect the annotation + * you can now call: `sc.pl.highly_variable_genes(adata)` + * `copy` is replaced by `inplace` If trying out parameters, pass the data matrix instead of AnnData. diff --git a/src/scanpy/preprocessing/_deprecated/sampling.py b/src/scanpy/preprocessing/_deprecated/sampling.py new file mode 100644 index 0000000000..02619a2364 --- /dev/null +++ b/src/scanpy/preprocessing/_deprecated/sampling.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..._compat import _legacy_numpy_gen, old_positionals +from .._simple import sample + +if TYPE_CHECKING: + import numpy as np + from anndata import AnnData + from numpy.typing import NDArray + from scipy.sparse import csc_matrix, csr_matrix + + from ..._compat import _LegacyRandom + + CSMatrix = csr_matrix | csc_matrix + + +@old_positionals("n_obs", "random_state", "copy") +def subsample( + data: AnnData | np.ndarray | CSMatrix, + fraction: float | None = None, + *, + n_obs: int | None = None, + random_state: _LegacyRandom = 0, + copy: bool = False, +) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None: + """\ + Subsample to a fraction of the number of observations. + + .. deprecated:: 1.11.0 + + Use :func:`~scanpy.pp.sample` instead. + + Parameters + ---------- + data + The (annotated) data matrix of shape `n_obs` × `n_vars`. + Rows correspond to cells and columns to genes. + fraction + Subsample to this `fraction` of the number of observations. + n_obs + Subsample to this number of observations. + random_state + Random seed to change subsampling. + copy + If an :class:`~anndata.AnnData` is passed, + determines whether a copy is returned. + + Returns + ------- + Returns `X[obs_indices], obs_indices` if data is array-like, otherwise + subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or + returns a subsampled copy of it (`copy == True`). 
+ """ + + rng = _legacy_numpy_gen(random_state) + return sample( + data=data, fraction=fraction, n=n_obs, rng=rng, copy=copy, replace=False, axis=0 + ) diff --git a/src/scanpy/preprocessing/_qc.py b/src/scanpy/preprocessing/_qc.py index 27836e1717..87ad51d420 100644 --- a/src/scanpy/preprocessing/_qc.py +++ b/src/scanpy/preprocessing/_qc.py @@ -194,26 +194,21 @@ def describe_var( if issparse(X): X.eliminate_zeros() var_metrics = pd.DataFrame(index=adata.var_names) - var_metrics["n_cells_by_{expr_type}"], var_metrics["mean_{expr_type}"] = ( + var_metrics[f"n_cells_by_{expr_type}"], var_metrics[f"mean_{expr_type}"] = ( materialize_as_ndarray((axis_nnz(X, axis=0), _get_mean_var(X, axis=0)[0])) ) if log1p: - var_metrics["log1p_mean_{expr_type}"] = np.log1p( - var_metrics["mean_{expr_type}"] + var_metrics[f"log1p_mean_{expr_type}"] = np.log1p( + var_metrics[f"mean_{expr_type}"] ) - var_metrics["pct_dropout_by_{expr_type}"] = ( - 1 - var_metrics["n_cells_by_{expr_type}"] / X.shape[0] + var_metrics[f"pct_dropout_by_{expr_type}"] = ( + 1 - var_metrics[f"n_cells_by_{expr_type}"] / X.shape[0] ) * 100 - var_metrics["total_{expr_type}"] = np.ravel(axis_sum(X, axis=0)) + var_metrics[f"total_{expr_type}"] = np.ravel(axis_sum(X, axis=0)) if log1p: - var_metrics["log1p_total_{expr_type}"] = np.log1p( - var_metrics["total_{expr_type}"] + var_metrics[f"log1p_total_{expr_type}"] = np.log1p( + var_metrics[f"total_{expr_type}"] ) - # Relabel - new_colnames = [] - for col in var_metrics.columns: - new_colnames.append(col.format(**locals())) - var_metrics.columns = new_colnames if inplace: adata.var[var_metrics.columns] = var_metrics return None diff --git a/src/scanpy/preprocessing/_scale.py b/src/scanpy/preprocessing/_scale.py index d7123d5f65..bac08f246b 100644 --- a/src/scanpy/preprocessing/_scale.py +++ b/src/scanpy/preprocessing/_scale.py @@ -164,8 +164,8 @@ def scale_array( ): if copy: X = X.copy() + mask_obs = _check_mask(X, mask_obs, "obs") if mask_obs is not None: - mask_obs = _check_mask(X, mask_obs, "obs") scale_rv = scale_array( X[mask_obs, :], zero_center=zero_center, diff --git a/src/scanpy/preprocessing/_simple.py b/src/scanpy/preprocessing/_simple.py index 01936414a5..821615676a 100644 --- a/src/scanpy/preprocessing/_simple.py +++ b/src/scanpy/preprocessing/_simple.py @@ -8,20 +8,21 @@ import warnings from functools import singledispatch from itertools import repeat -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, TypeVar, overload import numba import numpy as np from anndata import AnnData from pandas.api.types import CategoricalDtype -from scipy.sparse import csr_matrix, issparse, isspmatrix_csr, spmatrix +from scipy.sparse import csc_matrix, csr_matrix, issparse, isspmatrix_csr, spmatrix from sklearn.utils import check_array, sparsefuncs from .. 
import logging as logg -from .._compat import njit, old_positionals +from .._compat import DaskArray, deprecated, njit, old_positionals from .._settings import settings as sett from .._utils import ( _check_array_function_arguments, + _resolve_axis, axis_sum, is_backed_type, raise_not_implemented_error_if_backed_type, @@ -29,19 +30,15 @@ sanitize_anndata, view_to_actual, ) -from ..get import _get_obs_rep, _set_obs_rep +from ..get import _check_mask, _get_obs_rep, _set_obs_rep from ._distributed import materialize_as_ndarray from ._utils import _to_dense -# install dask if available try: import dask.array as da except ImportError: da = None -# backwards compat -from ._deprecated.highly_variable_genes import filter_genes_dispersion # noqa: F401 - if TYPE_CHECKING: from collections.abc import Collection, Iterable, Sequence from numbers import Number @@ -50,7 +47,13 @@ import pandas as pd from numpy.typing import NDArray - from .._compat import DaskArray, _LegacyRandom + from .._compat import _LegacyRandom + from .._utils import RNGLike, SeedLike + + +CSMatrix = csr_matrix | csc_matrix + +A = TypeVar("A", bound=np.ndarray | CSMatrix | DaskArray) @old_positionals( @@ -474,6 +477,7 @@ def sqrt( return X.sqrt() +@deprecated("Use sc.pp.normalize_total instead") @old_positionals( "counts_per_cell_after", "counts_per_cell", @@ -497,16 +501,16 @@ def normalize_per_cell( """\ Normalize total counts per cell. - .. warning:: - .. deprecated:: 1.3.7 - Use :func:`~scanpy.pp.normalize_total` instead. - The new function is equivalent to the present - function, except that + .. deprecated:: 1.3.7 - * the new function doesn't filter cells based on `min_counts`, - use :func:`~scanpy.pp.filter_cells` if filtering is needed. - * some arguments were renamed - * `copy` is replaced by `inplace` + Use :func:`~scanpy.pp.normalize_total` instead. + The new function is equivalent to the present + function, except that + + * the new function doesn't filter cells based on `min_counts`, + use :func:`~scanpy.pp.filter_cells` if filtering is needed. + * some arguments were renamed + * `copy` is replaced by `inplace` Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization. @@ -824,17 +828,55 @@ def _regress_out_chunk( return np.vstack(responses_chunk_list) -@old_positionals("n_obs", "random_state", "copy") -def subsample( - data: AnnData | np.ndarray | spmatrix, +@overload +def sample( + data: AnnData, fraction: float | None = None, *, - n_obs: int | None = None, - random_state: _LegacyRandom = 0, + n: int | None = None, + rng: RNGLike | SeedLike | None = 0, + copy: Literal[False] = False, + replace: bool = False, + axis: Literal["obs", 0, "var", 1] = "obs", + p: str | NDArray[np.bool_] | NDArray[np.floating] | None = None, +) -> None: ... +@overload +def sample( + data: AnnData, + fraction: float | None = None, + *, + n: int | None = None, + rng: RNGLike | SeedLike | None = None, + copy: Literal[True], + replace: bool = False, + axis: Literal["obs", 0, "var", 1] = "obs", + p: str | NDArray[np.bool_] | NDArray[np.floating] | None = None, +) -> AnnData: ... +@overload +def sample( + data: A, + fraction: float | None = None, + *, + n: int | None = None, + rng: RNGLike | SeedLike | None = None, + copy: bool = False, + replace: bool = False, + axis: Literal["obs", 0, "var", 1] = "obs", + p: str | NDArray[np.bool_] | NDArray[np.floating] | None = None, +) -> tuple[A, NDArray[np.int64]]: ... 
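Review note: the three overloads above pin down the return-type contract that the implementation below dispatches on (in-place `None`, copied `AnnData`, or `(subset, indices)` tuple). A hedged usage sketch on synthetic data, assuming a build with this patch applied:

```python
import numpy as np
import scanpy as sc
from anndata import AnnData

adata = AnnData(np.random.default_rng(0).poisson(1.0, (100, 20)).astype(np.float32))

sc.pp.sample(adata, fraction=0.5, rng=0)  # in place: adata shrinks to 50 obs, returns None
big = sc.pp.sample(adata, n=200, replace=True, copy=True)  # upsampling needs replace=True
X_sub, idx = sc.pp.sample(adata.X, n=10, rng=0)  # bare array: returns (subset, indices)
```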
+def sample(
+ data: AnnData | np.ndarray | CSMatrix | DaskArray,
+ fraction: float | None = None,
+ *,
+ n: int | None = None,
+ rng: RNGLike | SeedLike | None = None,
copy: bool = False,
-) -> AnnData | tuple[np.ndarray | spmatrix, NDArray[np.int64]] | None:
+ replace: bool = False,
+ axis: Literal["obs", 0, "var", 1] = "obs",
+ p: str | NDArray[np.bool_] | NDArray[np.floating] | None = None,
+) -> AnnData | None | tuple[np.ndarray | CSMatrix | DaskArray, NDArray[np.int64]]:
"""\
- Subsample to a fraction of the number of observations.
+ Sample observations or variables with or without replacement.
Parameters ---------- @@ -842,49 +884,89 @@ The (annotated) data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. fraction
- Subsample to this `fraction` of the number of observations.
- n_obs
- Subsample to this number of observations.
+ Sample to this `fraction` of the number of observations or variables.
+ (All of them, even if there are `0`s/`False`s in `p`.)
+ This can be larger than 1.0, if `replace=True`.
+ See `axis` and `replace`.
+ n
+ Sample to this number of observations or variables. See `axis`.
rng Random seed to change subsampling. copy If an :class:`~anndata.AnnData` is passed, determines whether a copy is returned.
+ replace
+ If True, samples are drawn with replacement.
+ axis
+ Sample `obs`\\ ervations (axis 0) or `var`\\ iables (axis 1).
+ p
+ Drawing probabilities (floats) or mask (bools).
+ Either an `axis`-sized array, or the name of a column.
+ If `p` is an array of probabilities, it must sum to 1.
Returns -------
- Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
- subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
- returns a subsampled copy of it (`copy == True`).
+ If `isinstance(data, AnnData)` and `copy=False`,
+ this function returns `None`. Otherwise:
+
+ `data[indices, :]` | `data[:, indices]` (depending on `axis`)
+ If `data` is array-like or `copy=True`, returns the subset.
+ `indices` : numpy.ndarray
+ If `data` is array-like, also returns the indices into the original.
"""
- np.random.seed(random_state)
- old_n_obs = data.n_obs if isinstance(data, AnnData) else data.shape[0]
- if n_obs is not None:
- new_n_obs = n_obs
- elif fraction is not None:
- if fraction > 1 or fraction < 0:
- raise ValueError(f"`fraction` needs to be within [0, 1], not {fraction}")
- new_n_obs = int(fraction * old_n_obs)
- logg.debug(f"... subsampled to {new_n_obs} data points")
- else:
- raise ValueError("Either pass `n_obs` or `fraction`.")
- obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=False)
- if isinstance(data, AnnData):
- if data.isbacked:
- if copy:
- return data[obs_indices].to_memory()
- else:
- raise NotImplementedError(
- "Inplace subsampling is not implemented for backed objects."
- )
+ # parameter validation
+ if not copy and isinstance(data, AnnData) and data.isbacked:
+ msg = "Inplace sampling (`copy=False`) is not implemented for backed objects."
+ raise NotImplementedError(msg)
+ axis, axis_name = _resolve_axis(axis)
+ p = _check_mask(data, p, dim=axis_name, allow_probabilities=True)
+ if p is not None and p.dtype == bool:
+ p = p.astype(np.float64) / p.sum()
+ old_n = data.shape[axis]
+ match (fraction, n):
+ case (None, None):
+ msg = "Either `fraction` or `n` must be set."
+ raise TypeError(msg)
+ case (None, _):
+ pass
+ case (_, None):
+ if fraction < 0:
+ msg = f"`{fraction=}` needs to be nonnegative."
+ raise ValueError(msg)
+ if not replace and fraction > 1:
+ msg = f"If `replace=False`, `{fraction=}` needs to be within [0, 1]."
+ raise ValueError(msg)
+ n = int(fraction * old_n)
+ logg.debug(f"... sampled to {n} {axis_name}")
+ case _:
+ msg = "Providing both `fraction` and `n` is not allowed."
+ raise TypeError(msg)
+ del fraction
+
+ # actually do subsampling
+ rng = np.random.default_rng(rng)
+ indices = rng.choice(old_n, size=n, replace=replace, p=p)
+
+ # overload 1: inplace AnnData subset
+ if not copy and isinstance(data, AnnData):
+ if axis_name == "obs":
+ data._inplace_subset_obs(indices)
else:
- if copy:
- return data[obs_indices].copy()
- else:
- data._inplace_subset_obs(obs_indices)
- else:
- X = data
- return X[obs_indices], obs_indices
+ data._inplace_subset_var(indices)
+ return None
+
+ subset = data[indices] if axis_name == "obs" else data[:, indices]
+
+ # overload 2: copy AnnData subset
+ if copy and isinstance(data, AnnData):
+ assert isinstance(subset, AnnData)
+ return subset.to_memory() if data.isbacked else subset.copy()
+
+ # overload 3: return array and indices
+ assert isinstance(subset, np.ndarray | CSMatrix | DaskArray), type(subset)
+ if copy:
+ subset = subset.copy()
+ return subset, indices
@renamed_arg("target_counts", "counts_per_cell")
diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py index 3c958a1e50..3333fbc0a1 100644 --- a/src/scanpy/readwrite.py +++ b/src/scanpy/readwrite.py @@ -36,7 +36,7 @@ from matplotlib.image import imread from . import logging as logg -from ._compat import old_positionals +from ._compat import add_note, deprecated, old_positionals from ._settings import settings from ._utils import _empty @@ -366,6 +366,7 @@ def _read_v3_10x_h5(filename, *, start=None): raise Exception("File is missing one or more required datasets.") +@deprecated("Use `squidpy.read.visium` instead.") def read_visium( path: Path | str, genome: str | None = None, @@ -378,6 +379,9 @@ """\ Read 10x-Genomics-formatted visium dataset. + .. deprecated:: 1.11.0 + Use :func:`squidpy.read.visium` instead. + In addition to reading regular 10x output, this looks for the `spatial` folder and loads images, coordinates and scale factors. @@ -993,15 +997,11 @@ def _get_filename_from_key(key, ext=None) -> Path: def _download(url: str, path: Path): - try: - import ipywidgets # noqa: F401 - from tqdm.auto import tqdm - except ImportError: - from tqdm import tqdm - from urllib.error import URLError from urllib.request import Request, urlopen + from tqdm.auto import tqdm + blocksize = 1024 * 8 blocknum = 0 @@ -1011,14 +1011,17 @@ def _download(url: str, path: Path): try: open_url = urlopen(req) except URLError: - logg.warning( - "Failed to open the url with default certificates, trying with certifi." - ) + msg = "Failed to open the url with default certificates."
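+ # Fall back to certifi's CA bundle only if certifi is installed; otherwise annotate the ImportError (via add_note) and re-raise it.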
+ try:
+ from certifi import where
+ except ImportError as e:
+ add_note(e, f"{msg} Please install `certifi` and try again.")
+ raise
+ else:
+ logg.warning(f"{msg} Trying to use certifi.")
from ssl import create_default_context
- from certifi import where
-
open_url = urlopen(req, context=create_default_context(cafile=where()))
with open_url as resp:
diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 9a2896196a..2c214fcfdd 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -124,10 +124,11 @@ def __init__( ) if len(invalid_groups_selected) > 0: - raise ValueError( - "Could not calculate statistics for groups {} since they only " - "contain one sample.".format(", ".join(invalid_groups_selected)) + msg = ( + f"Could not calculate statistics for groups {', '.join(invalid_groups_selected)} " + "since they only contain one sample." ) + raise ValueError(msg) adata_comp = adata if layer is not None: @@ -593,8 +594,7 @@ def rank_genes_groups( >>> # to visualize the results >>> sc.pl.rank_genes_groups(adata) """ - if mask_var is not None: - mask_var = _check_mask(adata, mask_var, "var") + mask_var = _check_mask(adata, mask_var, "var") if use_raw is None: use_raw = adata.raw is not None @@ -853,7 +853,7 @@ def filter_rank_genes_groups( if not use_logfolds or not use_fraction: sub_X = adata.raw[:, var_names].X if use_raw else adata[:, var_names].X - in_group = adata.obs[groupby] == cluster + in_group = (adata.obs[groupby] == cluster).to_numpy() X_in = sub_X[in_group] X_out = sub_X[~in_group]
diff --git a/tests/notebooks/test_paga_paul15_subsampled.py b/tests/notebooks/test_paga_paul15_subsampled.py index 9ce6ea8319..5d8c17d336 100644 --- a/tests/notebooks/test_paga_paul15_subsampled.py +++ b/tests/notebooks/test_paga_paul15_subsampled.py @@ -138,6 +138,6 @@ def test_paga_paul15_subsampled(image_comparer, plt): show=False, ) # add a test for this at some point - # data.to_csv('./write/paga_path_{}.csv'.format(descr)) + # data.to_csv(f"./write/paga_path_{descr}.csv") save_and_compare_images("paga_path")
diff --git a/tests/test_backed.py b/tests/test_backed.py index 787edf9c21..bfa1d79592 100644 --- a/tests/test_backed.py +++ b/tests/test_backed.py @@ -91,8 +91,8 @@ def test_log1p_backed_errors(backed_adata): def test_scatter_backed(backed_adata): sc.pp.pca(backed_adata, chunked=True) - sc.pl.scatter(backed_adata, color="0", basis="pca") + sc.pl.scatter(backed_adata, color="0", basis="pca", show=False) def test_dotplot_backed(backed_adata): - sc.pl.dotplot(backed_adata, ["0", "1", "2", "3"], groupby="cat") + sc.pl.dotplot(backed_adata, ["0", "1", "2", "3"], groupby="cat", show=False)
diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 4bad3800d7..5e0fc1e125 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -111,6 +111,7 @@ def test_pbmc68k_reduced(): sc.datasets.pbmc68k_reduced() +@pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") @pytest.mark.internet def test_visium_datasets(): """Tests that reading/downloading works and does not have global effects.""" assert_adata_equal(hheart, hheart_again) +@pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") @pytest.mark.internet def test_visium_datasets_dir_change(tmp_path: Path): """Test that changing the dataset dir doesn't break reading.""" @@ -132,6 +134,7 @@
assert_adata_equal(mbrain, mbrain_again)
+@pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") @pytest.mark.internet def test_visium_datasets_images(): """Test that image download works and does not have global effects."""
diff --git a/tests/test_embedding_plots.py b/tests/test_embedding_plots.py deleted file mode 100644 index d48f44b2b6..0000000000 --- a/tests/test_embedding_plots.py +++ /dev/null @@ -1,566 +0,0 @@
-from __future__ import annotations
-
-from functools import partial
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import pytest
-import seaborn as sns
-from matplotlib.colors import Normalize
-from matplotlib.testing.compare import compare_images
-
-import scanpy as sc
-from testing.scanpy._helpers.data import pbmc3k_processed
-
-if TYPE_CHECKING:
- from scanpy.plotting._utils import _LegendLoc
-
-
-HERE: Path = Path(__file__).parent
-ROOT = HERE / "_images"
-
-MISSING_VALUES_ROOT = ROOT / "embedding-missing-values"
-
-
-def check_images(pth1, pth2, *, tol):
- result = compare_images(pth1, pth2, tol=tol)
- assert result is None, result
-
-
-@pytest.fixture(scope="module")
-def adata():
- """A bit cute."""
- from matplotlib.image import imread
- from sklearn.cluster import DBSCAN
- from sklearn.datasets import make_blobs
-
- empty_pixel = np.array([1.0, 1.0, 1.0, 0]).reshape(1, 1, -1)
- image = imread(HERE.parent / "docs/_static/img/Scanpy_Logo_RGB.png")
- x, y = np.where(np.logical_and.reduce(~np.equal(image, empty_pixel), axis=2))
-
- # Just using to calculate the hex coords
- hexes = plt.hexbin(x, y, gridsize=(44, 100))
- counts = hexes.get_array()
- pixels = hexes.get_offsets()[counts != 0]
- plt.close()
-
- labels = DBSCAN(eps=20, min_samples=2).fit(pixels).labels_
- order = np.argsort(labels)
- adata = sc.AnnData(
- make_blobs(
- pd.Series(labels[order]).value_counts().values,
- n_features=20,
- shuffle=False,
- random_state=42,
- )[0],
- obs={"label": pd.Categorical(labels[order].astype(str))},
- obsm={"spatial": pixels[order, ::-1]},
- uns={
- "spatial": {
- "scanpy_img": {
- "images": {"hires": image},
- "scalefactors": {
- "tissue_hires_scalef": 1,
- "spot_diameter_fullres": 10,
- },
- }
- }
- },
- )
- sc.pp.pca(adata)
-
- # Adding some missing values
- adata.obs["label_missing"] = adata.obs["label"].copy()
- adata.obs["label_missing"][::2] = np.nan
-
- adata.obs["1_missing"] = adata.obs_vector("1")
- adata.obs.loc[
- adata.obsm["spatial"][:, 0] < adata.obsm["spatial"][:, 0].mean(), "1_missing"
- ] = np.nan
-
- return adata
-
-
-@pytest.fixture
-def fixture_request(request):
- """Returns a Request object.
-
- Allows you to access names of parameterized tests from within a test.
- """ - return request - - -@pytest.fixture( - params=[(0, 0, 0, 1), None], - ids=["na_color.black_tup", "na_color.default"], -) -def na_color(request): - return request.param - - -@pytest.fixture(params=[True, False], ids=["na_in_legend.True", "na_in_legend.False"]) -def na_in_legend(request): - return request.param - - -@pytest.fixture( - params=[partial(sc.pl.pca, show=False), partial(sc.pl.spatial, show=False)], - ids=["pca", "spatial"], -) -def plotfunc(request): - return request.param - - -@pytest.fixture( - params=["on data", "right margin", "lower center", None], - ids=["legend.on_data", "legend.on_right", "legend.on_bottom", "legend.off"], -) -def legend_loc(request) -> _LegendLoc | None: - return request.param - - -@pytest.fixture( - params=[lambda x: list(x.cat.categories[:3]), lambda x: []], - ids=["groups.3", "groups.all"], -) -def groupsfunc(request): - return request.param - - -@pytest.fixture( - params=[ - pytest.param( - {"vmin": None, "vmax": None, "vcenter": None, "norm": None}, - id="vbounds.default", - ), - pytest.param( - {"vmin": 0, "vmax": 5, "vcenter": None, "norm": None}, id="vbounds.numbers" - ), - pytest.param( - {"vmin": "p15", "vmax": "p90", "vcenter": None, "norm": None}, - id="vbounds.percentile", - ), - pytest.param( - {"vmin": 0, "vmax": "p99", "vcenter": 0.1, "norm": None}, - id="vbounds.vcenter", - ), - pytest.param( - {"vmin": None, "vmax": None, "vcenter": None, "norm": Normalize(0, 5)}, - id="vbounds.norm", - ), - ] -) -def vbounds(request): - return request.param - - -def test_missing_values_categorical( - *, - fixture_request: pytest.FixtureRequest, - image_comparer, - adata, - plotfunc, - na_color, - na_in_legend, - legend_loc, - groupsfunc, -): - save_and_compare_images = partial(image_comparer, MISSING_VALUES_ROOT, tol=15) - - base_name = fixture_request.node.name - - # Passing through a dict so it's easier to use default values - kwargs = {} - kwargs["legend_loc"] = legend_loc - kwargs["groups"] = groupsfunc(adata.obs["label"]) - if na_color is not None: - kwargs["na_color"] = na_color - kwargs["na_in_legend"] = na_in_legend - - plotfunc(adata, color=["label", "label_missing"], **kwargs) - - save_and_compare_images(base_name) - - -def test_missing_values_continuous( - *, - fixture_request: pytest.FixtureRequest, - image_comparer, - adata, - plotfunc, - na_color, - vbounds, -): - save_and_compare_images = partial(image_comparer, MISSING_VALUES_ROOT, tol=15) - - base_name = fixture_request.node.name - - # Passing through a dict so it's easier to use default values - kwargs = {} - kwargs.update(vbounds) - if na_color is not None: - kwargs["na_color"] = na_color - - plotfunc(adata, color=["1", "1_missing"], **kwargs) - - save_and_compare_images(base_name) - - -def test_enumerated_palettes(fixture_request, adata, tmpdir, plotfunc): - tmpdir = Path(tmpdir) - base_name = fixture_request.node.name - - categories = adata.obs["label"].cat.categories - colors_rgb = dict(zip(categories, sns.color_palette(n_colors=12))) - - dict_pth = tmpdir / f"rgbdict_{base_name}.png" - list_pth = tmpdir / f"rgblist_{base_name}.png" - - # making a copy so colors aren't saved - plotfunc(adata.copy(), color="label", palette=colors_rgb) - plt.savefig(dict_pth, dpi=40) - plt.close() - plotfunc(adata.copy(), color="label", palette=[colors_rgb[c] for c in categories]) - plt.savefig(list_pth, dpi=40) - plt.close() - - check_images(dict_pth, list_pth, tol=15) - - -def test_dimension_broadcasting(adata, tmpdir, check_same_image): - tmpdir = Path(tmpdir) - - with pytest.raises( - 
ValueError, - match=r"Could not broadcast together arguments with shapes: \[2, 3, 1\]", - ): - sc.pl.pca( - adata, color=["label", "1_missing"], dimensions=[(0, 1), (1, 2), (2, 3)] - ) - - dims_pth = tmpdir / "broadcast_dims.png" - color_pth = tmpdir / "broadcast_colors.png" - - sc.pl.pca(adata, color=["label", "label", "label"], dimensions=(2, 3), show=False) - plt.savefig(dims_pth, dpi=40) - plt.close() - sc.pl.pca(adata, color="label", dimensions=[(2, 3), (2, 3), (2, 3)], show=False) - plt.savefig(color_pth, dpi=40) - plt.close() - - check_same_image(dims_pth, color_pth, tol=5) - - -def test_marker_broadcasting(adata, tmpdir, check_same_image): - tmpdir = Path(tmpdir) - - with pytest.raises( - ValueError, - match=r"Could not broadcast together arguments with shapes: \[2, 1, 3\]", - ): - sc.pl.pca(adata, color=["label", "1_missing"], marker=[".", "^", "x"]) - - dims_pth = tmpdir / "broadcast_markers.png" - color_pth = tmpdir / "broadcast_colors_for_markers.png" - - sc.pl.pca(adata, color=["label", "label", "label"], marker="^", show=False) - plt.savefig(dims_pth, dpi=40) - plt.close() - sc.pl.pca(adata, color="label", marker=["^", "^", "^"], show=False) - plt.savefig(color_pth, dpi=40) - plt.close() - - check_same_image(dims_pth, color_pth, tol=5) - - -def test_dimensions_same_as_components(adata, tmpdir, check_same_image): - tmpdir = Path(tmpdir) - adata = adata.copy() - adata.obs["mean"] = np.ravel(adata.X.mean(axis=1)) - - comp_pth = tmpdir / "components_plot.png" - dims_pth = tmpdir / "dimension_plot.png" - - # TODO: Deprecate components kwarg - # with pytest.warns(FutureWarning, match=r"components .* deprecated"): - sc.pl.pca( - adata, - color=["mean", "label"], - components=["1,2", "2,3"], - show=False, - ) - plt.savefig(comp_pth, dpi=40) - plt.close() - - sc.pl.pca( - adata, - color=["mean", "mean", "label", "label"], - dimensions=[(0, 1), (1, 2), (0, 1), (1, 2)], - show=False, - ) - plt.savefig(dims_pth, dpi=40) - plt.close() - - check_same_image(dims_pth, comp_pth, tol=5) - - -def test_embedding_colorbar_location(image_comparer): - save_and_compare_images = partial(image_comparer, ROOT, tol=15) - - adata = pbmc3k_processed().raw.to_adata() - - sc.pl.pca(adata, color="LDHB", colorbar_loc=None) - - save_and_compare_images("no_colorbar") - - -# Spatial specific - - -def test_visium_circles(image_comparer): # standard visium data - save_and_compare_images = partial(image_comparer, ROOT, tol=15) - - adata = sc.read_visium(HERE / "_data" / "visium_data" / "1.0.0") - adata.obs = adata.obs.astype({"array_row": "str"}) - - sc.pl.spatial( - adata, - color="array_row", - groups=["24", "33"], - crop_coord=(100, 400, 400, 100), - alpha=0.5, - size=1.3, - show=False, - ) - - save_and_compare_images("spatial_visium") - - -def test_visium_default(image_comparer): # default values - from packaging.version import parse as parse_version - - if parse_version(mpl.__version__) < parse_version("3.7.0"): - pytest.xfail("Matplotlib 3.7.0+ required for this test") - - save_and_compare_images = partial(image_comparer, ROOT, tol=5) - - adata = sc.read_visium(HERE / "_data" / "visium_data" / "1.0.0") - adata.obs = adata.obs.astype({"array_row": "str"}) - - # Points default to transparent if an image is included - sc.pl.spatial(adata, show=False) - - save_and_compare_images("spatial_visium_default") - - -def test_visium_empty_img_key(image_comparer): # visium coordinates but image empty - save_and_compare_images = partial(image_comparer, ROOT, tol=15) - - adata = sc.read_visium(HERE / "_data" / "visium_data" 
/ "1.0.0") - adata.obs = adata.obs.astype({"array_row": "str"}) - - sc.pl.spatial(adata, img_key=None, color="array_row", show=False) - - save_and_compare_images("spatial_visium_empty_image") - - sc.pl.embedding(adata, basis="spatial", color="array_row", show=False) - save_and_compare_images("spatial_visium_embedding") - - -def test_spatial_general(image_comparer): # general coordinates - save_and_compare_images = partial(image_comparer, ROOT, tol=15) - - adata = sc.read_visium(HERE / "_data" / "visium_data" / "1.0.0") - adata.obs = adata.obs.astype({"array_row": "str"}) - spatial_metadata = adata.uns.pop( - "spatial" - ) # spatial data don't have imgs, so remove entry from uns - # Required argument for now - spot_size = list(spatial_metadata.values())[0]["scalefactors"][ - "spot_diameter_fullres" - ] - - sc.pl.spatial(adata, show=False, spot_size=spot_size) - save_and_compare_images("spatial_general_nocol") - - # category - sc.pl.spatial(adata, show=False, spot_size=spot_size, color="array_row") - save_and_compare_images("spatial_general_cat") - - # continuous - sc.pl.spatial(adata, show=False, spot_size=spot_size, color="array_col") - save_and_compare_images("spatial_general_cont") - - -def test_spatial_external_img(image_comparer): # external image - save_and_compare_images = partial(image_comparer, ROOT, tol=15) - - adata = sc.read_visium(HERE / "_data" / "visium_data" / "1.0.0") - adata.obs = adata.obs.astype({"array_row": "str"}) - - img = adata.uns["spatial"]["custom"]["images"]["hires"] - scalef = adata.uns["spatial"]["custom"]["scalefactors"]["tissue_hires_scalef"] - sc.pl.spatial( - adata, - color="array_row", - scale_factor=scalef, - img=img, - basis="spatial", - show=False, - ) - save_and_compare_images("spatial_external_img") - - -@pytest.fixture(scope="module") -def equivalent_spatial_plotters(adata): - no_spatial = adata.copy() - del no_spatial.uns["spatial"] - - img_key = "hires" - library_id = list(adata.uns["spatial"])[0] - spatial_data = adata.uns["spatial"][library_id] - img = spatial_data["images"][img_key] - scale_factor = spatial_data["scalefactors"][f"tissue_{img_key}_scalef"] - spot_size = spatial_data["scalefactors"]["spot_diameter_fullres"] - - orig_plotter = partial(sc.pl.spatial, adata, color="1", show=False) - removed_plotter = partial( - sc.pl.spatial, - no_spatial, - color="1", - img=img, - scale_factor=scale_factor, - spot_size=spot_size, - show=False, - ) - - return (orig_plotter, removed_plotter) - - -@pytest.fixture(scope="module") -def equivalent_spatial_plotters_no_img(equivalent_spatial_plotters): - orig, removed = equivalent_spatial_plotters - return (partial(orig, img_key=None), partial(removed, img=None, scale_factor=None)) - - -@pytest.fixture( - params=[ - pytest.param({"crop_coord": (50, 200, 0, 500)}, id="crop"), - pytest.param({"size": 0.5}, id="size:.5"), - pytest.param({"size": 2}, id="size:2"), - pytest.param({"spot_size": 5}, id="spotsize"), - pytest.param({"bw": True}, id="bw"), - # Shape of the image for particular fixture, should not be hardcoded like this - pytest.param({"img": np.ones((774, 1755, 4)), "scale_factor": 1.0}, id="img"), - pytest.param( - {"na_color": (0, 0, 0, 0), "color": "1_missing"}, id="na_color.transparent" - ), - pytest.param( - {"na_color": "lightgray", "color": "1_missing"}, id="na_color.lightgray" - ), - ] -) -def spatial_kwargs(request): - return request.param - - -def test_manual_equivalency(equivalent_spatial_plotters, tmpdir, spatial_kwargs): - """ - Tests that manually passing values to sc.pl.spatial is 
similar to automatic extraction. - """ - orig, removed = equivalent_spatial_plotters - - TESTDIR = Path(tmpdir) - orig_pth = TESTDIR / "orig.png" - removed_pth = TESTDIR / "removed.png" - - orig(**spatial_kwargs) - plt.savefig(orig_pth, dpi=40) - plt.close() - removed(**spatial_kwargs) - plt.savefig(removed_pth, dpi=40) - plt.close() - - check_images(orig_pth, removed_pth, tol=1) - - -def test_manual_equivalency_no_img( - equivalent_spatial_plotters_no_img, tmpdir, spatial_kwargs -): - if "bw" in spatial_kwargs: - # Has no meaning when there is no image - pytest.skip() - orig, removed = equivalent_spatial_plotters_no_img - - TESTDIR = Path(tmpdir) - orig_pth = TESTDIR / "orig.png" - removed_pth = TESTDIR / "removed.png" - - orig(**spatial_kwargs) - plt.savefig(orig_pth, dpi=40) - plt.close() - removed(**spatial_kwargs) - plt.savefig(removed_pth, dpi=40) - plt.close() - - check_images(orig_pth, removed_pth, tol=1) - - -def test_white_background_vs_no_img(adata, tmpdir, spatial_kwargs): - if {"bw", "img", "img_key", "na_color"}.intersection(spatial_kwargs): - # These arguments don't make sense for this check - pytest.skip() - - white_background = np.ones_like( - adata.uns["spatial"]["scanpy_img"]["images"]["hires"] - ) - TESTDIR = Path(tmpdir) - white_pth = TESTDIR / "white_background.png" - noimg_pth = TESTDIR / "no_img.png" - - sc.pl.spatial( - adata, - color="2", - img=white_background, - scale_factor=1.0, - show=False, - **spatial_kwargs, - ) - plt.savefig(white_pth) - sc.pl.spatial(adata, color="2", img_key=None, show=False, **spatial_kwargs) - plt.savefig(noimg_pth) - - check_images(white_pth, noimg_pth, tol=1) - - -def test_spatial_na_color(adata, tmpdir): - """ - Check that na_color defaults to transparent when an image is present, light gray when not. 
- """ - white_background = np.ones_like( - adata.uns["spatial"]["scanpy_img"]["images"]["hires"] - ) - TESTDIR = Path(tmpdir) - lightgray_pth = TESTDIR / "lightgray.png" - transparent_pth = TESTDIR / "transparent.png" - noimg_pth = TESTDIR / "noimg.png" - whiteimg_pth = TESTDIR / "whiteimg.png" - - def plot(pth, **kwargs): - sc.pl.spatial(adata, color="1_missing", show=False, **kwargs) - plt.savefig(pth, dpi=40) - plt.close() - - plot(lightgray_pth, na_color="lightgray", img_key=None) - plot(transparent_pth, na_color=(0.0, 0.0, 0.0, 0.0), img_key=None) - plot(noimg_pth, img_key=None) - plot(whiteimg_pth, img=white_background, scale_factor=1.0) - - check_images(lightgray_pth, noimg_pth, tol=1) - check_images(transparent_pth, whiteimg_pth, tol=1) - with pytest.raises(AssertionError): - check_images(lightgray_pth, transparent_pth, tol=1) diff --git a/tests/test_filter_rank_genes_groups.py b/tests/test_filter_rank_genes_groups.py index 26851bb102..a64ac983f3 100644 --- a/tests/test_filter_rank_genes_groups.py +++ b/tests/test_filter_rank_genes_groups.py @@ -1,159 +1,96 @@ from __future__ import annotations import numpy as np +import pytest from scanpy.tools import filter_rank_genes_groups, rank_genes_groups from testing.scanpy._helpers.data import pbmc68k_reduced -names_no_reference = np.array( +NAMES_NO_REF = [ + ["CD3D", "ITM2A", "CD3D", "CCL5", "CD7", "nan", "CD79A", "nan", "NKG7", "LYZ"], + ["CD3E", "CD3D", "nan", "NKG7", "CD3D", "AIF1", "CD79B", "nan", "GNLY", "CST3"], + ["IL32", "RPL39", "nan", "CST7", "nan", "nan", "nan", "SNHG7", "CD7", "nan"], + ["nan", "SRSF7", "IL32", "GZMA", "nan", "LST1", "IGJ", "nan", "CTSW", "nan"], + ["nan", "nan", "CD2", "CTSW", "CD8B", "TYROBP", "ISG20", "SNHG8", "GZMB", "nan"], +] + +NAMES_REF = [ + ["CD3D", "ITM2A", "CD3D", "nan", "CD3D", "nan", "CD79A", "nan", "CD7"], + ["nan", "nan", "nan", "CD3D", "nan", "AIF1", "nan", "nan", "NKG7"], + ["nan", "nan", "nan", "NKG7", "nan", "FCGR3A", "ISG20", "SNHG7", "CTSW"], + ["nan", "CD3D", "nan", "CCL5", "CD7", "nan", "CD79B", "nan", "GNLY"], + ["CD3E", "IL32", "nan", "IL32", "CD27", "FCER1G", "nan", "nan", "nan"], +] + +NAMES_NO_REF_COMPARE_ABS = [ [ - ["CD3D", "ITM2A", "CD3D", "CCL5", "CD7", "nan", "CD79A", "nan", "NKG7", "LYZ"], - ["CD3E", "CD3D", "nan", "NKG7", "CD3D", "AIF1", "CD79B", "nan", "GNLY", "CST3"], - ["IL32", "RPL39", "nan", "CST7", "nan", "nan", "nan", "SNHG7", "CD7", "nan"], - ["nan", "SRSF7", "IL32", "GZMA", "nan", "LST1", "IGJ", "nan", "CTSW", "nan"], - [ - "nan", - "nan", - "CD2", - "CTSW", - "CD8B", - "TYROBP", - "ISG20", - "SNHG8", - "GZMB", - "nan", - ], - ] -) - -names_reference = np.array( + *("CD3D", "ITM2A", "HLA-DRB1", "CCL5", "HLA-DPA1"), + *("nan", "CD79A", "nan", "NKG7", "LYZ"), + ], [ - ["CD3D", "ITM2A", "CD3D", "nan", "CD3D", "nan", "CD79A", "nan", "CD7"], - ["nan", "nan", "nan", "CD3D", "nan", "AIF1", "nan", "nan", "NKG7"], - ["nan", "nan", "nan", "NKG7", "nan", "FCGR3A", "ISG20", "SNHG7", "CTSW"], - ["nan", "CD3D", "nan", "CCL5", "CD7", "nan", "CD79B", "nan", "GNLY"], - ["CD3E", "IL32", "nan", "IL32", "CD27", "FCER1G", "nan", "nan", "nan"], - ] -) - -names_compare_abs = np.array( + *("HLA-DPA1", "nan", "CD3D", "NKG7", "HLA-DRB1"), + *("AIF1", "CD79B", "nan", "GNLY", "CST3"), + ], [ - [ - "CD3D", - "ITM2A", - "HLA-DRB1", - "CCL5", - "HLA-DPA1", - "nan", - "CD79A", - "nan", - "NKG7", - "LYZ", - ], - [ - "HLA-DPA1", - "nan", - "CD3D", - "NKG7", - "HLA-DRB1", - "AIF1", - "CD79B", - "nan", - "GNLY", - "CST3", - ], - [ - "nan", - "PSAP", - "CD74", - "CST7", - "CD74", - "PSAP", - "FCER1G", 
- "SNHG7", - "CD7", - "HLA-DRA", - ], - [ - "IL32", - "nan", - "HLA-DRB5", - "GZMA", - "HLA-DRB5", - "LST1", - "nan", - "nan", - "CTSW", - "HLA-DRB1", - ], - [ - "nan", - "FCER1G", - "HLA-DPB1", - "CTSW", - "HLA-DPB1", - "TYROBP", - "TYROBP", - "S100A10", - "GZMB", - "HLA-DPA1", - ], - ] -) - - -def test_filter_rank_genes_groups(): - adata = pbmc68k_reduced() - - # fix filter defaults - args = { - "adata": adata, - "key_added": "rank_genes_groups_filtered", - "min_in_group_fraction": 0.25, - "min_fold_change": 1, - "max_out_group_fraction": 0.5, - } - - rank_genes_groups( - adata, "bulk_labels", reference="Dendritic", method="wilcoxon", n_genes=5 - ) - filter_rank_genes_groups(**args) - - assert np.array_equal( - names_reference, - np.array(adata.uns["rank_genes_groups_filtered"]["names"].tolist()), - ) + *("nan", "PSAP", "CD74", "CST7", "CD74"), + *("PSAP", "FCER1G", "SNHG7", "CD7", "HLA-DRA"), + ], + [ + *("IL32", "nan", "HLA-DRB5", "GZMA", "HLA-DRB5"), + *("LST1", "nan", "nan", "CTSW", "HLA-DRB1"), + ], + [ + *("nan", "FCER1G", "HLA-DPB1", "CTSW", "HLA-DPB1"), + *("TYROBP", "TYROBP", "S100A10", "GZMB", "HLA-DPA1"), + ], +] - rank_genes_groups(adata, "bulk_labels", method="wilcoxon", n_genes=5) - filter_rank_genes_groups(**args) - assert np.array_equal( - names_no_reference, - np.array(adata.uns["rank_genes_groups_filtered"]["names"].tolist()), - ) +EXPECTED = { + ("Dendritic", False): np.array(NAMES_REF), + ("rest", False): np.array(NAMES_NO_REF), + ("rest", True): np.array(NAMES_NO_REF_COMPARE_ABS), +} - rank_genes_groups(adata, "bulk_labels", method="wilcoxon", pts=True, n_genes=5) - filter_rank_genes_groups(**args) - assert np.array_equal( - names_no_reference, - np.array(adata.uns["rank_genes_groups_filtered"]["names"].tolist()), - ) +@pytest.mark.parametrize( + ("reference", "pts", "abs"), + [ + pytest.param("Dendritic", False, False, id="ref-no_pts-no_abs"), + pytest.param("rest", False, False, id="rest-no_pts-no_abs"), + pytest.param("rest", True, False, id="rest-pts-no_abs"), + pytest.param("rest", True, True, id="rest-pts-abs"), + ], +) +def test_filter_rank_genes_groups(reference, pts, abs): + adata = pbmc68k_reduced() - # test compare_abs rank_genes_groups( - adata, "bulk_labels", method="wilcoxon", pts=True, rankby_abs=True, n_genes=5 - ) - - filter_rank_genes_groups( adata, - compare_abs=True, - min_in_group_fraction=-1, - max_out_group_fraction=1, - min_fold_change=3.1, + "bulk_labels", + reference=reference, + pts=pts, + method="wilcoxon", + rankby_abs=abs, + n_genes=5, ) + if abs: + filter_rank_genes_groups( + adata, + compare_abs=True, + min_in_group_fraction=-1, + max_out_group_fraction=1, + min_fold_change=3.1, + ) + else: + filter_rank_genes_groups( + adata, + min_in_group_fraction=0.25, + min_fold_change=1, + max_out_group_fraction=0.5, + ) assert np.array_equal( - names_compare_abs, + EXPECTED[reference, abs], np.array(adata.uns["rank_genes_groups_filtered"]["names"].tolist()), ) diff --git a/tests/test_logging.py b/tests/test_logging.py index 3f8a3ee97d..81b4acbf38 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -142,6 +142,8 @@ def test_call_outputs(func): """ output_io = StringIO() with redirect_stdout(output_io): - func() + out = func() + if out is not None: + print(out) output = output_io.getvalue() assert output != "" diff --git a/tests/test_package_structure.py b/tests/test_package_structure.py index 834c06d8b4..3541c561a5 100644 --- a/tests/test_package_structure.py +++ b/tests/test_package_structure.py @@ -138,6 +138,7 @@ class 
ExpectedSig(TypedDict): copy_sigs["sc.pp.filter_cells"] = None # unclear `inplace` situation copy_sigs["sc.pp.filter_genes"] = None # unclear `inplace` situation copy_sigs["sc.pp.subsample"] = None # returns indices along matrix +copy_sigs["sc.pp.sample"] = None # returns indices along matrix # partial exceptions: “data” instead of “adata” copy_sigs["sc.pp.log1p"]["first_name"] = "data" copy_sigs["sc.pp.normalize_per_cell"]["first_name"] = "data" diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 2f0f5f60cd..f135a68aa4 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1456,11 +1456,10 @@ def test_rankings(image_comparer): # TODO: Make more generic -def test_scatter_rep(tmpdir): +def test_scatter_rep(tmp_path): """ Test to make sure I can predict when scatter reps should be the same """ - TESTDIR = Path(tmpdir) rep_args = { "raw": {"use_raw": True}, "layer": {"layer": "layer", "use_raw": False}, @@ -1475,7 +1474,7 @@ def test_scatter_rep(tmpdir): columns=["rep", "gene", "result"], ) states["outpth"] = [ - TESTDIR / f"{state.gene}_{state.rep}_{state.result}.png" + tmp_path / f"{state.gene}_{state.rep}_{state.result}.png" for state in states.itertuples() ] pattern = np.array(list(chain.from_iterable(repeat(i, 5) for i in range(3)))) diff --git a/tests/test_plotting_embedded/conftest.py b/tests/test_plotting_embedded/conftest.py new file mode 100644 index 0000000000..d9e8ff8581 --- /dev/null +++ b/tests/test_plotting_embedded/conftest.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import pytest + +import scanpy as sc + +HERE: Path = Path(__file__).parent + + +@pytest.fixture(scope="module") +def adata(): + """A bit cute.""" + from matplotlib.image import imread + from sklearn.cluster import DBSCAN + from sklearn.datasets import make_blobs + + empty_pixel = np.array([1.0, 1.0, 1.0, 0]).reshape(1, 1, -1) + image = imread(HERE.parent.parent / "docs/_static/img/Scanpy_Logo_RGB.png") + x, y = np.where(np.logical_and.reduce(~np.equal(image, empty_pixel), axis=2)) + + # Just using to calculate the hex coords + hexes = plt.hexbin(x, y, gridsize=(44, 100)) + counts = hexes.get_array() + pixels = hexes.get_offsets()[counts != 0] + plt.close() + + labels = DBSCAN(eps=20, min_samples=2).fit(pixels).labels_ + order = np.argsort(labels) + adata = sc.AnnData( + make_blobs( + pd.Series(labels[order]).value_counts().values, + n_features=20, + shuffle=False, + random_state=42, + )[0], + obs={"label": pd.Categorical(labels[order].astype(str))}, + obsm={"spatial": pixels[order, ::-1]}, + uns={ + "spatial": { + "scanpy_img": { + "images": {"hires": image}, + "scalefactors": { + "tissue_hires_scalef": 1, + "spot_diameter_fullres": 10, + }, + } + } + }, + ) + sc.pp.pca(adata) + + # Adding some missing values + adata.obs["label_missing"] = adata.obs["label"].copy() + adata.obs.loc[::2, "label_missing"] = np.nan + + adata.obs["1_missing"] = adata.obs_vector("1") + adata.obs.loc[ + adata.obsm["spatial"][:, 0] < adata.obsm["spatial"][:, 0].mean(), "1_missing" + ] = np.nan + + return adata diff --git a/tests/test_plotting_embedded/test_embeddings.py b/tests/test_plotting_embedded/test_embeddings.py new file mode 100644 index 0000000000..c5dc8d3e53 --- /dev/null +++ b/tests/test_plotting_embedded/test_embeddings.py @@ -0,0 +1,253 @@ +from __future__ import annotations + +from functools import partial, wraps +from pathlib import Path +from typing import TYPE_CHECKING + +import 
matplotlib.pyplot as plt +import numpy as np +import pytest +import seaborn as sns +from matplotlib.colors import Normalize +from matplotlib.testing.compare import compare_images + +import scanpy as sc +from testing.scanpy._helpers.data import pbmc3k_processed + +if TYPE_CHECKING: + from scanpy.plotting._utils import _LegendLoc + + +HERE: Path = Path(__file__).parent +ROOT = HERE.parent / "_images" + +MISSING_VALUES_ROOT = ROOT / "embedding-missing-values" + + +def check_images(pth1: Path, pth2: Path, *, tol: int) -> None: + result = compare_images(str(pth1), str(pth2), tol=tol) + assert result is None, result + + +@pytest.fixture( + params=[(0, 0, 0, 1), None], + ids=["na_color.black_tup", "na_color.default"], +) +def na_color(request): + return request.param + + +@pytest.fixture(params=[True, False], ids=["na_in_legend.True", "na_in_legend.False"]) +def na_in_legend(request): + return request.param + + +@pytest.fixture(params=[sc.pl.pca, sc.pl.spatial]) +def plotfunc(request): + if request.param is sc.pl.spatial: + + @wraps(request.param) + def f(adata, **kwargs): + with pytest.warns(FutureWarning, match=r"Use `squidpy.*` instead"): + return sc.pl.spatial(adata, **kwargs) + + else: + f = request.param + return partial(f, show=False) + + +@pytest.fixture( + params=["on data", "right margin", "lower center", None], + ids=["legend.on_data", "legend.on_right", "legend.on_bottom", "legend.off"], +) +def legend_loc(request) -> _LegendLoc | None: + return request.param + + +@pytest.fixture( + params=[lambda x: list(x.cat.categories[:3]), lambda x: []], + ids=["groups.3", "groups.all"], +) +def groupsfunc(request): + return request.param + + +@pytest.fixture( + params=[ + pytest.param( + {"vmin": None, "vmax": None, "vcenter": None, "norm": None}, + id="vbounds.default", + ), + pytest.param( + {"vmin": 0, "vmax": 5, "vcenter": None, "norm": None}, id="vbounds.numbers" + ), + pytest.param( + {"vmin": "p15", "vmax": "p90", "vcenter": None, "norm": None}, + id="vbounds.percentile", + ), + pytest.param( + {"vmin": 0, "vmax": "p99", "vcenter": 0.1, "norm": None}, + id="vbounds.vcenter", + ), + pytest.param( + {"vmin": None, "vmax": None, "vcenter": None, "norm": Normalize(0, 5)}, + id="vbounds.norm", + ), + ] +) +def vbounds(request): + return request.param + + +def test_missing_values_categorical( + *, + request: pytest.FixtureRequest, + image_comparer, + adata, + plotfunc, + na_color, + na_in_legend, + legend_loc, + groupsfunc, +): + save_and_compare_images = partial(image_comparer, MISSING_VALUES_ROOT, tol=15) + + base_name = request.node.name + + # Passing through a dict so it's easier to use default values + kwargs = {} + kwargs["legend_loc"] = legend_loc + kwargs["groups"] = groupsfunc(adata.obs["label"]) + if na_color is not None: + kwargs["na_color"] = na_color + kwargs["na_in_legend"] = na_in_legend + + plotfunc(adata, color=["label", "label_missing"], **kwargs) + + save_and_compare_images(base_name) + + +def test_missing_values_continuous( + *, + request: pytest.FixtureRequest, + image_comparer, + adata, + plotfunc, + na_color, + vbounds, +): + save_and_compare_images = partial(image_comparer, MISSING_VALUES_ROOT, tol=15) + + base_name = request.node.name + + # Passing through a dict so it's easier to use default values + kwargs = {} + kwargs.update(vbounds) + if na_color is not None: + kwargs["na_color"] = na_color + + plotfunc(adata, color=["1", "1_missing"], **kwargs) + + save_and_compare_images(base_name) + + +def test_enumerated_palettes(request, adata, tmp_path, plotfunc): + base_name 
= request.node.name + + categories = adata.obs["label"].cat.categories + colors_rgb = dict(zip(categories, sns.color_palette(n_colors=12))) + + dict_pth = tmp_path / f"rgbdict_{base_name}.png" + list_pth = tmp_path / f"rgblist_{base_name}.png" + + # making a copy so colors aren't saved + plotfunc(adata.copy(), color="label", palette=colors_rgb) + plt.savefig(dict_pth, dpi=40) + plt.close() + plotfunc(adata.copy(), color="label", palette=[colors_rgb[c] for c in categories]) + plt.savefig(list_pth, dpi=40) + plt.close() + + check_images(dict_pth, list_pth, tol=15) + + +def test_dimension_broadcasting(adata, tmp_path, check_same_image): + with pytest.raises( + ValueError, + match=r"Could not broadcast together arguments with shapes: \[2, 3, 1\]", + ): + sc.pl.pca( + adata, color=["label", "1_missing"], dimensions=[(0, 1), (1, 2), (2, 3)] + ) + + dims_pth = tmp_path / "broadcast_dims.png" + color_pth = tmp_path / "broadcast_colors.png" + + sc.pl.pca(adata, color=["label", "label", "label"], dimensions=(2, 3), show=False) + plt.savefig(dims_pth, dpi=40) + plt.close() + sc.pl.pca(adata, color="label", dimensions=[(2, 3), (2, 3), (2, 3)], show=False) + plt.savefig(color_pth, dpi=40) + plt.close() + + check_same_image(dims_pth, color_pth, tol=5) + + +def test_marker_broadcasting(adata, tmp_path, check_same_image): + with pytest.raises( + ValueError, + match=r"Could not broadcast together arguments with shapes: \[2, 1, 3\]", + ): + sc.pl.pca(adata, color=["label", "1_missing"], marker=[".", "^", "x"]) + + dims_pth = tmp_path / "broadcast_markers.png" + color_pth = tmp_path / "broadcast_colors_for_markers.png" + + sc.pl.pca(adata, color=["label", "label", "label"], marker="^", show=False) + plt.savefig(dims_pth, dpi=40) + plt.close() + sc.pl.pca(adata, color="label", marker=["^", "^", "^"], show=False) + plt.savefig(color_pth, dpi=40) + plt.close() + + check_same_image(dims_pth, color_pth, tol=5) + + +def test_dimensions_same_as_components(adata, tmp_path, check_same_image): + adata = adata.copy() + adata.obs["mean"] = np.ravel(adata.X.mean(axis=1)) + + comp_pth = tmp_path / "components_plot.png" + dims_pth = tmp_path / "dimension_plot.png" + + # TODO: Deprecate components kwarg + # with pytest.warns(FutureWarning, match=r"components .* deprecated"): + sc.pl.pca( + adata, + color=["mean", "label"], + components=["1,2", "2,3"], + show=False, + ) + plt.savefig(comp_pth, dpi=40) + plt.close() + + sc.pl.pca( + adata, + color=["mean", "mean", "label", "label"], + dimensions=[(0, 1), (1, 2), (0, 1), (1, 2)], + show=False, + ) + plt.savefig(dims_pth, dpi=40) + plt.close() + + check_same_image(dims_pth, comp_pth, tol=5) + + +def test_embedding_colorbar_location(image_comparer): + save_and_compare_images = partial(image_comparer, ROOT, tol=15) + + adata = pbmc3k_processed().raw.to_adata() + + sc.pl.pca(adata, color="LDHB", colorbar_loc=None) + + save_and_compare_images("no_colorbar") diff --git a/tests/test_plotting_embedded/test_spatial.py b/tests/test_plotting_embedded/test_spatial.py new file mode 100644 index 0000000000..873db68794 --- /dev/null +++ b/tests/test_plotting_embedded/test_spatial.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from functools import partial +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import pytest +from matplotlib.testing.compare import compare_images + +import scanpy as sc + +HERE: Path = Path(__file__).parent +ROOT = HERE.parent / "_images" +DATA_DIR = HERE.parent / "_data" + + +pytestmark = [ + 
pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") +] + + +def check_images(pth1: Path, pth2: Path, *, tol: int) -> None: + result = compare_images(str(pth1), str(pth2), tol=tol) + assert result is None, result + + +def test_visium_circles(image_comparer): # standard visium data + save_and_compare_images = partial(image_comparer, ROOT, tol=15) + + adata = sc.read_visium(DATA_DIR / "visium_data" / "1.0.0") + adata.obs = adata.obs.astype({"array_row": "str"}) + + sc.pl.spatial( + adata, + color="array_row", + groups=["24", "33"], + crop_coord=(100, 400, 400, 100), + alpha=0.5, + size=1.3, + show=False, + ) + + save_and_compare_images("spatial_visium") + + +def test_visium_default(image_comparer): # default values + from packaging.version import parse as parse_version + + if parse_version(mpl.__version__) < parse_version("3.7.0"): + pytest.xfail("Matplotlib 3.7.0+ required for this test") + + save_and_compare_images = partial(image_comparer, ROOT, tol=5) + + adata = sc.read_visium(DATA_DIR / "visium_data" / "1.0.0") + adata.obs = adata.obs.astype({"array_row": "str"}) + + # Points default to transparent if an image is included + sc.pl.spatial(adata, show=False) + + save_and_compare_images("spatial_visium_default") + + +def test_visium_empty_img_key(image_comparer): # visium coordinates but image empty + save_and_compare_images = partial(image_comparer, ROOT, tol=15) + + adata = sc.read_visium(DATA_DIR / "visium_data" / "1.0.0") + adata.obs = adata.obs.astype({"array_row": "str"}) + + sc.pl.spatial(adata, img_key=None, color="array_row", show=False) + + save_and_compare_images("spatial_visium_empty_image") + + sc.pl.embedding(adata, basis="spatial", color="array_row", show=False) + save_and_compare_images("spatial_visium_embedding") + + +def test_spatial_general(image_comparer): # general coordinates + save_and_compare_images = partial(image_comparer, ROOT, tol=15) + + adata = sc.read_visium(DATA_DIR / "visium_data" / "1.0.0") + adata.obs = adata.obs.astype({"array_row": "str"}) + spatial_metadata = adata.uns.pop( + "spatial" + ) # spatial data don't have imgs, so remove entry from uns + # Required argument for now + spot_size = list(spatial_metadata.values())[0]["scalefactors"][ + "spot_diameter_fullres" + ] + + sc.pl.spatial(adata, show=False, spot_size=spot_size) + save_and_compare_images("spatial_general_nocol") + + # category + sc.pl.spatial(adata, show=False, spot_size=spot_size, color="array_row") + save_and_compare_images("spatial_general_cat") + + # continuous + sc.pl.spatial(adata, show=False, spot_size=spot_size, color="array_col") + save_and_compare_images("spatial_general_cont") + + +def test_spatial_external_img(image_comparer): # external image + save_and_compare_images = partial(image_comparer, ROOT, tol=15) + + adata = sc.read_visium(DATA_DIR / "visium_data" / "1.0.0") + adata.obs = adata.obs.astype({"array_row": "str"}) + + img = adata.uns["spatial"]["custom"]["images"]["hires"] + scalef = adata.uns["spatial"]["custom"]["scalefactors"]["tissue_hires_scalef"] + sc.pl.spatial( + adata, + color="array_row", + scale_factor=scalef, + img=img, + basis="spatial", + show=False, + ) + save_and_compare_images("spatial_external_img") + + +@pytest.fixture(scope="module") +def equivalent_spatial_plotters(adata): + no_spatial = adata.copy() + del no_spatial.uns["spatial"] + + img_key = "hires" + library_id = list(adata.uns["spatial"])[0] + spatial_data = adata.uns["spatial"][library_id] + img = spatial_data["images"][img_key] + scale_factor = 
spatial_data["scalefactors"][f"tissue_{img_key}_scalef"] + spot_size = spatial_data["scalefactors"]["spot_diameter_fullres"] + + orig_plotter = partial(sc.pl.spatial, adata, color="1", show=False) + removed_plotter = partial( + sc.pl.spatial, + no_spatial, + color="1", + img=img, + scale_factor=scale_factor, + spot_size=spot_size, + show=False, + ) + + return (orig_plotter, removed_plotter) + + +@pytest.fixture(scope="module") +def equivalent_spatial_plotters_no_img(equivalent_spatial_plotters): + orig, removed = equivalent_spatial_plotters + return (partial(orig, img_key=None), partial(removed, img=None, scale_factor=None)) + + +@pytest.fixture( + params=[ + pytest.param({"crop_coord": (50, 200, 0, 500)}, id="crop"), + pytest.param({"size": 0.5}, id="size:.5"), + pytest.param({"size": 2}, id="size:2"), + pytest.param({"spot_size": 5}, id="spotsize"), + pytest.param({"bw": True}, id="bw"), + # Shape of the image for particular fixture, should not be hardcoded like this + pytest.param({"img": np.ones((774, 1755, 4)), "scale_factor": 1.0}, id="img"), + pytest.param( + {"na_color": (0, 0, 0, 0), "color": "1_missing"}, id="na_color.transparent" + ), + pytest.param( + {"na_color": "lightgray", "color": "1_missing"}, id="na_color.lightgray" + ), + ] +) +def spatial_kwargs(request): + return request.param + + +def test_manual_equivalency(equivalent_spatial_plotters, tmp_path, spatial_kwargs): + """ + Tests that manually passing values to sc.pl.spatial is similar to automatic extraction. + """ + orig, removed = equivalent_spatial_plotters + + orig_pth = tmp_path / "orig.png" + removed_pth = tmp_path / "removed.png" + + orig(**spatial_kwargs) + plt.savefig(orig_pth, dpi=40) + plt.close() + removed(**spatial_kwargs) + plt.savefig(removed_pth, dpi=40) + plt.close() + + check_images(orig_pth, removed_pth, tol=1) + + +def test_manual_equivalency_no_img( + equivalent_spatial_plotters_no_img, tmp_path, spatial_kwargs +): + if "bw" in spatial_kwargs: + # Has no meaning when there is no image + pytest.skip() + orig, removed = equivalent_spatial_plotters_no_img + + orig_pth = tmp_path / "orig.png" + removed_pth = tmp_path / "removed.png" + + orig(**spatial_kwargs) + plt.savefig(orig_pth, dpi=40) + plt.close() + removed(**spatial_kwargs) + plt.savefig(removed_pth, dpi=40) + plt.close() + + check_images(orig_pth, removed_pth, tol=1) + + +def test_white_background_vs_no_img(adata, tmp_path, spatial_kwargs): + if {"bw", "img", "img_key", "na_color"}.intersection(spatial_kwargs): + # These arguments don't make sense for this check + pytest.skip() + + white_background = np.ones_like( + adata.uns["spatial"]["scanpy_img"]["images"]["hires"] + ) + white_pth = tmp_path / "white_background.png" + noimg_pth = tmp_path / "no_img.png" + + sc.pl.spatial( + adata, + color="2", + img=white_background, + scale_factor=1.0, + show=False, + **spatial_kwargs, + ) + plt.savefig(white_pth) + sc.pl.spatial(adata, color="2", img_key=None, show=False, **spatial_kwargs) + plt.savefig(noimg_pth) + + check_images(white_pth, noimg_pth, tol=1) + + +def test_spatial_na_color(adata, tmp_path): + """ + Check that na_color defaults to transparent when an image is present, light gray when not. 
+ """ + white_background = np.ones_like( + adata.uns["spatial"]["scanpy_img"]["images"]["hires"] + ) + lightgray_pth = tmp_path / "lightgray.png" + transparent_pth = tmp_path / "transparent.png" + noimg_pth = tmp_path / "noimg.png" + whiteimg_pth = tmp_path / "whiteimg.png" + + def plot(pth, **kwargs): + sc.pl.spatial(adata, color="1_missing", show=False, **kwargs) + plt.savefig(pth, dpi=40) + plt.close() + + plot(lightgray_pth, na_color="lightgray", img_key=None) + plot(transparent_pth, na_color=(0.0, 0.0, 0.0, 0.0), img_key=None) + plot(noimg_pth, img_key=None) + plot(whiteimg_pth, img=white_background, scale_factor=1.0) + + check_images(lightgray_pth, noimg_pth, tol=1) + check_images(transparent_pth, whiteimg_pth, tol=1) + with pytest.raises(AssertionError): + check_images(lightgray_pth, transparent_pth, tol=1) diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py index b8f5115b01..6282c5ccf4 100644 --- a/tests/test_preprocessing.py +++ b/tests/test_preprocessing.py @@ -1,7 +1,10 @@ from __future__ import annotations +import warnings +from importlib.util import find_spec from itertools import product from pathlib import Path +from typing import TYPE_CHECKING import numpy as np import pandas as pd @@ -22,6 +25,15 @@ from testing.scanpy._helpers.data import pbmc3k, pbmc68k_reduced from testing.scanpy._pytest.params import ARRAY_TYPES +if TYPE_CHECKING: + from collections.abc import Callable + from typing import Any, Literal + + from numpy.typing import NDArray + + CSMatrix = sp.csc_matrix | sp.csr_matrix + + HERE = Path(__file__).parent DATA_PATH = HERE / "_data" @@ -134,34 +146,159 @@ def test_normalize_per_cell(): assert adata.X.sum(axis=1).tolist() == adata_sparse.X.sum(axis=1).A1.tolist() -def test_subsample(): - adata = AnnData(np.ones((200, 10))) - sc.pp.subsample(adata, n_obs=40) - assert adata.n_obs == 40 - sc.pp.subsample(adata, fraction=0.1) - assert adata.n_obs == 4 +def _random_probs(n: int, frac_zero: float) -> NDArray[np.float64]: + """ + Generate a random probability distribution of `n` values between 0 and 1. 
+ """ + probs = np.random.randint(0, 10000, n).astype(np.float64) + probs[probs < np.quantile(probs, frac_zero)] = 0 + probs /= probs.sum() + np.testing.assert_almost_equal(probs.sum(), 1) + return probs + + +@pytest.mark.parametrize("array_type", ARRAY_TYPES) +@pytest.mark.parametrize("which", ["copy", "inplace", "array"]) +@pytest.mark.parametrize( + ("axis", "f_or_n", "replace"), + [ + pytest.param(0, 40, False, id="obs-40-no_replace"), + pytest.param(0, 0.1, False, id="obs-0.1-no_replace"), + pytest.param(0, 201, True, id="obs-201-replace"), + pytest.param(0, 1, True, id="obs-1-replace"), + pytest.param(1, 10, False, id="var-10-no_replace"), + pytest.param(1, 11, True, id="var-11-replace"), + pytest.param(1, 2.0, True, id="var-2.0-replace"), + ], +) +@pytest.mark.parametrize( + "ps", + [ + dict(obs=None, var=None), + dict(obs=np.tile([True, False], 100), var=np.tile([True, False], 5)), + dict(obs=_random_probs(200, 0.3), var=_random_probs(10, 0.7)), + ], + ids=["all", "mask", "p"], +) +def test_sample( + *, + request: pytest.FixtureRequest, + array_type: Callable[[np.ndarray], np.ndarray | CSMatrix], + which: Literal["copy", "inplace", "array"], + axis: Literal[0, 1], + f_or_n: float | int, # noqa: PYI041 + replace: bool, + ps: dict[Literal["obs", "var"], NDArray[np.bool_] | None], +): + adata = AnnData(array_type(np.ones((200, 10)))) + p = ps["obs" if axis == 0 else "var"] + expected = int(adata.shape[axis] * f_or_n) if isinstance(f_or_n, float) else f_or_n + if p is not None and not replace and expected > (n_possible := (p != 0).sum()): + request.applymarker(pytest.xfail(f"Can’t draw {expected} out of {n_possible}")) + + # ignoring this warning declaratively is a pain so do it here + if find_spec("dask"): + import dask.array as da + + warnings.filterwarnings("ignore", category=da.PerformanceWarning) + # can’t guarantee that duplicates are drawn when `replace=True`, + # so we just ignore the warning instead using `with pytest.warns(...)` + warnings.filterwarnings( + "ignore" if replace else "error", r".*names are not unique", UserWarning + ) + rv = sc.pp.sample( + adata.X if which == "array" else adata, + f_or_n if isinstance(f_or_n, float) else None, + n=f_or_n if isinstance(f_or_n, int) else None, + replace=replace, + axis=axis, + # `copy` only effects AnnData inputs + copy=dict(copy=True, inplace=False, array=False)[which], + p=p, + ) + + match which: + case "copy": + subset = rv + assert rv is not adata + assert adata.shape == (200, 10) + case "inplace": + subset = adata + assert rv is None + case "array": + subset, indices = rv + assert len(indices) == expected + assert adata.shape == (200, 10) + case _: + pytest.fail(f"Unknown `{which=}`") + assert subset.shape == ((expected, 10) if axis == 0 else (200, expected)) -def test_subsample_copy(): + +@pytest.mark.parametrize( + ("args", "exc", "pattern"), + [ + pytest.param( + dict(), TypeError, r"Either `fraction` or `n` must be set", id="empty" + ), + pytest.param( + dict(n=10, fraction=0.2), + TypeError, + r"Providing both `fraction` and `n` is not allowed", + id="both", + ), + pytest.param( + dict(fraction=2), + ValueError, + r"If `replace=False`, `fraction=2` needs to be", + id="frac>1", + ), + pytest.param( + dict(fraction=-0.3), + ValueError, + r"`fraction=-0\.3` needs to be nonnegative", + id="frac<0", + ), + pytest.param( + dict(n=3, p=np.ones(200, dtype=np.int32)), + ValueError, + r"mask/probabilities array must be boolean or floating point", + id="type(p)", + ), + ], +) +def test_sample_error(args: dict[str, Any], exc: 
type[Exception], pattern: str): adata = AnnData(np.ones((200, 10))) - assert sc.pp.subsample(adata, n_obs=40, copy=True).shape == (40, 10) - assert sc.pp.subsample(adata, fraction=0.1, copy=True).shape == (20, 10) + with pytest.raises(exc, match=pattern): + sc.pp.sample(adata, **args) -def test_subsample_copy_backed(tmp_path): - A = np.random.rand(200, 10).astype(np.float32) - adata_m = AnnData(A.copy()) - adata_d = AnnData(A.copy()) - filename = tmp_path / "test.h5ad" - adata_d.filename = filename - # This should not throw an error - assert sc.pp.subsample(adata_d, n_obs=40, copy=True).shape == (40, 10) +def test_sample_backwards_compat(): + expected = np.array( + [26, 86, 2, 55, 75, 93, 16, 73, 54, 95, 53, 92, 78, 13, 7, 30, 22, 24, 33, 8] + ) + legacy_result, indices = sc.pp.subsample(np.arange(100), n_obs=20) + assert np.array_equal(indices, legacy_result), "arange choices should match indices" + assert np.array_equal(legacy_result, expected) + + +def test_sample_copy_backed(tmp_path): + adata_m = AnnData(np.random.rand(200, 10).astype(np.float32)) + adata_d = adata_m.copy() + adata_d.filename = tmp_path / "test.h5ad" + + assert sc.pp.sample(adata_d, n=40, copy=True).shape == (40, 10) np.testing.assert_array_equal( - sc.pp.subsample(adata_m, n_obs=40, copy=True).X, - sc.pp.subsample(adata_d, n_obs=40, copy=True).X, + sc.pp.sample(adata_m, n=40, copy=True, rng=0).X, + sc.pp.sample(adata_d, n=40, copy=True, rng=0).X, ) + + +def test_sample_copy_backed_error(tmp_path): + adata_d = AnnData(np.random.rand(200, 10).astype(np.float32)) + adata_d.filename = tmp_path / "test.h5ad" with pytest.raises(NotImplementedError): - sc.pp.subsample(adata_d, n_obs=40, copy=False) + sc.pp.sample(adata_d, n=40, copy=False) @pytest.mark.parametrize("array_type", ARRAY_TYPES) diff --git a/tests/test_preprocessing_distributed.py b/tests/test_preprocessing_distributed.py index a1b99121ef..afb120b982 100644 --- a/tests/test_preprocessing_distributed.py +++ b/tests/test_preprocessing_distributed.py @@ -40,13 +40,13 @@ def adata() -> AnnData: return a -@filter_oldformatwarning @pytest.fixture( params=[ pytest.param("direct", marks=[needs.zappy]), pytest.param("dask", marks=[needs.dask, pytest.mark.anndata_dask_support]), ] ) +@filter_oldformatwarning def adata_dist(request: pytest.FixtureRequest) -> AnnData: # regular anndata except for X, which we replace on the next line a = read_zarr(input_file) @@ -75,6 +75,7 @@ def test_log1p(adata: AnnData, adata_dist: AnnData): npt.assert_allclose(result, adata.X) +@pytest.mark.filterwarnings("ignore:Use sc.pp.normalize_total instead:FutureWarning") def test_normalize_per_cell( request: pytest.FixtureRequest, adata: AnnData, adata_dist: AnnData ): diff --git a/tests/test_read_10x.py b/tests/test_read_10x.py index 7b31f6bddf..301a156bec 100644 --- a/tests/test_read_10x.py +++ b/tests/test_read_10x.py @@ -143,6 +143,7 @@ def visium_pth(request, tmp_path) -> Path: pytest.fail("add branch for new visium version") +@pytest.mark.filterwarnings("ignore:Use `squidpy.*` instead:FutureWarning") def test_read_visium_counts(visium_pth): """Test checking that read_visium reads the right genome""" spec_genome_v3 = sc.read_visium(visium_pth, genome="GRCh38") diff --git a/tests/test_utils.py b/tests/test_utils.py index f8a38a5f9d..81369a6938 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,7 @@ from operator import mul, truediv from types import ModuleType +from typing import TYPE_CHECKING import numpy as np import pytest @@ -9,7 +10,7 @@ from packaging.version import 
Version from scipy.sparse import csr_matrix, issparse -from scanpy._compat import DaskArray, pkg_version +from scanpy._compat import DaskArray, _legacy_numpy_gen, pkg_version from scanpy._utils import ( axis_mul_or_truediv, axis_sum, @@ -26,6 +27,9 @@ ARRAY_TYPES_SPARSE_DASK_UNSUPPORTED, ) +if TYPE_CHECKING: + from typing import Any + def test_descend_classes_and_funcs(): # create module hierarchy @@ -247,3 +251,39 @@ def test_is_constant_dask(request: pytest.FixtureRequest, axis, expected, block_ x = da.from_array(np.array(x_data), chunks=2).map_blocks(block_type) result = is_constant(x, axis=axis).compute() np.testing.assert_array_equal(expected, result) + + +@pytest.mark.parametrize("seed", [0, 1, 1256712675]) +@pytest.mark.parametrize("pass_seed", [True, False], ids=["pass_seed", "set_seed"]) +@pytest.mark.parametrize("func", ["choice"]) +def test_legacy_numpy_gen(*, seed: int, pass_seed: bool, func: str): + np.random.seed(seed) + state_before = np.random.get_state(legacy=False) + + arrs: dict[bool, np.ndarray] = {} + states_after: dict[bool, dict[str, Any]] = {} + for direct in [True, False]: + if not pass_seed: + np.random.seed(seed) + arrs[direct] = _mk_random(func, direct=direct, seed=seed if pass_seed else None) + states_after[direct] = np.random.get_state(legacy=False) + + np.testing.assert_array_equal(arrs[True], arrs[False]) + np.testing.assert_equal( + *states_after.values(), err_msg="both should affect global state the same" + ) + # they should affect the global state + with pytest.raises(AssertionError): + np.testing.assert_equal(states_after[True], state_before) + + +def _mk_random(func: str, *, direct: bool, seed: int | None) -> np.ndarray: + if direct and seed is not None: + np.random.seed(seed) + gen = np.random if direct else _legacy_numpy_gen(seed) + match func: + case "choice": + arr = np.arange(1000) + return gen.choice(arr, size=(100, 100)) + case _: + pytest.fail(f"Unknown {func=}")
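The contract `test_legacy_numpy_gen` pins down is that `_legacy_numpy_gen(seed)` behaves like `np.random.seed(seed)` followed by draws from NumPy's global legacy RNG, advancing that global state as a side effect. A behavioral sketch of that contract — an illustration only, not scanpy's actual `_compat` implementation:

```python
import numpy as np


class LegacyGlobalGen:
    """Hypothetical stand-in: seed on creation, draw via the global legacy RNG."""

    def __init__(self, seed: int | None = None) -> None:
        if seed is not None:
            np.random.seed(seed)

    def choice(self, a, size=None):
        # Routing through np.random advances the global state, which is
        # exactly what the test's state_before/states_after checks assert.
        return np.random.choice(a, size=size)


np.random.seed(0)
expected = np.random.choice(np.arange(1000), size=(100, 100))
actual = LegacyGlobalGen(seed=0).choice(np.arange(1000), size=(100, 100))
np.testing.assert_array_equal(actual, expected)
```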