Skip to content

Commit

Permalink
Add replace option to subsample and rename function to sample (#943)
Browse files Browse the repository at this point in the history
  • Loading branch information
gokceneraslan authored Dec 19, 2024
1 parent 7e3dd15 commit 86d656d
Show file tree
Hide file tree
Showing 11 changed files with 391 additions and 74 deletions.
1 change: 1 addition & 0 deletions docs/api/deprecated.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
pp.filter_genes_dispersion
pp.normalize_per_cell
pp.subsample
```
2 changes: 1 addition & 1 deletion docs/api/preprocessing.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ For visual quality control, see {func}`~scanpy.pl.highest_expr_genes` and
pp.normalize_total
pp.regress_out
pp.scale
pp.subsample
pp.sample
pp.downsample_counts
```

Expand Down
1 change: 1 addition & 0 deletions docs/release-notes/943.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{func}`~scanpy.pp.sample` supports both upsampling and downsampling of observations and variables. {func}`~scanpy.pp.subsample` is now deprecated. {smaller}`G Eraslan` & {smaller}`P Angerer`
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ classifiers = [
]
dependencies = [
"anndata>=0.8",
"numpy>=1.23",
"numpy>=1.24",
"matplotlib>=3.6",
"pandas >=1.5",
"scipy>=1.8",
Expand All @@ -60,7 +60,7 @@ dependencies = [
"networkx>=2.7",
"natsort",
"joblib",
"numba>=0.56",
"numba>=0.57",
"umap-learn>=0.5,!=0.5.0",
"pynndescent>=0.5",
"packaging>=21.3",
Expand Down
41 changes: 40 additions & 1 deletion src/scanpy/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys
import warnings
from dataclasses import dataclass, field
from functools import cache, partial, wraps
from functools import WRAPPER_ASSIGNMENTS, cache, partial, wraps
from importlib.util import find_spec
from pathlib import Path
from typing import TYPE_CHECKING, Literal, ParamSpec, TypeVar, cast, overload
Expand Down Expand Up @@ -224,3 +224,42 @@ def _numba_threading_layer() -> Layer:
f" ({available=}, {numba.config.THREADING_LAYER_PRIORITY=})"
)
raise ValueError(msg)


def _legacy_numpy_gen(
    random_state: _LegacyRandom | None = None,
) -> np.random.Generator:
    """Build a generator-shaped object that draws from the legacy random state.

    Parameters
    ----------
    random_state
        ``None`` leaves numpy's global random state untouched and wraps it.
        A :class:`numpy.random.RandomState` instance has its state copied
        into numpy's global random state and is then wrapped itself.
        Any other value is handed to :func:`numpy.random.seed`, after which
        the global random state is wrapped.

    Returns
    -------
    A ``_FakeRandomGen`` whose public methods forward to the wrapped
    :class:`numpy.random.RandomState`.
    """
    # Caller supplied a ready-made legacy state: mirror it globally, wrap it as is.
    if isinstance(random_state, np.random.RandomState):
        np.random.set_state(random_state.get_state(legacy=False))
        return _FakeRandomGen(random_state)

    # Anything else that is not None is treated as a seed for the global state.
    if random_state is not None:
        np.random.seed(random_state)

    # Wrap the bit generator behind numpy's global legacy functions.
    global_bit_generator = np.random.get_bit_generator()
    return _FakeRandomGen(np.random.RandomState(global_bit_generator))


class _FakeRandomGen(np.random.Generator):
    """A ``numpy.random.Generator`` subclass backed by a legacy ``RandomState``.

    Every public callable found on :class:`numpy.random.Generator` is replaced
    (see :meth:`_delegate`) by a wrapper that calls the method of the same name
    on the wrapped :class:`numpy.random.RandomState`.
    """

    # Legacy random state that all delegated methods forward to.
    _state: np.random.RandomState

    def __init__(self, random_state: np.random.RandomState) -> None:
        # ``Generator.__init__`` is not invoked here; only the legacy state is stored.
        self._state = random_state

    @classmethod
    def _delegate(cls) -> None:
        """Install forwarding wrappers for all public ``Generator`` callables.

        Must be called once after the class has been created.
        """

        # Defined once, outside the loop, and given ``meth`` explicitly so that
        # each wrapper is bound to its own method rather than relying on the
        # loop variable's value at call time (late-binding closure pitfall).
        def mk_wrapper(name: str, meth):
            # Old pytest versions try to run the doctests,
            # so every piece of metadata except ``__doc__`` is copied over.
            @wraps(meth, assigned=set(WRAPPER_ASSIGNMENTS) - {"__doc__"})
            def wrapper(self: _FakeRandomGen, *args, **kwargs):
                # Look the method up by name on the legacy state at call time.
                return getattr(self._state, name)(*args, **kwargs)

            return wrapper

        for name, meth in np.random.Generator.__dict__.items():
            # Skip private/dunder attributes and non-callables (e.g. properties).
            if name.startswith("_") or not callable(meth):
                continue
            setattr(cls, name, mk_wrapper(name, meth))


_FakeRandomGen._delegate()
4 changes: 3 additions & 1 deletion src/scanpy/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from ..neighbors import neighbors
from ._combat import combat
from ._deprecated.highly_variable_genes import filter_genes_dispersion
from ._deprecated.sampling import subsample
from ._highly_variable_genes import highly_variable_genes
from ._normalization import normalize_total
from ._pca import pca
Expand All @@ -17,8 +18,8 @@
log1p,
normalize_per_cell,
regress_out,
sample,
sqrt,
subsample,
)

__all__ = [
Expand All @@ -40,6 +41,7 @@
"log1p",
"normalize_per_cell",
"regress_out",
"sample",
"scale",
"sqrt",
"subsample",
Expand Down
60 changes: 60 additions & 0 deletions src/scanpy/preprocessing/_deprecated/sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from ..._compat import _legacy_numpy_gen, old_positionals
from .._simple import sample

if TYPE_CHECKING:
import numpy as np
from anndata import AnnData
from numpy.typing import NDArray
from scipy.sparse import csc_matrix, csr_matrix

from ..._compat import _LegacyRandom

CSMatrix = csr_matrix | csc_matrix


@old_positionals("n_obs", "random_state", "copy")
def subsample(
    data: AnnData | np.ndarray | CSMatrix,
    fraction: float | None = None,
    *,
    n_obs: int | None = None,
    random_state: _LegacyRandom = 0,
    copy: bool = False,
) -> AnnData | tuple[np.ndarray | CSMatrix, NDArray[np.int64]] | None:
    """\
    Draw a subset of the observations without replacement.

    .. deprecated:: 1.11.0

       Use :func:`~scanpy.pp.sample` instead.

    Parameters
    ----------
    data
        The (annotated) data matrix of shape `n_obs` × `n_vars`.
        Rows correspond to cells and columns to genes.
    fraction
        Keep this `fraction` of the number of observations.
    n_obs
        Keep this number of observations.
    random_state
        Random seed to change subsampling.
    copy
        If an :class:`~anndata.AnnData` is passed,
        determines whether a copy is returned.

    Returns
    -------
    Returns `X[obs_indices], obs_indices` if data is array-like, otherwise
    subsamples the passed :class:`~anndata.AnnData` (`copy == False`) or
    returns a subsampled copy of it (`copy == True`).
    """
    # Thin forwarder: legacy seeding, observation axis, no replacement.
    return sample(
        data=data,
        fraction=fraction,
        n=n_obs,
        rng=_legacy_numpy_gen(random_state),
        copy=copy,
        replace=False,
        axis=0,
    )
Loading

0 comments on commit 86d656d

Please sign in to comment.