From d6fb42641afbeebcd069ecd71b43e762a27c0094 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 13 Jan 2025 12:34:30 +0100 Subject: [PATCH] Better error messages --- src/scanpy/_utils/__init__.py | 6 ++-- src/scanpy/get/get.py | 6 ++-- src/scanpy/plotting/_anndata.py | 31 ++++++++++++------- src/scanpy/plotting/_baseplot_class.py | 14 +++------ src/scanpy/plotting/_tools/__init__.py | 4 +-- src/scanpy/plotting/_tools/paga.py | 2 +- src/scanpy/plotting/_tools/scatterplots.py | 6 ++-- src/scanpy/plotting/_utils.py | 4 +-- .../preprocessing/_highly_variable_genes.py | 2 +- src/scanpy/preprocessing/_pca/__init__.py | 2 +- src/scanpy/preprocessing/_qc.py | 2 +- src/scanpy/readwrite.py | 22 +++++++------ src/scanpy/tools/_dpt.py | 4 +-- src/scanpy/tools/_ingest.py | 2 +- src/scanpy/tools/_leiden.py | 2 +- src/scanpy/tools/_utils_clustering.py | 2 +- 16 files changed, 58 insertions(+), 53 deletions(-) diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py index 58a2c57d2..326ea216d 100644 --- a/src/scanpy/_utils/__init__.py +++ b/src/scanpy/_utils/__init__.py @@ -1076,7 +1076,7 @@ def __init__(self, adata: AnnData, key=None): self._dists_key = "distances" else: if key not in adata.uns: - msg = f'No "{key}" in .uns' + msg = f"No {key!r} in .uns" raise KeyError(msg) self._neighbors_dict = adata.uns[key] self._conns_key = self._neighbors_dict["connectivities_key"] @@ -1110,12 +1110,12 @@ def __getitem__(self, key: Literal["connectivities_key"]) -> str: ... def __getitem__(self, key: str): if key == "distances": if "distances" not in self: - msg = f'No "{self._dists_key}" in .obsp' + msg = f"No {self._dists_key!r} in .obsp" raise KeyError(msg) return self._distances elif key == "connectivities": if "connectivities" not in self: - msg = f'No "{self._conns_key}" in .obsp' + msg = f"No {self._conns_key!r} in .obsp" raise KeyError(msg) return self._connectivities elif key == "connectivities_key": diff --git a/src/scanpy/get/get.py b/src/scanpy/get/get.py index e4781b125..abfa51d1f 100644 --- a/src/scanpy/get/get.py +++ b/src/scanpy/get/get.py @@ -170,7 +170,7 @@ def _check_indices( if key in dim_df.columns: col_keys.append(key) if key in alt_names.index: - msg = f"The key '{key}' is found in both adata.{dim} and {alt_repr}.{alt_search_repr}." + msg = f"The key {key!r} is found in both adata.{dim} and {alt_repr}.{alt_search_repr}." raise KeyError(msg) elif key in alt_names.index: val = alt_names[key] @@ -178,7 +178,7 @@ def _check_indices( # while var_names must be unique, adata.var[gene_symbols] does not # It's still ambiguous to refer to a duplicated entry though. assert alias_index is not None - msg = f"Found duplicate entries for '{key}' in {alt_repr}.{alt_search_repr}." + msg = f"Found duplicate entries for {key!r} in {alt_repr}.{alt_search_repr}." raise KeyError(msg) index_keys.append(val) index_aliases.append(key) @@ -186,7 +186,7 @@ def _check_indices( not_found.append(key) if len(not_found) > 0: msg = ( - f"Could not find keys '{not_found}' in columns of `adata.{dim}` or in" + f"Could not find keys {not_found!r} in columns of `adata.{dim}` or in" f" {alt_repr}.{alt_search_repr}." ) raise KeyError(msg) diff --git a/src/scanpy/plotting/_anndata.py b/src/scanpy/plotting/_anndata.py index a411ec7f5..75dd210c0 100755 --- a/src/scanpy/plotting/_anndata.py +++ b/src/scanpy/plotting/_anndata.py @@ -2284,20 +2284,12 @@ def _reorder_categories_after_dendrogram( 'var_group_labels', and 'var_group_positions' """ - dendrogram_key = _get_dendrogram_key(adata, dendrogram_key, groupby) - if isinstance(groupby, str): groupby = [groupby] - dendro_info = adata.uns[dendrogram_key] - if groupby != dendro_info["groupby"]: - msg = ( - "Incompatible observations. The precomputed dendrogram contains " - f"information for the observation: '{groupby}' while the plot is " - f"made for the observation: '{dendro_info['groupby']}. " - "Please run `sc.tl.dendrogram` using the right observation.'" - ) - raise ValueError(msg) + dendro_info = adata.uns[ + _get_dendrogram_key(adata, dendrogram_key, groupby, validate_groupby=True) + ] if categories is None: categories = adata.obs[dendro_info["groupby"]].cat.categories @@ -2371,7 +2363,11 @@ def _format_first_three_categories(categories): def _get_dendrogram_key( - adata: AnnData, dendrogram_key: str | None, groupby: str | Sequence[str] + adata: AnnData, + dendrogram_key: str | None, + groupby: str | Sequence[str], + *, + validate_groupby: bool = False, ) -> str: # the `dendrogram_key` can be a bool an NoneType or the name of the # dendrogram key. By default the name of the dendrogram key is 'dendrogram' @@ -2401,6 +2397,17 @@ def _get_dendrogram_key( ) raise ValueError(msg) + if validate_groupby: + existing_groupby = adata.uns[dendrogram_key]["groupby"] + if groupby != existing_groupby: + msg = ( + "Incompatible observations. The precomputed dendrogram contains " + f"information for the observation: {groupby!r} while the plot is " + f"made for the observation: {existing_groupby!r}. " + "Please run `sc.tl.dendrogram` using the right observation.'" + ) + raise ValueError(msg) + return dendrogram_key diff --git a/src/scanpy/plotting/_baseplot_class.py b/src/scanpy/plotting/_baseplot_class.py index 3b54ca4ad..e14d387f8 100644 --- a/src/scanpy/plotting/_baseplot_class.py +++ b/src/scanpy/plotting/_baseplot_class.py @@ -899,17 +899,11 @@ def _format_first_three_categories(_categories): _categories = _categories[:3] + ["etc."] return ", ".join(_categories) - key = _get_dendrogram_key(self.adata, dendrogram_key, self.groupby) - - dendro_info = self.adata.uns[key] - if self.groupby != dendro_info["groupby"]: - msg = ( - "Incompatible observations. The precomputed dendrogram contains " - f"information for the observation: '{self.groupby}' while the plot is " - f"made for the observation: '{dendro_info['groupby']}. " - "Please run `sc.tl.dendrogram` using the right observation.'" + dendro_info = self.adata.uns[ + _get_dendrogram_key( + self.adata, dendrogram_key, self.groupby, validate_groupby=True ) - raise ValueError(msg) + ] # order of groupby categories categories_idx_ordered = dendro_info["categories_idx_ordered"] diff --git a/src/scanpy/plotting/_tools/__init__.py b/src/scanpy/plotting/_tools/__init__.py index 6af6c178e..8f189121e 100644 --- a/src/scanpy/plotting/_tools/__init__.py +++ b/src/scanpy/plotting/_tools/__init__.py @@ -400,7 +400,7 @@ def rank_genes_groups( if n_genes < 1: msg = ( "Specifying a negative number for n_genes has not been implemented for " - f"this plot. Received n_genes={n_genes}." + f"this plot. Received {n_genes=!r}." ) raise NotImplementedError(msg) @@ -1535,7 +1535,7 @@ def embedding_density( if f"X_{basis}" not in adata.obsm_keys(): msg = ( - f"Cannot find the embedded representation `adata.obsm[X_{basis!r}]`. " + f"Cannot find the embedded representation `adata.obsm['X_{basis}']`. " "Compute the embedding first." ) raise ValueError(msg) diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py index 661d571ca..a4b2de344 100644 --- a/src/scanpy/plotting/_tools/paga.py +++ b/src/scanpy/plotting/_tools/paga.py @@ -1166,7 +1166,7 @@ def moving_average(a): if node not in groups_names_set: msg = ( f"Each node/group needs to be in {groups_names.tolist()} " - f"(`groups_key`={groups_key!r}) not {node!r}." + f"({groups_key=!r}) not {node!r}." ) raise ValueError(msg) nodes_ints.append(groups_names.get_loc(node)) diff --git a/src/scanpy/plotting/_tools/scatterplots.py b/src/scanpy/plotting/_tools/scatterplots.py index 2c6463cfb..cb3c9d7c6 100644 --- a/src/scanpy/plotting/_tools/scatterplots.py +++ b/src/scanpy/plotting/_tools/scatterplots.py @@ -159,8 +159,8 @@ def embedding( use_raw = layer is None and adata.raw is not None if use_raw and layer is not None: msg = ( - "Cannot use both a layer and the raw representation. Was passed:" - f"use_raw={use_raw}, layer={layer}." + "Cannot use both a layer and the raw representation. " + f"Was passed: {use_raw=!r}, {layer=!r}." ) raise ValueError(msg) if use_raw and adata.raw is None: @@ -1167,7 +1167,7 @@ def _get_basis(adata: AnnData, basis: str) -> np.ndarray: elif f"X_{basis}" in adata.obsm: return adata.obsm[f"X_{basis}"] else: - msg = f"Could not find '{basis}' or 'X_{basis}' in .obsm" + msg = f"Could not find {basis!r} or 'X_{basis}' in .obsm" raise KeyError(msg) diff --git a/src/scanpy/plotting/_utils.py b/src/scanpy/plotting/_utils.py index 178451b66..b6cd92003 100644 --- a/src/scanpy/plotting/_utils.py +++ b/src/scanpy/plotting/_utils.py @@ -398,7 +398,7 @@ def _validate_palette(adata: AnnData, key: str) -> None: else: logg.warning( f"The following color value found in adata.uns['{key}_colors'] " - f"is not valid: '{color}'. Default colors will be used instead." + f"is not valid: {color!r}. Default colors will be used instead." ) _set_default_colors_for_categorical_obs(adata, key) _palette = None @@ -633,7 +633,7 @@ def scatter_group( color = rgb2hex(adata.uns[key + "_colors"][cat_code]) if not is_color_like(color): - msg = f'"{color}" is not a valid matplotlib color.' + msg = f"{color!r} is not a valid matplotlib color." raise ValueError(msg) data = [Y[mask_obs, 0], Y[mask_obs, 1]] if projection == "3d": diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py index 1bbd28b16..356fa8f03 100644 --- a/src/scanpy/preprocessing/_highly_variable_genes.py +++ b/src/scanpy/preprocessing/_highly_variable_genes.py @@ -72,7 +72,7 @@ def _highly_variable_genes_seurat_v3( if check_values and not check_nonnegative_integers(data): warnings.warn( - f"`flavor='{flavor}'` expects raw count data, but non-integers were found.", + f"`{flavor=!r}` expects raw count data, but non-integers were found.", UserWarning, ) diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py index 512815052..db7886a29 100644 --- a/src/scanpy/preprocessing/_pca/__init__.py +++ b/src/scanpy/preprocessing/_pca/__init__.py @@ -239,7 +239,7 @@ def pca( min_dim = min(adata_comp.n_vars, adata_comp.n_obs) n_comps = min_dim - 1 if min_dim <= settings.N_PCS else settings.N_PCS - logg.info(f" with n_comps={n_comps}") + logg.info(f" with {n_comps=}") X = _get_obs_rep(adata_comp, layer=layer) if is_backed_type(X) and layer is not None: diff --git a/src/scanpy/preprocessing/_qc.py b/src/scanpy/preprocessing/_qc.py index def4feaf4..5af8def04 100644 --- a/src/scanpy/preprocessing/_qc.py +++ b/src/scanpy/preprocessing/_qc.py @@ -34,7 +34,7 @@ def _choose_mtx_rep(adata, *, use_raw: bool = False, layer: str | None = None): if use_raw and is_layer: msg = ( "Cannot use expression from both layer and raw. You provided:" - f"'use_raw={use_raw}' and 'layer={layer}'" + f"{use_raw=!r} and {layer=!r}" ) raise ValueError(msg) if is_layer: diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py index d06e6dc3e..c568519cd 100644 --- a/src/scanpy/readwrite.py +++ b/src/scanpy/readwrite.py @@ -41,6 +41,7 @@ from ._utils import _empty if TYPE_CHECKING: + from datetime import datetime from typing import BinaryIO, Literal from ._utils import Empty @@ -221,7 +222,7 @@ def read_10x_h5( if genome: if genome not in adata.var["genome"].values: msg = ( - f"Could not find data corresponding to genome '{genome}' in '{filename}'. " + f"Could not find data corresponding to genome {genome!r} in {filename}. " f"Available genomes are: {list(adata.var['genome'].unique())}." ) raise ValueError(msg) @@ -231,29 +232,32 @@ def read_10x_h5( if adata.is_view: adata = adata.copy() else: - adata = _read_legacy_10x_h5(filename, genome=genome, start=start) + adata = _read_legacy_10x_h5(Path(filename), genome=genome, start=start) return adata -def _read_legacy_10x_h5(filename, *, genome=None, start=None): +def _read_legacy_10x_h5( + path: Path, *, genome: str | None = None, start: datetime | None = None +): """ Read hdf5 file from Cell Ranger v2 or earlier versions. """ - with h5py.File(str(filename), "r") as f: + with h5py.File(str(path), "r") as f: try: children = list(f.keys()) if not genome: if len(children) > 1: msg = ( - f"'{filename}' contains more than one genome. For legacy 10x h5 " - "files you must specify the genome if more than one is present. " + f"{path} contains more than one genome. " + "For legacy 10x h5 files you must specify the genome " + "if more than one is present. " f"Available genomes are: {children}" ) raise ValueError(msg) genome = children[0] elif genome not in children: msg = ( - f"Could not find genome '{genome}' in '{filename}'. " + f"Could not find genome {genome!r} in {path}. " f"Available genomes are: {children}" ) raise ValueError(msg) @@ -475,10 +479,10 @@ def read_visium( if not f.exists(): if any(x in str(f) for x in ["hires_image", "lowres_image"]): logg.warning( - f"You seem to be missing an image file.\nCould not find '{f}'." + f"You seem to be missing an image file.\nCould not find {f}." ) else: - msg = f"Could not find '{f}'" + msg = f"Could not find {f}" raise OSError(msg) adata.uns["spatial"][library_id]["images"] = dict() diff --git a/src/scanpy/tools/_dpt.py b/src/scanpy/tools/_dpt.py index 231c3ee06..e92fc726c 100644 --- a/src/scanpy/tools/_dpt.py +++ b/src/scanpy/tools/_dpt.py @@ -18,7 +18,7 @@ def _diffmap(adata, n_comps=15, neighbors_key=None, random_state=0): - start = logg.info(f"computing Diffusion Maps using n_comps={n_comps}(=n_dcs)") + start = logg.info(f"computing Diffusion Maps using {n_comps=}(=n_dcs)") dpt = DPT(adata, neighbors_key=neighbors_key) dpt.compute_transitions() dpt.compute_eigen(n_comps=n_comps, random_state=random_state) @@ -153,7 +153,7 @@ def dpt( allow_kendall_tau_shift=allow_kendall_tau_shift, neighbors_key=neighbors_key, ) - start = logg.info(f"computing Diffusion Pseudotime using n_dcs={n_dcs}") + start = logg.info(f"computing Diffusion Pseudotime using {n_dcs=}") if n_branchings > 1: logg.info(" this uses a hierarchical implementation") if dpt.iroot is not None: diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py index 949a44251..2a47e095a 100644 --- a/src/scanpy/tools/_ingest.py +++ b/src/scanpy/tools/_ingest.py @@ -189,7 +189,7 @@ def __init__(self, dim, axis=0, vals=None): def __setitem__(self, key, value): if value.shape[self._axis] != self._dim: msg = ( - f"Value passed for key '{key}' is of incorrect shape. " + f"Value passed for key {key!r} is of incorrect shape. " f"Value has shape {value.shape[self._axis]} " f"for dimension {self._axis} while " f"it should have {self._dim}." diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py index 388f05ef8..9f1fbf23e 100644 --- a/src/scanpy/tools/_leiden.py +++ b/src/scanpy/tools/_leiden.py @@ -121,7 +121,7 @@ def leiden( """ if flavor not in {"igraph", "leidenalg"}: msg = ( - f"flavor must be either 'igraph' or 'leidenalg', but '{flavor}' was passed" + f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed" ) raise ValueError(msg) _utils.ensure_igraph() diff --git a/src/scanpy/tools/_utils_clustering.py b/src/scanpy/tools/_utils_clustering.py index f8690d706..3c771e5d7 100644 --- a/src/scanpy/tools/_utils_clustering.py +++ b/src/scanpy/tools/_utils_clustering.py @@ -41,7 +41,7 @@ def restrict_adjacency( raise ValueError(msg) for c in restrict_categories: if c not in adata.obs[restrict_key].cat.categories: - msg = f"'{c}' is not a valid category for '{restrict_key}'" + msg = f"{c!r} is not a valid category for {restrict_key!r}" raise ValueError(msg) restrict_indices = adata.obs[restrict_key].isin(restrict_categories).values adjacency = adjacency[restrict_indices, :]