From d6fb42641afbeebcd069ecd71b43e762a27c0094 Mon Sep 17 00:00:00 2001
From: Phil Schaf <flying-sheep@web.de>
Date: Mon, 13 Jan 2025 12:34:30 +0100
Subject: [PATCH] Better error messages

---
 src/scanpy/_utils/__init__.py                 |  6 ++--
 src/scanpy/get/get.py                         |  6 ++--
 src/scanpy/plotting/_anndata.py               | 31 ++++++++++++-------
 src/scanpy/plotting/_baseplot_class.py        | 14 +++------
 src/scanpy/plotting/_tools/__init__.py        |  4 +--
 src/scanpy/plotting/_tools/paga.py            |  2 +-
 src/scanpy/plotting/_tools/scatterplots.py    |  6 ++--
 src/scanpy/plotting/_utils.py                 |  4 +--
 .../preprocessing/_highly_variable_genes.py   |  2 +-
 src/scanpy/preprocessing/_pca/__init__.py     |  2 +-
 src/scanpy/preprocessing/_qc.py               |  2 +-
 src/scanpy/readwrite.py                       | 22 +++++++------
 src/scanpy/tools/_dpt.py                      |  4 +--
 src/scanpy/tools/_ingest.py                   |  2 +-
 src/scanpy/tools/_leiden.py                   |  2 +-
 src/scanpy/tools/_utils_clustering.py         |  2 +-
 16 files changed, 58 insertions(+), 53 deletions(-)

diff --git a/src/scanpy/_utils/__init__.py b/src/scanpy/_utils/__init__.py
index 58a2c57d2..326ea216d 100644
--- a/src/scanpy/_utils/__init__.py
+++ b/src/scanpy/_utils/__init__.py
@@ -1076,7 +1076,7 @@ def __init__(self, adata: AnnData, key=None):
             self._dists_key = "distances"
         else:
             if key not in adata.uns:
-                msg = f'No "{key}" in .uns'
+                msg = f"No {key!r} in .uns"
                 raise KeyError(msg)
             self._neighbors_dict = adata.uns[key]
             self._conns_key = self._neighbors_dict["connectivities_key"]
@@ -1110,12 +1110,12 @@ def __getitem__(self, key: Literal["connectivities_key"]) -> str: ...
     def __getitem__(self, key: str):
         if key == "distances":
             if "distances" not in self:
-                msg = f'No "{self._dists_key}" in .obsp'
+                msg = f"No {self._dists_key!r} in .obsp"
                 raise KeyError(msg)
             return self._distances
         elif key == "connectivities":
             if "connectivities" not in self:
-                msg = f'No "{self._conns_key}" in .obsp'
+                msg = f"No {self._conns_key!r} in .obsp"
                 raise KeyError(msg)
             return self._connectivities
         elif key == "connectivities_key":
diff --git a/src/scanpy/get/get.py b/src/scanpy/get/get.py
index e4781b125..abfa51d1f 100644
--- a/src/scanpy/get/get.py
+++ b/src/scanpy/get/get.py
@@ -170,7 +170,7 @@ def _check_indices(
         if key in dim_df.columns:
             col_keys.append(key)
             if key in alt_names.index:
-                msg = f"The key '{key}' is found in both adata.{dim} and {alt_repr}.{alt_search_repr}."
+                msg = f"The key {key!r} is found in both adata.{dim} and {alt_repr}.{alt_search_repr}."
                 raise KeyError(msg)
         elif key in alt_names.index:
             val = alt_names[key]
@@ -178,7 +178,7 @@ def _check_indices(
                 # while var_names must be unique, adata.var[gene_symbols] does not
                 # It's still ambiguous to refer to a duplicated entry though.
                 assert alias_index is not None
-                msg = f"Found duplicate entries for '{key}' in {alt_repr}.{alt_search_repr}."
+                msg = f"Found duplicate entries for {key!r} in {alt_repr}.{alt_search_repr}."
                 raise KeyError(msg)
             index_keys.append(val)
             index_aliases.append(key)
@@ -186,7 +186,7 @@ def _check_indices(
             not_found.append(key)
     if len(not_found) > 0:
         msg = (
-            f"Could not find keys '{not_found}' in columns of `adata.{dim}` or in"
+            f"Could not find keys {not_found!r} in columns of `adata.{dim}` or in"
             f" {alt_repr}.{alt_search_repr}."
         )
         raise KeyError(msg)
diff --git a/src/scanpy/plotting/_anndata.py b/src/scanpy/plotting/_anndata.py
index a411ec7f5..75dd210c0 100755
--- a/src/scanpy/plotting/_anndata.py
+++ b/src/scanpy/plotting/_anndata.py
@@ -2284,20 +2284,12 @@ def _reorder_categories_after_dendrogram(
     'var_group_labels', and 'var_group_positions'
     """
 
-    dendrogram_key = _get_dendrogram_key(adata, dendrogram_key, groupby)
-
     if isinstance(groupby, str):
         groupby = [groupby]
 
-    dendro_info = adata.uns[dendrogram_key]
-    if groupby != dendro_info["groupby"]:
-        msg = (
-            "Incompatible observations. The precomputed dendrogram contains "
-            f"information for the observation: '{groupby}' while the plot is "
-            f"made for the observation: '{dendro_info['groupby']}. "
-            "Please run `sc.tl.dendrogram` using the right observation.'"
-        )
-        raise ValueError(msg)
+    dendro_info = adata.uns[
+        _get_dendrogram_key(adata, dendrogram_key, groupby, validate_groupby=True)
+    ]
 
     if categories is None:
         categories = adata.obs[dendro_info["groupby"]].cat.categories
@@ -2371,7 +2363,11 @@ def _format_first_three_categories(categories):
 
 
 def _get_dendrogram_key(
-    adata: AnnData, dendrogram_key: str | None, groupby: str | Sequence[str]
+    adata: AnnData,
+    dendrogram_key: str | None,
+    groupby: str | Sequence[str],
+    *,
+    validate_groupby: bool = False,
 ) -> str:
     # the `dendrogram_key` can be a bool an NoneType or the name of the
     # dendrogram key. By default the name of the dendrogram key is 'dendrogram'
@@ -2401,6 +2397,17 @@ def _get_dendrogram_key(
         )
         raise ValueError(msg)
 
+    if validate_groupby:
+        existing_groupby = adata.uns[dendrogram_key]["groupby"]
+        if groupby != existing_groupby:
+            msg = (
+                "Incompatible observations. The precomputed dendrogram contains "
+                f"information for the observation: {existing_groupby!r} "
+                f"while the plot is made for the observation: {groupby!r}. "
+                "Please run `sc.tl.dendrogram` using the right observation."
+            )
+            raise ValueError(msg)
+
     return dendrogram_key
 
 
diff --git a/src/scanpy/plotting/_baseplot_class.py b/src/scanpy/plotting/_baseplot_class.py
index 3b54ca4ad..e14d387f8 100644
--- a/src/scanpy/plotting/_baseplot_class.py
+++ b/src/scanpy/plotting/_baseplot_class.py
@@ -899,17 +899,11 @@ def _format_first_three_categories(_categories):
                 _categories = _categories[:3] + ["etc."]
             return ", ".join(_categories)
 
-        key = _get_dendrogram_key(self.adata, dendrogram_key, self.groupby)
-
-        dendro_info = self.adata.uns[key]
-        if self.groupby != dendro_info["groupby"]:
-            msg = (
-                "Incompatible observations. The precomputed dendrogram contains "
-                f"information for the observation: '{self.groupby}' while the plot is "
-                f"made for the observation: '{dendro_info['groupby']}. "
-                "Please run `sc.tl.dendrogram` using the right observation.'"
+        dendro_info = self.adata.uns[
+            _get_dendrogram_key(
+                self.adata, dendrogram_key, self.groupby, validate_groupby=True
             )
-            raise ValueError(msg)
+        ]
 
         # order of groupby categories
         categories_idx_ordered = dendro_info["categories_idx_ordered"]
diff --git a/src/scanpy/plotting/_tools/__init__.py b/src/scanpy/plotting/_tools/__init__.py
index 6af6c178e..8f189121e 100644
--- a/src/scanpy/plotting/_tools/__init__.py
+++ b/src/scanpy/plotting/_tools/__init__.py
@@ -400,7 +400,7 @@ def rank_genes_groups(
     if n_genes < 1:
         msg = (
             "Specifying a negative number for n_genes has not been implemented for "
-            f"this plot. Received n_genes={n_genes}."
+            f"this plot. Received {n_genes=!r}."
         )
         raise NotImplementedError(msg)
 
@@ -1535,7 +1535,7 @@ def embedding_density(
 
     if f"X_{basis}" not in adata.obsm_keys():
         msg = (
-            f"Cannot find the embedded representation `adata.obsm[X_{basis!r}]`. "
+            f"Cannot find the embedded representation `adata.obsm['X_{basis}']`. "
             "Compute the embedding first."
         )
         raise ValueError(msg)
diff --git a/src/scanpy/plotting/_tools/paga.py b/src/scanpy/plotting/_tools/paga.py
index 661d571ca..a4b2de344 100644
--- a/src/scanpy/plotting/_tools/paga.py
+++ b/src/scanpy/plotting/_tools/paga.py
@@ -1166,7 +1166,7 @@ def moving_average(a):
             if node not in groups_names_set:
                 msg = (
                     f"Each node/group needs to be in {groups_names.tolist()} "
-                    f"(`groups_key`={groups_key!r}) not {node!r}."
+                    f"({groups_key=!r}) not {node!r}."
                 )
                 raise ValueError(msg)
             nodes_ints.append(groups_names.get_loc(node))
diff --git a/src/scanpy/plotting/_tools/scatterplots.py b/src/scanpy/plotting/_tools/scatterplots.py
index 2c6463cfb..cb3c9d7c6 100644
--- a/src/scanpy/plotting/_tools/scatterplots.py
+++ b/src/scanpy/plotting/_tools/scatterplots.py
@@ -159,8 +159,8 @@ def embedding(
         use_raw = layer is None and adata.raw is not None
     if use_raw and layer is not None:
         msg = (
-            "Cannot use both a layer and the raw representation. Was passed:"
-            f"use_raw={use_raw}, layer={layer}."
+            "Cannot use both a layer and the raw representation. "
+            f"Was passed: {use_raw=!r}, {layer=!r}."
         )
         raise ValueError(msg)
     if use_raw and adata.raw is None:
@@ -1167,7 +1167,7 @@ def _get_basis(adata: AnnData, basis: str) -> np.ndarray:
     elif f"X_{basis}" in adata.obsm:
         return adata.obsm[f"X_{basis}"]
     else:
-        msg = f"Could not find '{basis}' or 'X_{basis}' in .obsm"
+        msg = f"Could not find {basis!r} or 'X_{basis}' in .obsm"
         raise KeyError(msg)
 
 
diff --git a/src/scanpy/plotting/_utils.py b/src/scanpy/plotting/_utils.py
index 178451b66..b6cd92003 100644
--- a/src/scanpy/plotting/_utils.py
+++ b/src/scanpy/plotting/_utils.py
@@ -398,7 +398,7 @@ def _validate_palette(adata: AnnData, key: str) -> None:
             else:
                 logg.warning(
                     f"The following color value found in adata.uns['{key}_colors'] "
-                    f"is not valid: '{color}'. Default colors will be used instead."
+                    f"is not valid: {color!r}. Default colors will be used instead."
                 )
                 _set_default_colors_for_categorical_obs(adata, key)
                 _palette = None
@@ -633,7 +633,7 @@ def scatter_group(
 
         color = rgb2hex(adata.uns[key + "_colors"][cat_code])
     if not is_color_like(color):
-        msg = f'"{color}" is not a valid matplotlib color.'
+        msg = f"{color!r} is not a valid matplotlib color."
         raise ValueError(msg)
     data = [Y[mask_obs, 0], Y[mask_obs, 1]]
     if projection == "3d":
diff --git a/src/scanpy/preprocessing/_highly_variable_genes.py b/src/scanpy/preprocessing/_highly_variable_genes.py
index 1bbd28b16..356fa8f03 100644
--- a/src/scanpy/preprocessing/_highly_variable_genes.py
+++ b/src/scanpy/preprocessing/_highly_variable_genes.py
@@ -72,7 +72,7 @@ def _highly_variable_genes_seurat_v3(
 
     if check_values and not check_nonnegative_integers(data):
         warnings.warn(
-            f"`flavor='{flavor}'` expects raw count data, but non-integers were found.",
+            f"`{flavor=!r}` expects raw count data, but non-integers were found.",
             UserWarning,
         )
 
diff --git a/src/scanpy/preprocessing/_pca/__init__.py b/src/scanpy/preprocessing/_pca/__init__.py
index 512815052..db7886a29 100644
--- a/src/scanpy/preprocessing/_pca/__init__.py
+++ b/src/scanpy/preprocessing/_pca/__init__.py
@@ -239,7 +239,7 @@ def pca(
         min_dim = min(adata_comp.n_vars, adata_comp.n_obs)
         n_comps = min_dim - 1 if min_dim <= settings.N_PCS else settings.N_PCS
 
-    logg.info(f"    with n_comps={n_comps}")
+    logg.info(f"    with {n_comps=}")
 
     X = _get_obs_rep(adata_comp, layer=layer)
     if is_backed_type(X) and layer is not None:
diff --git a/src/scanpy/preprocessing/_qc.py b/src/scanpy/preprocessing/_qc.py
index def4feaf4..5af8def04 100644
--- a/src/scanpy/preprocessing/_qc.py
+++ b/src/scanpy/preprocessing/_qc.py
@@ -34,7 +34,7 @@ def _choose_mtx_rep(adata, *, use_raw: bool = False, layer: str | None = None):
     if use_raw and is_layer:
         msg = (
             "Cannot use expression from both layer and raw. You provided:"
-            f"'use_raw={use_raw}' and 'layer={layer}'"
+            f" {use_raw=!r} and {layer=!r}"
         )
         raise ValueError(msg)
     if is_layer:
diff --git a/src/scanpy/readwrite.py b/src/scanpy/readwrite.py
index d06e6dc3e..c568519cd 100644
--- a/src/scanpy/readwrite.py
+++ b/src/scanpy/readwrite.py
@@ -41,6 +41,7 @@
 from ._utils import _empty
 
 if TYPE_CHECKING:
+    from datetime import datetime
     from typing import BinaryIO, Literal
 
     from ._utils import Empty
@@ -221,7 +222,7 @@ def read_10x_h5(
         if genome:
             if genome not in adata.var["genome"].values:
                 msg = (
-                    f"Could not find data corresponding to genome '{genome}' in '{filename}'. "
+                    f"Could not find data corresponding to genome {genome!r} in {filename}. "
                     f"Available genomes are: {list(adata.var['genome'].unique())}."
                 )
                 raise ValueError(msg)
@@ -231,29 +232,32 @@ def read_10x_h5(
         if adata.is_view:
             adata = adata.copy()
     else:
-        adata = _read_legacy_10x_h5(filename, genome=genome, start=start)
+        adata = _read_legacy_10x_h5(Path(filename), genome=genome, start=start)
     return adata
 
 
-def _read_legacy_10x_h5(filename, *, genome=None, start=None):
+def _read_legacy_10x_h5(
+    path: Path, *, genome: str | None = None, start: datetime | None = None
+):
     """
     Read hdf5 file from Cell Ranger v2 or earlier versions.
     """
-    with h5py.File(str(filename), "r") as f:
+    with h5py.File(str(path), "r") as f:
         try:
             children = list(f.keys())
             if not genome:
                 if len(children) > 1:
                     msg = (
-                        f"'{filename}' contains more than one genome. For legacy 10x h5 "
-                        "files you must specify the genome if more than one is present. "
+                        f"{path} contains more than one genome. "
+                        "For legacy 10x h5 files you must specify the genome "
+                        "if more than one is present. "
                         f"Available genomes are: {children}"
                     )
                     raise ValueError(msg)
                 genome = children[0]
             elif genome not in children:
                 msg = (
-                    f"Could not find genome '{genome}' in '{filename}'. "
+                    f"Could not find genome {genome!r} in {path}. "
                     f"Available genomes are: {children}"
                 )
                 raise ValueError(msg)
@@ -475,10 +479,10 @@ def read_visium(
             if not f.exists():
                 if any(x in str(f) for x in ["hires_image", "lowres_image"]):
                     logg.warning(
-                        f"You seem to be missing an image file.\nCould not find '{f}'."
+                        f"You seem to be missing an image file.\nCould not find {f}."
                     )
                 else:
-                    msg = f"Could not find '{f}'"
+                    msg = f"Could not find {f}"
                     raise OSError(msg)
 
         adata.uns["spatial"][library_id]["images"] = dict()
diff --git a/src/scanpy/tools/_dpt.py b/src/scanpy/tools/_dpt.py
index 231c3ee06..e92fc726c 100644
--- a/src/scanpy/tools/_dpt.py
+++ b/src/scanpy/tools/_dpt.py
@@ -18,7 +18,7 @@
 
 
 def _diffmap(adata, n_comps=15, neighbors_key=None, random_state=0):
-    start = logg.info(f"computing Diffusion Maps using n_comps={n_comps}(=n_dcs)")
+    start = logg.info(f"computing Diffusion Maps using {n_comps=}(=n_dcs)")
     dpt = DPT(adata, neighbors_key=neighbors_key)
     dpt.compute_transitions()
     dpt.compute_eigen(n_comps=n_comps, random_state=random_state)
@@ -153,7 +153,7 @@ def dpt(
         allow_kendall_tau_shift=allow_kendall_tau_shift,
         neighbors_key=neighbors_key,
     )
-    start = logg.info(f"computing Diffusion Pseudotime using n_dcs={n_dcs}")
+    start = logg.info(f"computing Diffusion Pseudotime using {n_dcs=}")
     if n_branchings > 1:
         logg.info("    this uses a hierarchical implementation")
     if dpt.iroot is not None:
diff --git a/src/scanpy/tools/_ingest.py b/src/scanpy/tools/_ingest.py
index 949a44251..2a47e095a 100644
--- a/src/scanpy/tools/_ingest.py
+++ b/src/scanpy/tools/_ingest.py
@@ -189,7 +189,7 @@ def __init__(self, dim, axis=0, vals=None):
     def __setitem__(self, key, value):
         if value.shape[self._axis] != self._dim:
             msg = (
-                f"Value passed for key '{key}' is of incorrect shape. "
+                f"Value passed for key {key!r} is of incorrect shape. "
                 f"Value has shape {value.shape[self._axis]} "
                 f"for dimension {self._axis} while "
                 f"it should have {self._dim}."
diff --git a/src/scanpy/tools/_leiden.py b/src/scanpy/tools/_leiden.py
index 388f05ef8..9f1fbf23e 100644
--- a/src/scanpy/tools/_leiden.py
+++ b/src/scanpy/tools/_leiden.py
@@ -121,7 +121,7 @@ def leiden(
     """
     if flavor not in {"igraph", "leidenalg"}:
         msg = (
-            f"flavor must be either 'igraph' or 'leidenalg', but '{flavor}' was passed"
+            f"flavor must be either 'igraph' or 'leidenalg', but {flavor!r} was passed"
         )
         raise ValueError(msg)
     _utils.ensure_igraph()
diff --git a/src/scanpy/tools/_utils_clustering.py b/src/scanpy/tools/_utils_clustering.py
index f8690d706..3c771e5d7 100644
--- a/src/scanpy/tools/_utils_clustering.py
+++ b/src/scanpy/tools/_utils_clustering.py
@@ -41,7 +41,7 @@ def restrict_adjacency(
         raise ValueError(msg)
     for c in restrict_categories:
         if c not in adata.obs[restrict_key].cat.categories:
-            msg = f"'{c}' is not a valid category for '{restrict_key}'"
+            msg = f"{c!r} is not a valid category for {restrict_key!r}"
             raise ValueError(msg)
     restrict_indices = adata.obs[restrict_key].isin(restrict_categories).values
     adjacency = adjacency[restrict_indices, :]