Add functionality for plotting mass errors (#46)

* Add mass errors plot * Set intensity ylim to 1.1 to avoid annotation overflow * Add plot_unknown argument to mass_errors * Add reference * Implement requested changes * Add figure-level plot.full_mirror * Implement requested changes * Add documentation for mass_errors and facet to plotting.md * Implement requested changes: use width instead of aspect ratio; fix docs figure filename * Fix docs figure alt text * Update docs to facet width kwarg change
bittremieux · Jul 6, 2023 · bd1c9d5 · bd1c9d5
1 parent 3f4f553
commit bd1c9d5
Show file tree

Hide file tree

Showing 5 changed files with 355 additions and 20 deletions.
diff --git a/docs/src/facet.png b/docs/src/facet.png
diff --git a/docs/src/mass_errors.png b/docs/src/mass_errors.png
diff --git a/docs/src/plotting.md b/docs/src/plotting.md
@@ -9,7 +9,7 @@ Some of the arguments that can be provided to `spectrum_utils.plot.spectrum(...)
 - `color_ions`: Boolean flag indicating whether the annotated peaks should be colored.
 - `annot_fmt`: A function that converts a `FragmentAnnotation` to a label to annotate the corresponding peak (see below).
 - `annot_kws`: A dictionary with options to customize peak label texts.
-See the [`matplotlib.text.Text` documentation](https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text) for available options.
+  See the [`matplotlib.text.Text` documentation](https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text) for available options.
 - `grid`: Enable/disable the grid.
 
 See the [API reference](api.md) for full details on how to use these settings.
@@ -121,6 +121,75 @@ plt.close()
 
 All of the advanced plotting arguments described above can be provided for the mirror plot as well using the `spectrum_kws` argument.
 
+## Mass error plot
+
+The difference between the observed and the theoretical mass of annotated fragment ions can be visualized in a mass error plot. In these bubble plots, the size of the bubbles corresponds to the intensity of the fragment ions, the x-axis shows the observed _m/z_, and the y-axis shows the mass error either ppm or in Dalton. Use `spectrum_utils.plot.mass_errors(...)` to plot mass errors:
+
+```python
+import matplotlib.pyplot as plt
+import spectrum_utils.plot as sup
+import spectrum_utils.spectrum as sus
+
+usi = "mzspec:PXD022531:j12541_C5orf38:scan:12368"
+peptide = "VAATLEILTLK/2"
+spectrum = sus.MsmsSpectrum.from_usi(usi)
+spectrum.annotate_proforma(
+    peptide,
+    fragment_tol_mass=0.05,
+    fragment_tol_mode="Da",
+    ion_types="aby",
+    max_ion_charge=2,
+    neutral_losses={"NH3": -17.026549, "H2O": -18.010565},
+)
+
+fig, ax = plt.subplots(figsize=(10.5, 3))
+sup.mass_errors(spectrum, plot_unknown=False, ax=ax)
+plt.savefig("mass_errors.png", dpi=300, bbox_inches="tight", transparent=True)
+plt.close()
+```
+
+![Mass error plot](mass_errors.png)
+
+## Figure-level facet plot
+
+The figure-level `spectrum_utils.plot.facet` function combines the `spectrum_utils.plot.mirror` and `spectrum_utils.plot.mass_errors` functionality:
+
+```python
+import matplotlib.pyplot as plt
+import spectrum_utils.plot as sup
+import spectrum_utils.spectrum as sus
+
+peptide = "VAATLEILTLK/2"
+annotation_settings = {
+    "fragment_tol_mass": 0.05,
+    "fragment_tol_mode": "Da",
+    "ion_types": "aby",
+    "max_ion_charge": 2,
+    "neutral_losses": {"NH3": -17.026549, "H2O": -18.010565},
+}
+
+usi_top = "mzspec:PXD022531:j12541_C5orf38:scan:12368"
+spectrum_top = sus.MsmsSpectrum.from_usi(usi_top)
+spectrum_top.annotate_proforma(peptide, **annotation_settings)
+
+usi_bottom = "mzspec:PXD022531:b11156_PRAMEF17:scan:22140"
+spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom)
+spectrum_bottom.annotate_proforma(peptide, **annotation_settings)
+
+fig = sup.facet(
+    spec_top=spectrum_top,
+    spec_mass_errors=spectrum_top,
+    spec_bottom=spectrum_bottom,
+    mass_errors_kws={"plot_unknown": False},
+    height=7,
+    width=10.5,
+)
+plt.savefig("facet.png", dpi=300, bbox_inches="tight", transparent=True)
+plt.close()
+```
+
+![Facet plot](facet.png)
+
 ## Interactive plotting
 
 Besides the standard plotting functionality in `spectrum_utils.plot`, spectrum_utils also contains interactive plotting functionality in `spectrum_utils.iplot`.

diff --git a/spectrum_utils/plot.py b/spectrum_utils/plot.py
@@ -1,13 +1,24 @@
 import functools
 import itertools
 import math
-from typing import Callable, Dict, Iterable, Optional, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Mapping,
+    Optional,
+    Tuple,
+    Union,
+)
 
 import matplotlib.pyplot as plt
 import matplotlib.ticker as mticker
+import numpy as np
 
 import spectrum_utils.fragment_annotation as fa
 from spectrum_utils.spectrum import MsmsSpectrum
+from spectrum_utils.utils import da_to_ppm, ppm_to_da
 
 
 colors = {
@@ -40,6 +51,31 @@
 }
 
 
+def _format_ax(
+    ax: plt.Axes,
+    grid: Union[bool, str],
+):
+    """Set ax formatting options that are common to all plot types."""
+    ax.xaxis.set_minor_locator(mticker.AutoLocator())
+    ax.yaxis.set_minor_locator(mticker.AutoLocator())
+    ax.xaxis.set_minor_locator(mticker.AutoMinorLocator())
+    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
+    if grid in (True, "both", "major"):
+        ax.grid(True, "major", color="#9E9E9E", linewidth=0.2)
+    if grid in (True, "both", "minor"):
+        ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2)
+    ax.set_axisbelow(True)
+    ax.tick_params(axis="both", which="both", labelsize="small")
+    ax.set_xlabel("m/z", style="italic")
+
+
+def _get_xlim(spec: MsmsSpectrum) -> Tuple[float, float]:
+    """Get plot x-axis limits for a given spectrum."""
+    round_mz = 50
+    max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz
+    return 0.0, max_mz
+
+
 def _annotate_ion(
     mz: float,
     intensity: float,
@@ -167,30 +203,15 @@ def spectrum(
     if ax is None:
         ax = plt.gca()
 
+    _format_ax(ax, grid)
     ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0))
-    ax.set_ylim(*(0, 1) if not mirror_intensity else (-1, 0))
-
-    ax.xaxis.set_minor_locator(mticker.AutoLocator())
-    ax.yaxis.set_minor_locator(mticker.AutoLocator())
-    ax.xaxis.set_minor_locator(mticker.AutoMinorLocator())
-    ax.yaxis.set_minor_locator(mticker.AutoMinorLocator())
-    if grid in (True, "both", "major"):
-        ax.grid(True, "major", color="#9E9E9E", linewidth=0.2)
-    if grid in (True, "both", "minor"):
-        ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2)
-    ax.set_axisbelow(True)
-
-    ax.tick_params(axis="both", which="both", labelsize="small")
-
-    ax.set_xlabel("m/z", style="italic")
+    ax.set_ylim(*(0, 1.15) if not mirror_intensity else (-1.15, 0))
     ax.set_ylabel("Intensity")
 
     if len(spec.mz) == 0:
         return ax
 
-    round_mz = 50
-    max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz
-    ax.set_xlim(0, max_mz)
+    ax.set_xlim(*_get_xlim(spec))
 
     max_intensity = spec.intensity.max()
     annotations = (
@@ -227,6 +248,129 @@ def spectrum(
     return ax
 
 
+def mass_errors(
+    spec: MsmsSpectrum,
+    *,
+    unit: Optional[str] = None,
+    plot_unknown: bool = True,
+    color_ions: bool = True,
+    grid: Union[bool, str] = True,
+    ax: Optional[plt.Axes] = None,
+) -> plt.Axes:
+    """
+    Plot mass error bubble plot for a given spectrum.
+
+    A mass error bubble plot shows the error between observed and theoretical
+    mass (y-axis) in function of the **m/z** (x-axis) for each peak in the
+    spectrum. The size of the bubble is proportional to the intensity of the
+    peak.
+
+    Parameters
+    ----------
+    spec : MsmsSpectrum
+        The spectrum with mass errors to be plotted.
+    unit : str, optional
+        The unit of the mass errors, either 'ppm', 'Da', or None. If None,
+        the unit that was used for spectrum annotation is used. The default is
+        None.
+    plot_unknown : bool, optional
+        Flag indicating whether or not to plot mass errors for unknown peaks.
+    color_ions : bool, optional
+        Flag indicating whether or not to color dots for annotated fragment
+        ions. The default is True.
+    grid : Union[bool, str], optional
+        Draw grid lines or not. Either a boolean to enable/disable both major
+        and minor grid lines or 'major'/'minor' to enable major or minor grid
+        lines respectively.
+    ax : Optional[plt.Axes], optional
+        Axes instance on which to plot the mass errors. If None the current
+        Axes instance is used.
+
+    Returns
+    -------
+    plt.Axes
+        The matplotlib Axes instance on which the mass errors are plotted.
+
+    Notes
+    -----
+    The mass error bubble plot was first introduced in [1]_.
+
+    References
+    ----------
+    .. [1] Barsnes,H., Eidhammer,I. and Martens,L. (2010)
+       FragmentationAnalyzer: An open-source tool to analyze MS/MS
+       fragmentation data. PROTEOMICS, 10, 1087–1090.
+       doi:10.1002/pmic.200900681
+
+    """
+    if ax is None:
+        ax = plt.gca()
+
+    _format_ax(ax, grid)
+
+    if len(spec.mz) == 0:
+        ax.set_ylabel("Mass error")
+        ax.set_ylim(-1, 1)
+        return ax
+
+    annotations = (
+        spec.annotation
+        if spec.annotation is not None
+        else itertools.repeat(None, len(spec.mz))
+    )
+
+    known_ions = []
+    dot_colors = []
+    mz_deltas = []
+    mz_delta_units = []
+    for ann in annotations:
+        # Use the first annotation in case there are multiple options.
+        ion_type = ann[0].ion_type[0] if ann is not None else None
+        is_known_ion = ion_type is not None and ion_type != "?"
+        known_ions.append(is_known_ion)
+        dot_colors.append(colors.get(ion_type if color_ions else None))
+        mz_deltas.append(ann[0].mz_delta[0] if is_known_ion else 0.0)
+        mz_delta_units.append(ann[0].mz_delta[1] if is_known_ion else None)
+
+    dot_colors = np.array(dot_colors)
+    mz_deltas = np.array(mz_deltas)
+    intensity_scaled = 500 * (spec.intensity / np.max(spec.intensity))
+    mask = (
+        np.ones_like(spec.mz, dtype=bool)
+        if plot_unknown
+        else np.array(known_ions)
+    )
+
+    for known_unit in ["ppm", "Da"]:
+        # Use `not any` instead of `all` to fail fast
+        if not any(u and u != known_unit for u in mz_delta_units):
+            annotation_unit = known_unit
+            break
+    else:
+        raise ValueError("Inconsistent or unknown mass units in annotations.")
+    if unit == "Da" and annotation_unit == "ppm":
+        mz_deltas = ppm_to_da(mz_deltas, spec.mz)
+    elif unit == "ppm" and annotation_unit == "Da":
+        mz_deltas = da_to_ppm(mz_deltas, spec.mz)
+
+    y_lim = 1.2 * np.max(np.abs(mz_deltas))
+    if y_lim > 0.0:
+        ax.set_ylim(-y_lim, y_lim)
+    ax.set_xlim(*_get_xlim(spec))
+    ax.set_ylabel(f"Mass error ({unit or annotation_unit})")
+
+    ax.scatter(
+        spec.mz[mask],
+        mz_deltas[mask],
+        s=intensity_scaled[mask],
+        c=dot_colors[mask],
+        alpha=0.5,
+        edgecolors="none",
+    )
+
+    return ax
+
+
 def mirror(
     spec_top: MsmsSpectrum,
     spec_bottom: MsmsSpectrum,
@@ -286,3 +430,80 @@ def mirror(
     )
 
     return ax
+
+
+def facet(
+    spec_top: MsmsSpectrum,
+    spec_mass_errors: Optional[MsmsSpectrum] = None,
+    spec_bottom: Optional[MsmsSpectrum] = None,
+    spectrum_kws: Optional[Mapping[str, Any]] = None,
+    mass_errors_kws: Optional[Mapping[str, Any]] = None,
+    height: Optional[float] = None,
+    width: Optional[float] = None,
+) -> plt.Figure:
+    """
+    Plot a spectrum, and optionally mass errors, and a mirror spectrum.
+
+    Parameters
+    ----------
+    spec_top : MsmsSpectrum
+        The spectrum to be plotted on the top.
+    spec_mass_errors : Optional[MsmsSpectrum], optional
+        The spectrum for which mass errors are to be plotted in the middle.
+    spec_bottom : Optional[MsmsSpectrum], optional
+        The spectrum to be plotted on the bottom.
+    spectrum_kws : Optional[Mapping[str, Any]], optional
+        Keyword arguments for `plot.spectrum` for the top and bottom spectra.
+    mass_errors_kws : Optional[Mapping[str, Any]], optional
+        Keyword arguments for `plot.mass_errors`.
+    height : Optional[float], optional
+        The height of the figure in inches.
+    width : Optional[float], optional
+        The width of the figure in inches.
+
+    Returns
+    -------
+    plt.Figure
+        The matplotlib Figure instance on which the spectra and mass errors
+        are plotted.
+    """
+
+    n_rows = 1 + (spec_mass_errors is not None) + (spec_bottom is not None)
+    height_ratios = [1]
+    if spec_mass_errors is not None:
+        height_ratios.append(0.5)
+    if spec_bottom is not None:
+        height_ratios.append(1)
+
+    fig, axes = plt.subplots(
+        *(n_rows, 1),
+        figsize=(width or 7.5, height or (3.75 if spec_bottom is None else 6)),
+        sharex=True,
+        gridspec_kw={"height_ratios": height_ratios},
+    )
+    axes = np.array(axes).flatten()
+
+    spectrum(spec_top, ax=axes[0], **spectrum_kws or {})
+
+    if spec_mass_errors is not None:
+        mass_errors(spec_mass_errors, ax=axes[1], **mass_errors_kws or {})
+        axes[0].get_xaxis().get_label().set_visible(False)
+
+    if spec_bottom is not None:
+        spectrum(
+            spec_bottom,
+            mirror_intensity=True,
+            ax=axes[-1],
+            **spectrum_kws or {},
+        )
+        for ax in axes[:-1]:
+            ax.get_xaxis().get_label().set_visible(False)
+
+        axes[-1].yaxis.set_major_formatter(
+            mticker.FuncFormatter(lambda x, pos: f"{abs(x):.0%}")
+        )
+
+    fig.align_ylabels(axes)
+    fig.tight_layout()
+
+    return fig