diff --git a/docs/src/facet.png b/docs/src/facet.png new file mode 100644 index 0000000..e99fc30 Binary files /dev/null and b/docs/src/facet.png differ diff --git a/docs/src/mass_errors.png b/docs/src/mass_errors.png new file mode 100644 index 0000000..b2d44d7 Binary files /dev/null and b/docs/src/mass_errors.png differ diff --git a/docs/src/plotting.md b/docs/src/plotting.md index 6cad870..603d789 100644 --- a/docs/src/plotting.md +++ b/docs/src/plotting.md @@ -9,7 +9,7 @@ Some of the arguments that can be provided to `spectrum_utils.plot.spectrum(...) - `color_ions`: Boolean flag indicating whether the annotated peaks should be colored. - `annot_fmt`: A function that converts a `FragmentAnnotation` to a label to annotate the corresponding peak (see below). - `annot_kws`: A dictionary with options to customize peak label texts. -See the [`matplotlib.text.Text` documentation](https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text) for available options. + See the [`matplotlib.text.Text` documentation](https://matplotlib.org/3.1.1/api/text_api.html#matplotlib.text.Text) for available options. - `grid`: Enable/disable the grid. See the [API reference](api.md) for full details on how to use these settings. @@ -121,6 +121,75 @@ plt.close() All of the advanced plotting arguments described above can be provided for the mirror plot as well using the `spectrum_kws` argument. +## Mass error plot + +The difference between the observed and the theoretical mass of annotated fragment ions can be visualized in a mass error plot. In these bubble plots, the size of the bubbles corresponds to the intensity of the fragment ions, the x-axis shows the observed _m/z_, and the y-axis shows the mass error either ppm or in Dalton. Use `spectrum_utils.plot.mass_errors(...)` to plot mass errors: + +```python +import matplotlib.pyplot as plt +import spectrum_utils.plot as sup +import spectrum_utils.spectrum as sus + +usi = "mzspec:PXD022531:j12541_C5orf38:scan:12368" +peptide = "VAATLEILTLK/2" +spectrum = sus.MsmsSpectrum.from_usi(usi) +spectrum.annotate_proforma( + peptide, + fragment_tol_mass=0.05, + fragment_tol_mode="Da", + ion_types="aby", + max_ion_charge=2, + neutral_losses={"NH3": -17.026549, "H2O": -18.010565}, +) + +fig, ax = plt.subplots(figsize=(10.5, 3)) +sup.mass_errors(spectrum, plot_unknown=False, ax=ax) +plt.savefig("mass_errors.png", dpi=300, bbox_inches="tight", transparent=True) +plt.close() +``` + +![Mass error plot](mass_errors.png) + +## Figure-level facet plot + +The figure-level `spectrum_utils.plot.facet` function combines the `spectrum_utils.plot.mirror` and `spectrum_utils.plot.mass_errors` functionality: + +```python +import matplotlib.pyplot as plt +import spectrum_utils.plot as sup +import spectrum_utils.spectrum as sus + +peptide = "VAATLEILTLK/2" +annotation_settings = { + "fragment_tol_mass": 0.05, + "fragment_tol_mode": "Da", + "ion_types": "aby", + "max_ion_charge": 2, + "neutral_losses": {"NH3": -17.026549, "H2O": -18.010565}, +} + +usi_top = "mzspec:PXD022531:j12541_C5orf38:scan:12368" +spectrum_top = sus.MsmsSpectrum.from_usi(usi_top) +spectrum_top.annotate_proforma(peptide, **annotation_settings) + +usi_bottom = "mzspec:PXD022531:b11156_PRAMEF17:scan:22140" +spectrum_bottom = sus.MsmsSpectrum.from_usi(usi_bottom) +spectrum_bottom.annotate_proforma(peptide, **annotation_settings) + +fig = sup.facet( + spec_top=spectrum_top, + spec_mass_errors=spectrum_top, + spec_bottom=spectrum_bottom, + mass_errors_kws={"plot_unknown": False}, + height=7, + width=10.5, +) +plt.savefig("facet.png", dpi=300, bbox_inches="tight", transparent=True) +plt.close() +``` + +![Facet plot](facet.png) + ## Interactive plotting Besides the standard plotting functionality in `spectrum_utils.plot`, spectrum_utils also contains interactive plotting functionality in `spectrum_utils.iplot`. diff --git a/spectrum_utils/plot.py b/spectrum_utils/plot.py index 408eca0..0cbac91 100755 --- a/spectrum_utils/plot.py +++ b/spectrum_utils/plot.py @@ -1,13 +1,24 @@ import functools import itertools import math -from typing import Callable, Dict, Iterable, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Iterable, + Mapping, + Optional, + Tuple, + Union, +) import matplotlib.pyplot as plt import matplotlib.ticker as mticker +import numpy as np import spectrum_utils.fragment_annotation as fa from spectrum_utils.spectrum import MsmsSpectrum +from spectrum_utils.utils import da_to_ppm, ppm_to_da colors = { @@ -40,6 +51,31 @@ } +def _format_ax( + ax: plt.Axes, + grid: Union[bool, str], +): + """Set ax formatting options that are common to all plot types.""" + ax.xaxis.set_minor_locator(mticker.AutoLocator()) + ax.yaxis.set_minor_locator(mticker.AutoLocator()) + ax.xaxis.set_minor_locator(mticker.AutoMinorLocator()) + ax.yaxis.set_minor_locator(mticker.AutoMinorLocator()) + if grid in (True, "both", "major"): + ax.grid(True, "major", color="#9E9E9E", linewidth=0.2) + if grid in (True, "both", "minor"): + ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2) + ax.set_axisbelow(True) + ax.tick_params(axis="both", which="both", labelsize="small") + ax.set_xlabel("m/z", style="italic") + + +def _get_xlim(spec: MsmsSpectrum) -> Tuple[float, float]: + """Get plot x-axis limits for a given spectrum.""" + round_mz = 50 + max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz + return 0.0, max_mz + + def _annotate_ion( mz: float, intensity: float, @@ -167,30 +203,15 @@ def spectrum( if ax is None: ax = plt.gca() + _format_ax(ax, grid) ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1.0)) - ax.set_ylim(*(0, 1) if not mirror_intensity else (-1, 0)) - - ax.xaxis.set_minor_locator(mticker.AutoLocator()) - ax.yaxis.set_minor_locator(mticker.AutoLocator()) - ax.xaxis.set_minor_locator(mticker.AutoMinorLocator()) - ax.yaxis.set_minor_locator(mticker.AutoMinorLocator()) - if grid in (True, "both", "major"): - ax.grid(True, "major", color="#9E9E9E", linewidth=0.2) - if grid in (True, "both", "minor"): - ax.grid(True, "minor", color="#9E9E9E", linewidth=0.2) - ax.set_axisbelow(True) - - ax.tick_params(axis="both", which="both", labelsize="small") - - ax.set_xlabel("m/z", style="italic") + ax.set_ylim(*(0, 1.15) if not mirror_intensity else (-1.15, 0)) ax.set_ylabel("Intensity") if len(spec.mz) == 0: return ax - round_mz = 50 - max_mz = math.ceil(spec.mz[-1] / round_mz + 1) * round_mz - ax.set_xlim(0, max_mz) + ax.set_xlim(*_get_xlim(spec)) max_intensity = spec.intensity.max() annotations = ( @@ -227,6 +248,129 @@ def spectrum( return ax +def mass_errors( + spec: MsmsSpectrum, + *, + unit: Optional[str] = None, + plot_unknown: bool = True, + color_ions: bool = True, + grid: Union[bool, str] = True, + ax: Optional[plt.Axes] = None, +) -> plt.Axes: + """ + Plot mass error bubble plot for a given spectrum. + + A mass error bubble plot shows the error between observed and theoretical + mass (y-axis) in function of the **m/z** (x-axis) for each peak in the + spectrum. The size of the bubble is proportional to the intensity of the + peak. + + Parameters + ---------- + spec : MsmsSpectrum + The spectrum with mass errors to be plotted. + unit : str, optional + The unit of the mass errors, either 'ppm', 'Da', or None. If None, + the unit that was used for spectrum annotation is used. The default is + None. + plot_unknown : bool, optional + Flag indicating whether or not to plot mass errors for unknown peaks. + color_ions : bool, optional + Flag indicating whether or not to color dots for annotated fragment + ions. The default is True. + grid : Union[bool, str], optional + Draw grid lines or not. Either a boolean to enable/disable both major + and minor grid lines or 'major'/'minor' to enable major or minor grid + lines respectively. + ax : Optional[plt.Axes], optional + Axes instance on which to plot the mass errors. If None the current + Axes instance is used. + + Returns + ------- + plt.Axes + The matplotlib Axes instance on which the mass errors are plotted. + + Notes + ----- + The mass error bubble plot was first introduced in [1]_. + + References + ---------- + .. [1] Barsnes,H., Eidhammer,I. and Martens,L. (2010) + FragmentationAnalyzer: An open-source tool to analyze MS/MS + fragmentation data. PROTEOMICS, 10, 1087–1090. + doi:10.1002/pmic.200900681 + + """ + if ax is None: + ax = plt.gca() + + _format_ax(ax, grid) + + if len(spec.mz) == 0: + ax.set_ylabel("Mass error") + ax.set_ylim(-1, 1) + return ax + + annotations = ( + spec.annotation + if spec.annotation is not None + else itertools.repeat(None, len(spec.mz)) + ) + + known_ions = [] + dot_colors = [] + mz_deltas = [] + mz_delta_units = [] + for ann in annotations: + # Use the first annotation in case there are multiple options. + ion_type = ann[0].ion_type[0] if ann is not None else None + is_known_ion = ion_type is not None and ion_type != "?" + known_ions.append(is_known_ion) + dot_colors.append(colors.get(ion_type if color_ions else None)) + mz_deltas.append(ann[0].mz_delta[0] if is_known_ion else 0.0) + mz_delta_units.append(ann[0].mz_delta[1] if is_known_ion else None) + + dot_colors = np.array(dot_colors) + mz_deltas = np.array(mz_deltas) + intensity_scaled = 500 * (spec.intensity / np.max(spec.intensity)) + mask = ( + np.ones_like(spec.mz, dtype=bool) + if plot_unknown + else np.array(known_ions) + ) + + for known_unit in ["ppm", "Da"]: + # Use `not any` instead of `all` to fail fast + if not any(u and u != known_unit for u in mz_delta_units): + annotation_unit = known_unit + break + else: + raise ValueError("Inconsistent or unknown mass units in annotations.") + if unit == "Da" and annotation_unit == "ppm": + mz_deltas = ppm_to_da(mz_deltas, spec.mz) + elif unit == "ppm" and annotation_unit == "Da": + mz_deltas = da_to_ppm(mz_deltas, spec.mz) + + y_lim = 1.2 * np.max(np.abs(mz_deltas)) + if y_lim > 0.0: + ax.set_ylim(-y_lim, y_lim) + ax.set_xlim(*_get_xlim(spec)) + ax.set_ylabel(f"Mass error ({unit or annotation_unit})") + + ax.scatter( + spec.mz[mask], + mz_deltas[mask], + s=intensity_scaled[mask], + c=dot_colors[mask], + alpha=0.5, + edgecolors="none", + ) + + return ax + + def mirror( spec_top: MsmsSpectrum, spec_bottom: MsmsSpectrum, @@ -286,3 +430,80 @@ def mirror( ) return ax + + +def facet( + spec_top: MsmsSpectrum, + spec_mass_errors: Optional[MsmsSpectrum] = None, + spec_bottom: Optional[MsmsSpectrum] = None, + spectrum_kws: Optional[Mapping[str, Any]] = None, + mass_errors_kws: Optional[Mapping[str, Any]] = None, + height: Optional[float] = None, + width: Optional[float] = None, +) -> plt.Figure: + """ + Plot a spectrum, and optionally mass errors, and a mirror spectrum. + + Parameters + ---------- + spec_top : MsmsSpectrum + The spectrum to be plotted on the top. + spec_mass_errors : Optional[MsmsSpectrum], optional + The spectrum for which mass errors are to be plotted in the middle. + spec_bottom : Optional[MsmsSpectrum], optional + The spectrum to be plotted on the bottom. + spectrum_kws : Optional[Mapping[str, Any]], optional + Keyword arguments for `plot.spectrum` for the top and bottom spectra. + mass_errors_kws : Optional[Mapping[str, Any]], optional + Keyword arguments for `plot.mass_errors`. + height : Optional[float], optional + The height of the figure in inches. + width : Optional[float], optional + The width of the figure in inches. + + Returns + ------- + plt.Figure + The matplotlib Figure instance on which the spectra and mass errors + are plotted. + """ + + n_rows = 1 + (spec_mass_errors is not None) + (spec_bottom is not None) + height_ratios = [1] + if spec_mass_errors is not None: + height_ratios.append(0.5) + if spec_bottom is not None: + height_ratios.append(1) + + fig, axes = plt.subplots( + *(n_rows, 1), + figsize=(width or 7.5, height or (3.75 if spec_bottom is None else 6)), + sharex=True, + gridspec_kw={"height_ratios": height_ratios}, + ) + axes = np.array(axes).flatten() + + spectrum(spec_top, ax=axes[0], **spectrum_kws or {}) + + if spec_mass_errors is not None: + mass_errors(spec_mass_errors, ax=axes[1], **mass_errors_kws or {}) + axes[0].get_xaxis().get_label().set_visible(False) + + if spec_bottom is not None: + spectrum( + spec_bottom, + mirror_intensity=True, + ax=axes[-1], + **spectrum_kws or {}, + ) + for ax in axes[:-1]: + ax.get_xaxis().get_label().set_visible(False) + + axes[-1].yaxis.set_major_formatter( + mticker.FuncFormatter(lambda x, pos: f"{abs(x):.0%}") + ) + + fig.align_ylabels(axes) + fig.tight_layout() + + return fig diff --git a/spectrum_utils/utils.py b/spectrum_utils/utils.py index 968e164..4c90f63 100644 --- a/spectrum_utils/utils.py +++ b/spectrum_utils/utils.py @@ -1,4 +1,7 @@ +from typing import Union + import numba as nb +import numpy as np @nb.njit(fastmath=True, cache=True) @@ -20,3 +23,45 @@ def mass_diff(mz1, mz2, mode_is_da): The mass difference(s) between the given m/z values. """ return mz1 - mz2 if mode_is_da else (mz1 - mz2) / mz2 * 10**6 + + +def da_to_ppm( + delta_mz: Union[int, np.ndarray], mz: Union[int, np.ndarray] +) -> Union[int, np.ndarray]: + """ + Convert a mass difference in Dalton to ppm. + + Parameters + ---------- + delta_mz : int or np.ndarray + Mass difference in Dalton. + mz : int or np.ndarray + m/z value of peak. + + Returns + ------- + int or np.ndarray + + """ + return delta_mz / mz * 1e6 + + +def ppm_to_da( + delta_mz: Union[int, np.ndarray], mz: Union[int, np.ndarray] +) -> Union[int, np.ndarray]: + """ + Convert a mass difference in ppm to Dalton. + + Parameters + ---------- + delta_mz : int or np.ndarray + Mass difference in ppm. + mz : int or np.ndarray + m/z value of peak. + + Returns + ------- + int or np.ndarray + + """ + return delta_mz / 1e6 * mz