From f25ace84af22ea0eefb0a96a705cfc7957912d13 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 16:22:02 -0700 Subject: [PATCH 01/21] rough implementation --- casanovo/casanovo.py | 90 +++++++++++++++++- casanovo/data/datasets.py | 14 +-- casanovo/data/db_utils.py | 156 ++++++++++++++++++++++++++++++++ casanovo/data/ms_io.py | 4 +- casanovo/denovo/dataloaders.py | 57 +----------- casanovo/denovo/model.py | 62 ++++++------- casanovo/denovo/model_runner.py | 39 ++++++-- 7 files changed, 307 insertions(+), 115 deletions(-) create mode 100644 casanovo/data/db_utils.py diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 7db5faa8..df3cc79f 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -214,8 +214,74 @@ def annotate( nargs=-1, type=click.Path(exists=True, dir_okay=False), ) +@click.argument( + "fasta_path", + required=True, + nargs=1, + type=click.Path(exists=True, dir_okay=False), +) +@click.option( + "--enzyme", + help="Enzyme for in silico digestion, see pyteomics.parser.expasy_rules", + type=str, + default="trypsin", +) +@click.option( + "--digestion", + help="Digestion: full, partial", + type=click.Choice( + ["full", "partial"], + case_sensitive=False, + ), + default="full", +) +@click.option( + "--missed_cleavages", + help="Number of allowed missed cleavages", + type=int, + default=0, +) +@click.option( + "--max_mods", + help="Maximum number of modifications per peptide", + type=int, + default=0, +) +@click.option( + "--min_length", + help="Minimum peptide length", + type=int, + default=6, +) +@click.option( + "--max_length", + help="Maximum peptide length", + type=int, + default=50, +) +@click.option( + "--precursor_tolerance", + help="Precursor tolerance window size (ppm)", + type=int, + default=20, +) +@click.option( + "--isotope_error", + help="Isotope error levels to consider (list of ints, e.g: 1,2)", + type=str, + default="0", +) def db_search( peak_path: Tuple[str], + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: int, + max_length: int, + precursor_tolerance: int, + isotope_error: str, model: Optional[str], config: Optional[str], output: Optional[str], @@ -223,14 +289,30 @@ def db_search( ) -> None: """Perform a search using Casanovo-DB. - PEAK_PATH must be one MGF file that has ANNOTATED spectra, - as output by annotate mode. + PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. """ output = setup_logging(output, verbosity) config, model = setup_model(model, config, output, False) with ModelRunner(config, model) as runner: - logger.info("DB-searching peptides from: %s", peak_path) - runner.db_search(peak_path, output) + logger.info("Performing database search on:") + for peak_file in peak_path: + logger.info(" %s", peak_file) + logger.info("Using the following FASTA file:") + logger.info(" %s", fasta_path) + + runner.db_search( + peak_path, + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, + precursor_tolerance, + isotope_error, + output, + ) logger.info("DONE!") diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index aff6af85..59f56b68 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -134,6 +134,8 @@ def _process_peaks( The precursor m/z. precursor_charge : int The precursor charge. + track_spectrum_id : Optional[bool] + Whether to keep track of the identifier of the MS/MS spectra. 
Returns ------- @@ -212,8 +214,6 @@ class AnnotatedSpectrumDataset(SpectrumDataset): random_state : Optional[int] The NumPy random state. ``None`` leaves mass spectra in the order they were parsed. - track_spectrum_id : Optional[bool] - Whether to keep track of the identifier of the MS/MS spectra. """ def __init__( @@ -225,7 +225,6 @@ def __init__( min_intensity: float = 0.01, remove_precursor_tol: float = 2.0, random_state: Optional[int] = None, - track_spectrum_id: Optional[bool] = False, ): super().__init__( annotated_spectrum_index, @@ -236,7 +235,6 @@ def __init__( remove_precursor_tol=remove_precursor_tol, random_state=random_state, ) - self.track_spectrum_id = track_spectrum_id def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: """ @@ -268,12 +266,4 @@ def __getitem__(self, idx: int) -> Tuple[torch.Tensor, float, int, str]: spectrum = self._process_peaks( mz_array, int_array, precursor_mz, precursor_charge ) - if self.track_spectrum_id: - return ( - spectrum, - precursor_mz, - precursor_charge, - peptide, - self.get_spectrum_id(idx), - ) return spectrum, precursor_mz, precursor_charge, peptide diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py new file mode 100644 index 00000000..c961e35e --- /dev/null +++ b/casanovo/data/db_utils.py @@ -0,0 +1,156 @@ +"""Unique methods used within db-search mode""" + +import os +import depthcharge.masses +from pyteomics import fasta, parser +import bisect + +HYDROGEN = 1.007825035 +OXYGEN = 15.99491463 +H2O = 2 * HYDROGEN + OXYGEN +PROTON = 1.00727646677 +ISOTOPE_SPACING = 1.003355 # - 0.00288 + +var_mods = { + "d": ["N", "Q"], + "ox": ["M"], + "ace-": True, + "carb-": True, + "nh3x-": True, + "carbnh3x-": True, +} +fixed_mods = {"carbm": ["C"]} + + +def convert_from_modx(seq): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. + + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq + + +def digest_fasta( + fasta_filename, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, +): + """TODO: Add docstring""" + + # Verify the eistence of the file: + if not os.path.isfile(fasta_filename): + print(f"File {fasta_filename} does not exist.") + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + + fasta_data = fasta.read(fasta_filename) + peptide_list = [] + if digestion in ["full", "partial"]: + semi = True if digestion == "partial" else False + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + peptide_list.extend([(pep, protein) for pep in pep_set]) + else: + raise ValueError(f"Digestion type {digestion} not recognized.") + + # Generate modified peptides + mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") + mass_calculator.masses.update({"X": 0.0}) # TODO: REMOVE? 
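For readers less familiar with pyteomics, the cleavage step above reduces to a single parser.cleave call. A minimal standalone sketch, using a prefix of the tiny test protein from the test suite and the same options that digest_fasta forwards from the CLI:

from pyteomics import parser

protein = "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCR"

# Fully tryptic digestion with one allowed missed cleavage; semi=True would
# correspond to the 'partial' digestion option.
peptides = parser.cleave(
    protein,
    rule=parser.expasy_rules["trypsin"],
    missed_cleavages=1,
    semi=False,
)
print(sorted(peptides))

parser.cleave returns a set of peptide strings, which is why the loop above re-attaches the protein accession to each peptide before the list is later sorted by mass. The isoform expansion further down follows the same pattern: parser.isoforms emits modX strings such as "oxM" and "carbmC", and convert_from_modx rewrites them into Casanovo's mass-delta notation ("M+15.995", "C+57.021").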
+ mod_peptide_list = [] + for pep, prot in peptide_list: + if len(pep) < min_length or len(pep) > max_length: + continue + peptide_isoforms = parser.isoforms( + pep, + variable_mods=var_mods, + fixed_mods=fixed_mods, + max_mods=max_mods, + ) + peptide_isoforms = list(map(convert_from_modx, peptide_isoforms)) + mod_peptide_list.extend( + (mod_pep, mass_calculator.mass(mod_pep), prot) + for mod_pep in peptide_isoforms + ) + + # Sort the peptides by mass and return. + mod_peptide_list.sort(key=lambda x: x[1]) + return mod_peptide_list + + +def get_candidates( + precursor_mass, charge, peptide_list, precursor_tolerance, isotope_error +): + """TODO: ADD DOCSTRING""" + + candidates = set() + + isotope_error = [int(x) for x in isotope_error.split(",")] + for e in isotope_error: + iso_shift = ISOTOPE_SPACING * e + upper_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + 1 + (precursor_tolerance / 1e6) + ) + lower_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + 1 - (precursor_tolerance / 1e6) + ) + + start, end = get_mass_indices( + [x[1] for x in peptide_list], lower_bound, upper_bound + ) + + candidates.update(peptide_list[start:end]) + + candidates = list(candidates) + candidates.sort(key=lambda x: x[1]) + return candidates + + +def _to_mz(precursor_mass, charge): + """TODO: ADD DOCSTRING""" + return (precursor_mass + (charge * PROTON)) / charge + + +def _to_raw_mass(mz_mass, charge): + """TODO: ADD DOCSTRING""" + return charge * (mz_mass - PROTON) + + +def get_mass_indices(masses, m_low, m_high): + """Grabs mass indices from a list of mass values that fall within a specified range. + Requires that the mass values are sorted in ascending order. + + Parameters + ---------- + masses : List[int] + List of mass values + m_low : int + Lower bound of mass range (inclusive) + m_high : int + Upper bound of mass range (inclusive) + + Return + ------ + indices : Tuple[int, int] + Indices of mass values that fall within the specified range + """ + start = bisect.bisect_left(masses, m_low) + end = bisect.bisect_right(masses, m_high) + return start, end diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index c4cfc7cb..d47b9b04 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -251,7 +251,6 @@ def save_db_variant(self) -> None: "start", "end", "opt_ms_run[1]_aa_scores", - "opt_cv_MS:1002217_decoy_peptide", ] ) for i, psm in enumerate(self.psms): @@ -259,7 +258,7 @@ def save_db_variant(self) -> None: [ "PSM", psm[0], # sequence - f"{psm[5]}:{i}", # spectra_ref + f"{psm[5]}:{i}", # PSM_ID (spectrum # :candidate #) "null", # accession "null", # unique "null", # database @@ -284,6 +283,5 @@ def save_db_variant(self) -> None: ) ) ), # opt_ms_run[1]_aa_scores - bool(psm[7]), # opt_cv_MS:1002217_decoy_peptide ] ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index aff860a1..ba02936c 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -127,13 +127,12 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: self.test_dataset = make_dataset(self.test_index) if stage == "db": make_dataset = functools.partial( - AnnotatedSpectrumDataset, + SpectrumDataset, n_peaks=self.n_peaks, min_mz=self.min_mz, max_mz=self.max_mz, min_intensity=self.min_intensity, remove_precursor_tol=self.remove_precursor_tol, - track_spectrum_id=True, ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) @@ -143,7 +142,6 @@ def _make_loader( dataset: torch.utils.data.Dataset, 
batch_size: int, shuffle: bool = False, - db_mode: bool = False, ) -> torch.utils.data.DataLoader: """ Create a PyTorch DataLoader. @@ -167,7 +165,7 @@ def _make_loader( return torch.utils.data.DataLoader( dataset, batch_size=batch_size, - collate_fn=prepare_batch if not db_mode else prepare_db_batch, + collate_fn=prepare_batch, pin_memory=True, num_workers=self.n_workers, shuffle=shuffle, @@ -191,12 +189,6 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" return self._make_loader(self.test_dataset, self.eval_batch_size) - def db_dataloader(self) -> torch.utils.data.DataLoader: - """Get the predict DataLoader.""" - return self._make_loader( - self.test_dataset, self.eval_batch_size, db_mode=True - ) - def prepare_batch( batch: List[Tuple[torch.Tensor, float, int, str]] @@ -235,48 +227,3 @@ def prepare_batch( [precursor_masses, precursor_charges, precursor_mzs] ).T.float() return spectra, precursors, np.asarray(spectrum_ids) - - -def prepare_db_batch( - batch: List[Tuple[torch.Tensor, float, int, str, Tuple[str, str]]] -) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, Tuple[str, str]]: - """ - Collate MS/MS spectra into a batch meant for Casanovo-DB. - - Parameters - ---------- - batch : List[Tuple[torch.Tensor, Tuple[float, int, float], str, Tuple[str, str]]] - A batch of data from an AnnotatedSpectrumDataset, consisting of for each - spectrum (i) a tensor with the m/z and intensity peak values, - (ii) the precursor information [mass, charge, m/z], (iii) the - peptide sequence, the precursor m/z, (iv) spectrum identifiers - (file and scan). - - Returns - ------- - spectra : torch.Tensor of shape (batch_size, n_peaks, 2) - The padded mass spectra tensor with the m/z and intensity peak values - for each spectrum. - precursors : torch.Tensor of shape (batch_size, 3) - A tensor with the precursor neutral mass, precursor charge, and - precursor m/z. - spectrum_peps : np.ndarray - Peptide sequences - spectrum_ids : Tuple[str, str] - Peak file and spectrum identifier - """ - ( - spectra, - precursor_mzs, - precursor_charges, - spectrum_peps, - spectrum_ids, - ) = list(zip(*batch)) - spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) - precursor_mzs = torch.tensor(precursor_mzs) - precursor_charges = torch.tensor(precursor_charges) - precursor_masses = (precursor_mzs - 1.007276) * precursor_charges - precursors = torch.vstack( - [precursor_masses, precursor_charges, precursor_mzs] - ).T.float() - return spectra, precursors, np.asarray(spectrum_peps), spectrum_ids diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 71f4a6fa..be7dba9a 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -17,7 +17,7 @@ from . import evaluate from .. import config -from ..data import ms_io +from ..data import ms_io, db_utils logger = logging.getLogger("casanovo") @@ -1009,19 +1009,18 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors, (iv) scan numbers. 
+ spectrum identifiers as torch Tensors Returns ------- - predictions: List[Tuple[int, bool, str, float, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, str, float, np.ndarray, np.ndarray]] Model predictions for the given batch of spectra containing spectrum - scan number, decoy flag, peptide sequence, Casanovo-DB score, + scan number, peptide sequence, Casanovo-DB score, amino acid-level confidence scores, and precursor information. """ batch_res = [] for ( indexes, - is_decoy, peptides, precursors, encoded_ms, @@ -1034,7 +1033,6 @@ def predict_step(self, batch, *args): batch_res.append( ( indexes, - is_decoy, peptides, score_result.cpu().detach().numpy(), per_aa_score.cpu().detach().numpy(), @@ -1043,27 +1041,25 @@ def predict_step(self, batch, *args): ) return batch_res - def smart_batch_gen(self, batch): + def smart_batch_gen(self, spectrum_batch): + """TODO: ADD DOCSTRING""" all_psm = [] - batch_size = len(batch[0]) - enc = self.encoder(batch[0]) - precursors = batch[1] - indexes = batch[3] + batch_size = len(spectrum_batch[0]) + enc = self.encoder(spectrum_batch[0]) enc = list(zip(*enc)) + precursors = spectrum_batch[1] + indexes = spectrum_batch[2] for idx in range(batch_size): - spec_peptides = batch[2][idx].split(",") - # Check for decoy prefixes and create a bit-vector indicating targets (1) or decoys (0) - decoy_prefix = "decoy_" # Decoy prefix - id_decoys = np.array( - [ - (0, p.removeprefix(decoy_prefix)) - if p.startswith(decoy_prefix) - else (1, p) - for p in spec_peptides - ] + spec_peptides = db_utils.get_candidates( + precursors[idx][2], + precursors[idx][1], + self.digest, + self.precursor_tolerance, + self.isotope_error, ) - decoy_mask = np.array(id_decoys[:, 0], dtype=bool) - spec_peptides = list(id_decoys[:, 1]) + spec_peptides = [ + a[0] for a in spec_peptides + ] # TODO: USE MASS AND PROTEIN INFORMATION spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) @@ -1074,24 +1070,22 @@ def smart_batch_gen(self, batch): spec_precursors, spec_peptides, spec_idx, - decoy_mask, ) ) ) # Continually grab num_pairs items from all_psm until list is exhausted while len(all_psm) > 0: - batch = all_psm[:batch_size] + psm_batch = all_psm[:batch_size] all_psm = all_psm[batch_size:] - batch = list(zip(*batch)) + psm_batch = list(zip(*psm_batch)) encoded_ms = ( - torch.stack([a[0] for a in batch[0]]), - torch.stack([a[1] for a in batch[0]]), + torch.stack([a[0] for a in psm_batch[0]]), + torch.stack([a[1] for a in psm_batch[0]]), ) - prec_data = torch.stack(batch[1]) - pep_str = list(batch[2]) - indexes = [a[1] for a in batch[3]] - is_decoy = batch[4] - yield (indexes, is_decoy, pep_str, prec_data, encoded_ms) + prec_data = torch.stack(psm_batch[1]) + pep_str = list(psm_batch[2]) + indexes = [a[1] for a in psm_batch[3]] + yield (indexes, pep_str, prec_data, encoded_ms) def on_predict_batch_end( self, @@ -1102,7 +1096,6 @@ def on_predict_batch_end( return for ( indexes, - t_or_d, peptides, score_result, per_aa_score, @@ -1123,7 +1116,6 @@ def on_predict_batch_end( calc_mz, indexes, per_aa_score, - t_or_d, ): self.out_writer.psms.append(row) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index e150ab2d..73dfdff2 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -18,7 +18,7 @@ from lightning.pytorch.callbacks import ModelCheckpoint from ..config import Config -from ..data import ms_io +from ..data import ms_io, db_utils 
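The regrouping that smart_batch_gen performs is easier to see without the tensor bookkeeping: every spectrum is paired with each of its candidate peptides, and the flat list of pairs is re-cut into fixed-size batches so that each decoder pass scores the same number of PSMs regardless of how many candidates any one spectrum drew. A toy sketch of that idea (the spectrum and peptide names are placeholders):

from itertools import islice

spectra = ["spec0", "spec1"]
candidates = {"spec0": ["LESLIEK", "ATSIPAR"], "spec1": ["PEPTIDER"]}

# Pair every spectrum with each of its candidate peptides ...
all_psms = [(spec, pep) for spec in spectra for pep in candidates[spec]]

# ... then re-chunk the flat PSM list into equally sized scoring batches.
def chunks(items, size):
    it = iter(items)
    while batch := list(islice(it, size)):
        yield batch

for psm_batch in chunks(all_psms, size=2):
    print(psm_batch)

Each yielded batch corresponds to one forward pass through the decoder in predict_step, however many candidates any individual spectrum happened to have.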
from ..denovo.dataloaders import DeNovoDataModule from ..denovo.model import Spec2Pep, DbSpec2Pep @@ -79,13 +79,29 @@ def __exit__(self, exc_type, exc_value, traceback): if self.writer is not None: self.writer.save() - def db_search(self, peak_path: Iterable[str], output: str) -> None: + def db_search( + self, + peak_path: Iterable[str], + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: int, + max_length: int, + precursor_tolerance: float, + isotope_error: float, + output: str, + ) -> None: """Perform database search with Casanovo. Parameters ---------- - peak_path : iterable of str - The path to the annotated .mgf data files for database search. + peak_path : Iterable[str] + The path to the .mgf data file for database search. + fasta_path : str + The path to the FASTA file for database search. + # TODO: ADD ALL DOCUMENTATION output : str Where should the output be saved? @@ -105,12 +121,23 @@ def db_search(self, peak_path: Iterable[str], output: str) -> None: self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer + self.model.digest = db_utils.digest_fasta( + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_length, + max_length, + ) + self.model.precursor_tolerance = precursor_tolerance + self.model.isotope_error = isotope_error - test_index = self._get_index(peak_path, True, "db search") + test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) self.loaders.setup(stage="db") - self.trainer.predict(self.model, self.loaders.db_dataloader()) + self.trainer.predict(self.model, self.loaders.predict_dataloader()) def train( self, From f7dfbc8356d8993c219dbfaeccf59753f555fa07 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 19:54:56 -0700 Subject: [PATCH 02/21] tested implementation of db search --- casanovo/casanovo.py | 107 ++++---- casanovo/data/annotate_db.py | 138 ---------- casanovo/data/db_utils.py | 109 ++++++-- casanovo/data/ms_io.py | 2 +- casanovo/denovo/model.py | 32 ++- casanovo/denovo/model_runner.py | 19 +- tests/conftest.py | 51 +++- tests/test_integration.py | 99 +------- tests/unit_tests/test_unit.py | 430 +++++++++++++++++++++++++++++++- 9 files changed, 666 insertions(+), 321 deletions(-) delete mode 100644 casanovo/data/annotate_db.py diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index df3cc79f..8ae9a81b 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -42,7 +42,6 @@ from . import utils from .denovo import ModelRunner from .config import Config -from .data.annotate_db import annotate_mgf logger = logging.getLogger("casanovo") click.rich_click.USE_MARKDOWN = True @@ -146,67 +145,6 @@ def sequence( logger.info("DONE!") -@main.command() -@click.argument( - "peak_path", - required=True, - nargs=1, - type=click.Path(exists=True, dir_okay=False), -) -@click.argument( - "tide_path", - required=True, - nargs=1, - type=click.Path(exists=True, dir_okay=True), -) -@click.option( - "-o", - "--output", - help="The output annotated MGF file.", - type=click.Path(dir_okay=False), -) -@click.option( - "-v", - "--verbosity", - help=""" - Set the verbosity of console logging messages. Log files are - always set to 'debug'. 
- """, - type=click.Choice( - ["debug", "info", "warning", "error"], - case_sensitive=False, - ), - default="info", -) -def annotate( - peak_path: str, - tide_path: str, - output: Optional[str], - verbosity: str, -) -> None: - """Annotate a given .mgf with candidates as selected by a Tide search for Casanovo-DB. - - PEAK_PATH must be one MGF file from which to annotate spectra. - - TIDE_PATH must be one directory containing the Tide search results of the .mgf. - This directory must contain tide-search.decoy.txt and tide-search.target.txt - """ - if output is None: - output = setup_logging(output, verbosity) - logger.info( - "Output file not specified. \ - Annotated MGF will be saved in the same directory \ - as the input MGF." - ) - output = peak_path.replace(".mgf", "_annotated.mgf") - else: - output = setup_logging(output, verbosity) - - annotate_mgf(peak_path, tide_path, output) - - logger.info("DONE!") - - @main.command(cls=_SharedParams) @click.argument( "peak_path", @@ -222,8 +160,47 @@ def annotate( ) @click.option( "--enzyme", - help="Enzyme for in silico digestion, see pyteomics.parser.expasy_rules", - type=str, + help="Enzyme for in silico digestion, \ + See pyteomics.parser.expasy_rules for valid enzymes", + type=click.Choice( + [ + "arg-c", + "asp-n", + "bnps-skatole", + "caspase 1", + "caspase 2", + "caspase 3", + "caspase 4", + "caspase 5", + "caspase 6", + "caspase 7", + "caspase 8", + "caspase 9", + "caspase 10", + "chymotrypsin high specificity", + "chymotrypsin low specificity", + "clostripain", + "cnbr", + "enterokinase", + "factor xa", + "formic acid", + "glutamyl endopeptidase", + "granzyme b", + "hydroxylamine", + "iodosobenzoic acid", + "lysc", + "ntcb", + "pepsin ph1.3", + "pepsin ph2.0", + "proline endopeptidase", + "proteinase k", + "staphylococcal peptidase i", + "thermolysin", + "thrombin", + "trypsin", + "trypsin_exception", + ] + ), default="trypsin", ) @click.option( @@ -287,7 +264,7 @@ def db_search( output: Optional[str], verbosity: str, ) -> None: - """Perform a search using Casanovo-DB. + """Perform a database search on MS/MS data using Casanovo-DB. PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. """ diff --git a/casanovo/data/annotate_db.py b/casanovo/data/annotate_db.py deleted file mode 100644 index dd2e6c64..00000000 --- a/casanovo/data/annotate_db.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Methods used to annotate an .mgf so that it can be used by Casanovo-DB""" - -from pathlib import Path -from typing import Optional, Tuple -import os -import re -import logging - -import pandas as pd -import pyteomics.mgf as mgf - - -def _normalize_mods(seq: str) -> str: - """ - Turns tide-style modifications into the format used by Casanovo-DB. - - Parameters - ---------- - seq : str - The peptide sequence with tide-style modifications. - - Returns - ------- - str - The peptide sequence with Casanovo-DB-style modifications. 
- """ - logger = logging.getLogger("casanovo") - seq = seq.replace("C", "C+57.021") - seq = re.sub(r"M\[15\.[0-9]*\]", r"M+15.995", seq) - seq = re.sub(r"N\[0\.9[0-9]*\]", r"N+0.984", seq) - seq = re.sub(r"Q\[0\.9[0-9]*\]", r"Q+0.984", seq) - seq = re.sub(r"(.*)\[42\.[0-9]*\]", r"+42.011\1", seq) - seq = re.sub(r"(.*)\[43\.[0-9]*\]", r"+43.006\1", seq) - seq = re.sub(r"(.*)\[\-17\.[0-9]*\]", r"-17.027\1", seq) - seq = re.sub(r"(.*)\[25\.[0-9]*\]", r"+43.006-17.027\1", seq) - return seq - - -def annotate_mgf(peak_path: str, tide_path: str, output: Optional[str]): - """ - Accepts a directory containing the results of a successful tide search, - and an .mgf file containing MS/MS spectra. - The .mgf file is then annotated in the SEQ field with - all of the candidate peptides for each spectrum, as well as their target/decoy status. - This annotated .mgf can be given directly to Casanovo-DB to perfrom a database search. - - Parameters - ---------- - tide_dir_path : str - Path to the directory containing the results of a successful tide search. - mgf_file : str - Path to the .mgf file containing MS/MS spectra. - output_file : str - Path to where the annotated .mgf will be written. - - """ - logger = logging.getLogger("casanovo") - # Get paths to tide search text files - tdf_path = os.path.join(tide_path, "tide-search.target.txt") - ddf_path = os.path.join(tide_path, "tide-search.decoy.txt") - try: - target_df = pd.read_csv( - tdf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] - ) - decoy_df = pd.read_csv( - ddf_path, sep="\t", usecols=["scan", "sequence", "target/decoy"] - ) - except FileNotFoundError as e: - logger.error( - "Could not find tide search results in the specified directory. " - "Please ensure that the directory contains the following files: " - "tide-search.target.txt and tide-search.decoy.txt" - ) - raise e - - logger.info("Successfully read tide search results from %s.", tide_path) - - df = pd.concat([target_df, decoy_df]) - scan_groups = df.groupby("scan")[["sequence", "target/decoy"]] - - scan_map = {} - - for scan, item in scan_groups: - td_group = item.groupby("target/decoy")["sequence"].apply(list) - if "target" in td_group.index: - target_candidate_list = list( - map( - _normalize_mods, - td_group["target"], - ) - ) - else: - target_candidate_list = [] - logger.warn(f"No target peptides found for scan {scan}.") - if "decoy" in td_group.index: - decoy_candidate_list = list( - map( - _normalize_mods, - td_group["decoy"], - ) - ) - decoy_candidate_list = list( - map(lambda x: "decoy_" + str(x), decoy_candidate_list) - ) - else: - decoy_candidate_list = [] - logger.warn(f"No decoy peptides found for scan {scan}.") - - pep_list = target_candidate_list + decoy_candidate_list - if len(pep_list) == 0: - logger.warn(f"No peptides found for scan {scan}.") - else: - scan_map[scan] = target_candidate_list + decoy_candidate_list - - all_spec = [] - for idx, spec_dict in enumerate(mgf.read(peak_path)): - try: - scan = int(spec_dict["params"]["scans"]) - except KeyError as e: - logger.error( - "Could not find the scan number in the .mgf file." - "Please ensure that the .mgf file contains the scan number in the 'SCANS' field." 
- ) - raise e - try: - spec_dict["params"]["seq"] = ",".join(list(scan_map[scan])) - all_spec.append(spec_dict) - except KeyError as e: - # No need to do anything if the scan is not found in the scan map - pass - try: - output = str(output) - mgf.write(all_spec, output, file_mode="w") - logger.info("Annotated .mgf file written to %s.", output) - except Exception as e: - logger.error( - "Write to %s failed. Check if the file path is correct.", output - ) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c961e35e..341a6162 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -5,11 +5,14 @@ from pyteomics import fasta, parser import bisect +from typing import List, Tuple + +# CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 H2O = 2 * HYDROGEN + OXYGEN PROTON = 1.00727646677 -ISOTOPE_SPACING = 1.003355 # - 0.00288 +ISOTOPE_SPACING = 1.003355 var_mods = { "d": ["N", "Q"], @@ -22,7 +25,7 @@ fixed_mods = {"carbm": ["C"]} -def convert_from_modx(seq): +def convert_from_modx(seq: str): """Converts peptide sequence from modX format to Casanovo-acceptable modifications. Args: @@ -40,15 +43,41 @@ def convert_from_modx(seq): def digest_fasta( - fasta_filename, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_length, - max_length, + fasta_filename: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_length: int, + max_length: int, ): - """TODO: Add docstring""" + """ + Digests a FASTA file and returns the peptides, their masses, and associated protein. + + Parameters + ---------- + fasta_filename : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + max_mods : int + The maximum number of modifications to allow per peptide. + min_length : int + The minimum length of peptides to consider. + max_length : int + The maximum length of peptides to consider. + + Returns + ------- + mod_peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, + and associated protein. Sorted by neutral mass in ascending order. + """ # Verify the eistence of the file: if not os.path.isfile(fasta_filename): @@ -96,19 +125,39 @@ def digest_fasta( def get_candidates( - precursor_mass, charge, peptide_list, precursor_tolerance, isotope_error + precursor_mz: float, + charge: int, + peptide_list: List[Tuple[str, float, str]], + precursor_tolerance: int, + isotope_error: str, ): - """TODO: ADD DOCSTRING""" + """ + Returns a list of candidate peptides that fall within the specified mass range. + + Parameters + ---------- + precursor_mz : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, and associated protein. + Must be sorted by mass in ascending order. Uses neutral masses. + precursor_tolerance : float + The precursor mass tolerance in parts-per-million. + isotope_error : str + The isotope error levels to consider. 
+ """ candidates = set() isotope_error = [int(x) for x in isotope_error.split(",")] for e in isotope_error: iso_shift = ISOTOPE_SPACING * e - upper_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + upper_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( 1 + (precursor_tolerance / 1e6) ) - lower_bound = (_to_raw_mass(precursor_mass, charge) - iso_shift) * ( + lower_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( 1 - (precursor_tolerance / 1e6) ) @@ -124,12 +173,40 @@ def get_candidates( def _to_mz(precursor_mass, charge): - """TODO: ADD DOCSTRING""" + """ + Convert precursor neutral mass to m/z value. + + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ return (precursor_mass + (charge * PROTON)) / charge def _to_raw_mass(mz_mass, charge): - """TODO: ADD DOCSTRING""" + """ + Convert precursor m/z value to neutral mass. + + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. + """ return charge * (mz_mass - PROTON) diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index d47b9b04..a701b627 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -221,7 +221,7 @@ def save_db_variant(self) -> None: Export the Casanovo-DB search results to the mzTab file. Outputs PSMs in the order they were scored - (i.e. the order in the annotated .mgf file). + (i.e. the order in the .mgf file). """ with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index be7dba9a..4d9bd41b 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1009,7 +1009,7 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors + spectrum identifiers as torch Tensors. Returns ------- @@ -1042,7 +1042,21 @@ def predict_step(self, batch, *args): return batch_res def smart_batch_gen(self, spectrum_batch): - """TODO: ADD DOCSTRING""" + """ + Transforms a batch of spectra into multiple equally-sized batches of PSMs. + + Parameters + ---------- + spectrum batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + A batch of (i) MS/MS spectra, (ii) precursor information, (iii) + spectrum identifiers as torch Tensors. + + Yields + ------- + psm_batch: Tuple[List[int], List[str], torch.Tensor, Tuple[torch.Tensor, torch.Tensor]] + A batch of PSMs containing the spectrum index, peptide sequence, + precursor information, and encoded MS/MS spectra. 
+ """ all_psm = [] batch_size = len(spectrum_batch[0]) enc = self.encoder(spectrum_batch[0]) @@ -1050,16 +1064,22 @@ def smart_batch_gen(self, spectrum_batch): precursors = spectrum_batch[1] indexes = spectrum_batch[2] for idx in range(batch_size): - spec_peptides = db_utils.get_candidates( + digest_data = db_utils.get_candidates( precursors[idx][2], precursors[idx][1], self.digest, self.precursor_tolerance, self.isotope_error, ) - spec_peptides = [ - a[0] for a in spec_peptides - ] # TODO: USE MASS AND PROTEIN INFORMATION + logger.debug("%s", digest_data) + try: + spec_peptides, pep_masses, pep_protein = list( + zip(*digest_data) + ) + except ValueError: + logger.info( + "No peptides found for precursor %s", precursors[idx] + ) spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 73dfdff2..284acbe8 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -90,7 +90,7 @@ def db_search( min_length: int, max_length: int, precursor_tolerance: float, - isotope_error: float, + isotope_error: str, output: str, ) -> None: """Perform database search with Casanovo. @@ -101,7 +101,22 @@ def db_search( The path to the .mgf data file for database search. fasta_path : str The path to the FASTA file for database search. - # TODO: ADD ALL DOCUMENTATION + enzyme : str + The enzyme used for digestion. + digestion : str + The digestion type, full or partial. + missed_cleavages : int + The number of missed cleavages allowed. + max_mods : int + The maximum number of modifications allowed per peptide. + min_length : int + The minimum peptide length. + max_length : int + The maximum peptide length. + precursor_tolerance : float + The precursor mass tolerance in ppm. + isotope_error : str + Isotope error levels to consider, in comma-delineated string form. output : str Where should the output be saved? 
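Pulling the pieces together, the candidate lookup that db_search ultimately drives is a ppm window around the precursor's neutral mass plus two binary searches over the mass-sorted digest. A compact sketch using the same constants as db_utils.py (the peptide masses below are approximate and purely illustrative):

import bisect

PROTON = 1.00727646677
ISOTOPE_SPACING = 1.003355

def neutral_mass(mz, charge):
    return charge * (mz - PROTON)

# Mass-sorted (peptide, neutral mass) pairs, as produced by digest_fasta.
peptides = [("ATSIPAR", 714.40), ("LESLIEK", 830.48), ("PEPTIDER", 955.46)]
masses = [m for _, m in peptides]

mz, charge, tol_ppm, isotope = 416.25, 2, 20, 0
center = neutral_mass(mz, charge) - isotope * ISOTOPE_SPACING
lo, hi = center * (1 - tol_ppm / 1e6), center * (1 + tol_ppm / 1e6)

start = bisect.bisect_left(masses, lo)
end = bisect.bisect_right(masses, hi)
print(peptides[start:end])  # -> [('LESLIEK', 830.48)]

Because the digest is sorted once up front, each spectrum costs only two bisect calls per isotope error level, which is what keeps get_candidates cheap even for large FASTA files.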
diff --git a/tests/conftest.py b/tests/conftest.py index eed4f39a..cac1a873 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ import psims import pytest import yaml -from pyteomics.mass import calculate_mass +from pyteomics.mass import calculate_mass, fast_mass, std_aa_mass @pytest.fixture @@ -263,6 +263,36 @@ def tiny_config(tmp_path): return cfg_file +@pytest.fixture +def tiny_fasta_file(tmp_path, fasta_raw_data): + fasta_file = tmp_path / "tiny_fasta.fasta" + with fasta_file.open("w+") as fasta_ref: + fasta_ref.write(fasta_raw_data) + + return fasta_file + + +@pytest.fixture +def fasta_raw_data(): + return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + + +@pytest.fixture +def mgf_db_search(tmp_path): + """An MGF file with 2 unannotated spectra and scan numbers.""" + peptides = [ + "ATSIPAR", + "VTLSCR", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", + ] + mgf_file = tmp_path / "db_search.mgf" + return _create_unannotated_mgf(peptides, mgf_file, c_mod=True) + + @pytest.fixture def mgf_small_unannotated(tmp_path): """An MGF file with 2 unannotated spectra and scan numbers.""" @@ -271,7 +301,7 @@ def mgf_small_unannotated(tmp_path): return _create_unannotated_mgf(peptides, mgf_file) -def _create_unannotated_mgf(peptides, mgf_file, random_state=999): +def _create_unannotated_mgf(peptides, mgf_file, random_state=999, c_mod=False): """ Create a fake MGF file from one or more peptides. This file will have no SEQ= parameter, but will have a SCANS= parameter. @@ -284,6 +314,9 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- @@ -291,7 +324,7 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): """ rng = np.random.default_rng(random_state) entries = [ - _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3])) + _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3]), c_mod=c_mod) for idx, p in enumerate(peptides) ] with mgf_file.open("w+") as mgf_ref: @@ -300,7 +333,7 @@ def _create_unannotated_mgf(peptides, mgf_file, random_state=999): return mgf_file -def _create_unannotated_mgf_entry(peptide, scan_num, charge): +def _create_unannotated_mgf_entry(peptide, scan_num, charge, c_mod=False): """ Create a MassIVE-KB style MGF entry for a single PSM. Each entry will have no SEQ= parameter, but will have a SCANS= parameter. @@ -313,13 +346,21 @@ def _create_unannotated_mgf_entry(peptide, scan_num, charge): The scan number. charge : int, optional The peptide charge state. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- str The PSM entry in an MGF file format. 
""" - precursor_mz = calculate_mass(peptide, charge=int(charge)) + if not c_mod: + precursor_mz = calculate_mass(peptide, charge=int(charge)) + else: + aa_mass = std_aa_mass + aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) diff --git a/tests/test_integration.py b/tests/test_integration.py index 60e3977b..4bd55174 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -7,50 +7,8 @@ from casanovo import casanovo -def test_annotate(mgf_small_unannotated, tide_dir_small, tmp_path): - - # Run a command: - run = functools.partial( - CliRunner().invoke, casanovo.main, catch_exceptions=False - ) - - annotate_args = [ - "annotate", - str(mgf_small_unannotated), - str(tide_dir_small), - "--output", - str(tmp_path / "annotated_mgf.mgf"), - ] - - result = run(annotate_args) - - assert result.exit_code == 0 - assert (tmp_path / "annotated_mgf.mgf").exists() - - # Read in the annotated file - with open(tmp_path / "annotated_mgf.mgf") as f: - annotated_lines = f.readlines() - - # Get each SEQ= line - seq_lines = [line for line in annotated_lines if line.startswith("SEQ=")] - assert len(seq_lines) == 3 - assert ( - seq_lines[0].strip() - == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" - ) - assert ( - seq_lines[1].strip() - == "SEQ=LESLIEK,PEPTIDEK,decoy_KEILSEL,decoy_KEDITEPP" - ) - assert ( - seq_lines[2].strip() == "SEQ=+42.011LEM+15.995SLIM+15.995EK," - "+43.006PEN+0.984PTIQ+0.984DEK,decoy_-17.027KM+15.995EILSEL," - "decoy_+43.006-17.027KEDITEPP,decoy_KEDIQ+0.984TEPPQ+0.984" - ) - - def test_db_search( - mgf_small_unannotated, tide_dir_small, tiny_config, tmp_path, monkeypatch + mgf_db_search, tiny_fasta_file, tiny_config, tmp_path, monkeypatch ): # Run a command: monkeypatch.setattr(casanovo, "__version__", "4.1.0") @@ -58,30 +16,18 @@ def test_db_search( CliRunner().invoke, casanovo.main, catch_exceptions=False ) - annotate_args = [ - "annotate", - str(mgf_small_unannotated), - str(tide_dir_small), - "--output", - str(tmp_path / "annotated_mgf.mgf"), - ] - - result = run(annotate_args) - - assert result.exit_code == 0 - assert (tmp_path / "annotated_mgf.mgf").exists() - - # Follow up annotate run with db search - output_path = tmp_path / "db_search.mztab" search_args = [ "db-search", - str(tmp_path / "annotated_mgf.mgf"), "--config", tiny_config, "--output", str(output_path), + "--precursor_tolerance", + str(100), + str(mgf_db_search), + str(tiny_fasta_file), ] result = run(search_args) @@ -94,34 +40,13 @@ def test_db_search( psms = mztab.spectrum_match_table assert list(psms.sequence) == [ - "LESLIEK", - "PEPTIDEK", - "KEILSEL", - "KEDITEPP", - "LESLIEK", - "PEPTIDEK", - "KEILSEL", - "KEDITEPP", - "+42.011LEM+15.995SLIM+15.995EK", - "+43.006PEN+0.984PTIQ+0.984DEK", - "-17.027KM+15.995EILSEL", - "+43.006-17.027KEDITEPP", - "KEDIQ+0.984TEPPQ+0.984", - ] - assert list(psms["opt_cv_MS:1002217_decoy_peptide"]) == [ - "True", - "True", - "False", - "False", - "True", - "True", - "False", - "False", - "True", - "True", - "False", - "False", - "False", + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index ec9085c0..e3707917 100644 --- a/tests/unit_tests/test_unit.py +++ 
b/tests/unit_tests/test_unit.py @@ -10,10 +10,11 @@ import numpy as np import pytest import torch +import re from casanovo import casanovo from casanovo import utils -from casanovo.data import ms_io +from casanovo.data import ms_io, db_utils from casanovo.data.datasets import SpectrumDataset, AnnotatedSpectrumDataset from casanovo.denovo.evaluate import aa_match_batch, aa_match_metrics from casanovo.denovo.model import Spec2Pep, _aa_pep_score, _calc_match_score @@ -219,6 +220,433 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 +def test_digest_fasta_cleave(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # No missed cleavages + expected_normal = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # 1 missed cleavage + expected_1missedcleavage = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "LLIYGASTRATSIPAR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "EIVMTQSPPTLSLSPGERVTLSC+57.021R", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER", + "ATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # 3 missed cleavages + expected_3missedcleavage = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "LLIYGASTRATSIPAR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "EIVMTQSPPTLSLSPGERVTLSC+57.021R", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "ASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPAR", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPRLLIYGASTR", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER", + "ATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "VTLSC+57.021RASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPAR", + "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSC+57.021R", + "EIVMTQSPPTLSLSPGERVTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", + "LLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_normal + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_1missedcleavage + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=3, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_3missedcleavage + + +def test_digest_fasta_mods(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # 1 modification allowed + # fixed: C+57.02146 + # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 + # nterm: 1X+42.010565,1X+43.005814,1X-17.026549,1X+25.980265 + expected_1mod = [ + "-17.027ATSIPAR", + "ATSIPAR", + "-17.027VTLSC+57.021R", + "VTLSC+57.021R", + "+43.006-17.027ATSIPAR", + "+42.011ATSIPAR", + "+43.006ATSIPAR", + "+43.006-17.027VTLSC+57.021R", + "+42.011VTLSC+57.021R", + "+43.006VTLSC+57.021R", + 
"-17.027LLIYGASTR", + "LLIYGASTR", + "+43.006-17.027LLIYGASTR", + "+42.011LLIYGASTR", + "+43.006LLIYGASTR", + "-17.027EIVMTQSPPTLSLSPGER", + "EIVMTQSPPTLSLSPGER", + "EIVMTQ+0.984SPPTLSLSPGER", + "EIVM+15.995TQSPPTLSLSPGER", + "+43.006-17.027EIVMTQSPPTLSLSPGER", + "+42.011EIVMTQSPPTLSLSPGER", + "+43.006EIVMTQSPPTLSLSPGER", + "-17.027MEAPAQLLFLLLLWLPDTTR", + "MEAPAQLLFLLLLWLPDTTR", + "MEAPAQ+0.984LLFLLLLWLPDTTR", + "M+15.995EAPAQLLFLLLLWLPDTTR", + "+43.006-17.027MEAPAQLLFLLLLWLPDTTR", + "+42.011MEAPAQLLFLLLLWLPDTTR", + "+43.006MEAPAQLLFLLLLWLPDTTR", + "-17.027ASQSVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQQKPGQAPR", + "ASQ+0.984SVSSSYLTWYQQKPGQAPR", + "ASQSVSSSYLTWYQ+0.984QKPGQAPR", + "ASQSVSSSYLTWYQQ+0.984KPGQAPR", + "ASQSVSSSYLTWYQQKPGQ+0.984APR", + "+43.006-17.027ASQSVSSSYLTWYQQKPGQAPR", + "+42.011ASQSVSSSYLTWYQQKPGQAPR", + "+43.006ASQSVSSSYLTWYQQKPGQAPR", + "-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQ+0.984PEDFAVYYC+57.021QQDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021Q+0.984QDYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQ+0.984DYNLP", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYN+0.984LP", + "+43.006-17.027FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=1, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + peptide_list = [ + x + for x in peptide_list + if not re.search( + r"(\+42\.011|\+43\.006|\-17\.027|\+43\.006\-17\.027)+[A-Z]\+", x + ) + ] + assert peptide_list == expected_1mod + + +def test_length_restrictions(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # length between 20 and 50 + expected_long = [ + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # length between 6 and 8 + expected_short = ["ATSIPAR", "VTLSC+57.021R"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=20, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_long + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=8, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_short + + +def test_digest_fasta_enzyme(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # arg-c enzyme + expected_argc = [ + "ATSIPAR", + "VTLSC+57.021R", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", + ] + + # asp-n enzyme + expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="arg-c", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_argc + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="asp-n", + digestion="full", + missed_cleavages=0, + max_mods=0, + min_length=6, + max_length=50, + ) + 
peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected_aspn + + +def test_get_candidates(fasta_raw_data): + + with open("temp_fasta", "w") as file: + file.write(fasta_raw_data) + + # precursor_window is 10000 + expected_smallwindow = ["LLIYGASTR"] + + # precursor window is 150000 + expected_midwindow = ["LLIYGASTR"] + + # precursor window is 600000 + expected_widewindow = ["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_smallwindow == candidates + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=150000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_midwindow == candidates + + peptide_list = db_utils.digest_fasta( + fasta_filename="temp_fasta", + enzyme="trypsin", + digestion="full", + missed_cleavages=1, + max_mods=0, + min_length=6, + max_length=50, + ) + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=600000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_widewindow == candidates + + +def test_get_candidates_isotope_error(): + + # Tide isotope error windows for 496.2, 2+: + # 0: [980.481617, 1000.289326] + # 1: [979.491114, 999.278813] + # 2: [978.500611, 998.268300] + # 3: [977.510108, 997.257787] + + peptide_list = [ + ("A", 1001), + ("B", 1000), + ("C", 999), + ("D", 998), + ("E", 997), + ("F", 996), + ("G", 995), + ("H", 994), + ("I", 993), + ("J", 992), + ("K", 991), + ("L", 990), + ("M", 989), + ("N", 988), + ("O", 987), + ("P", 986), + ("Q", 985), + ("R", 984), + ("S", 983), + ("T", 982), + ("U", 981), + ("V", 980), + ("W", 979), + ("X", 978), + ("Y", 977), + ("Z", 976), + ] + + peptide_list.sort(key=lambda x: x[1]) + + expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") + expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") + expected_isotope2 = list("WVUTSRQPONMLKJIHGFED") + expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") + expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope0 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="1", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope1 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="2", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope2 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="3", + ) + candidates = 
[x[0] for x in candidates] + assert expected_isotope3 == candidates + + candidates = db_utils.get_candidates( + precursor_mz=496.2, + charge=2, + peptide_list=peptide_list, + precursor_tolerance=10000, + isotope_error="0,1,2,3", + ) + candidates = [x[0] for x in candidates] + assert expected_isotope0123 == candidates + + def test_beam_search_decode(): """ Test beam search decoding and its sub-functions. From e2ce3172c89a5c4fc74256689fa3cdf6b01d1faf Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 2 Jul 2024 20:20:25 -0700 Subject: [PATCH 03/21] fix for issue with 0 candidates --- casanovo/denovo/model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 4d9bd41b..02a324d3 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1071,15 +1071,13 @@ def smart_batch_gen(self, spectrum_batch): self.precursor_tolerance, self.isotope_error, ) - logger.debug("%s", digest_data) try: spec_peptides, pep_masses, pep_protein = list( zip(*digest_data) ) except ValueError: - logger.info( - "No peptides found for precursor %s", precursors[idx] - ) + logger.info("No peptides found for spectrum %s", indexes[idx]) + continue spec_precursors = [precursors[idx]] * len(spec_peptides) spec_enc = [enc[idx]] * len(spec_peptides) spec_idx = [indexes[idx]] * len(spec_peptides) From 5ef27e0c7dfffd219e5b248205a7ced0187ce4bb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 3 Jul 2024 11:33:36 -0700 Subject: [PATCH 04/21] minor fixes added --- casanovo/data/datasets.py | 2 - casanovo/denovo/dataloaders.py | 13 --- casanovo/denovo/model.py | 31 +++--- casanovo/denovo/model_runner.py | 2 +- tests/conftest.py | 164 +++++++++----------------------- 5 files changed, 67 insertions(+), 145 deletions(-) diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 59f56b68..6244e88f 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -134,8 +134,6 @@ def _process_peaks( The precursor m/z. precursor_charge : int The precursor charge. - track_spectrum_id : Optional[bool] - Whether to keep track of the identifier of the MS/MS spectra. Returns ------- diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index ba02936c..97bfb2fc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -125,17 +125,6 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: ) if self.test_index is not None: self.test_dataset = make_dataset(self.test_index) - if stage == "db": - make_dataset = functools.partial( - SpectrumDataset, - n_peaks=self.n_peaks, - min_mz=self.min_mz, - max_mz=self.max_mz, - min_intensity=self.min_intensity, - remove_precursor_tol=self.remove_precursor_tol, - ) - if self.test_index is not None: - self.test_dataset = make_dataset(self.test_index) def _make_loader( self, @@ -154,8 +143,6 @@ def _make_loader( The batch size to use. shuffle : bool Option to shuffle the batches. - db_mode : bool - Option to use the DataLoader for Casanovo-DB. Returns ------- diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 02a324d3..312e7f92 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -12,7 +12,6 @@ import numpy as np import lightning.pytorch as pl from torch.utils.tensorboard import SummaryWriter -from pyteomics import mass from depthcharge.components import ModelMixin, PeptideDecoder, SpectrumEncoder from . 
import evaluate @@ -992,10 +991,19 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Inherits Spec2Pep + Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. - Hijacks teacher-forcing implemented in Spec2Pep and - uses it to predict scores between a spectra and associated peptide. + Uses teacher forcing to 'query' Casanovo for its score for each AA + within a candidate peptide, and takes the geometric average of these scores + and reports this as the score for the spectrum-peptide pair. Note that the + geometric mean of the AA scores is actually calculated by a + summation and average of the log of the scores, to preserve numerical + stability. This does not affect PSM ranking. + + Also note that although teacher-forcing is used within this method, + there is *no training* involved. This is a prediction-only method. + + Output is provided in .mztab format. """ def __init__(self, *args, **kwargs): @@ -1119,7 +1127,6 @@ def on_predict_batch_end( per_aa_score, precursors, ) in outputs: - prec_mass = precursors[:, 0] prec_charge = precursors[:, 1] prec_mz = precursors[:, 2] calc_mz = [ @@ -1140,9 +1147,9 @@ def on_predict_batch_end( def _calc_match_score( batch_all_aa_scores: torch.Tensor, - truth_aa_indicies: torch.Tensor, + truth_aa_indices: torch.Tensor, decoder_reverse: bool = False, -) -> List[float]: +) -> Tuple[torch.Tensor, torch.Tensor]: """ Calculate the score between the input spectra and associated peptide. @@ -1158,7 +1165,7 @@ def _calc_match_score( Amino acid scores for all amino acids in the vocabulary for every prediction made to generate the associated peptide (for an entire batch) - truth_aa_indicies : torch.Tensor + truth_aa_indices : torch.Tensor Indicies of the score for each actual amino acid in the peptide (for an entire batch) decoder_reverse : bool @@ -1166,7 +1173,7 @@ def _calc_match_score( Returns ------- - score : list[float], list[list[float]] + (all_scores, per_aa_scores) : Tuple[torch.Tensor, torch.Tensor] The score between the input spectra and associated peptide (for an entire batch) a list of lists of per amino acid scores @@ -1175,7 +1182,7 @@ def _calc_match_score( # Remove trailing tokens from predictions based on decoder reversal if decoder_reverse: batch_all_aa_scores = batch_all_aa_scores[:, 1:] - elif not decoder_reverse: + else: batch_all_aa_scores = batch_all_aa_scores[:, :-1] # Vectorized scoring using efficient indexing. 
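As an aside to the scoring below: the per-amino-acid probabilities gathered here are combined into the PSM score as the mean of their logarithms, which ranks identically to the geometric mean described in the DbSpec2Pep docstring above but avoids underflow for long peptides. A quick numerical check of that equivalence (the probabilities are arbitrary):

import numpy as np

aa_probs = np.array([0.9, 0.8, 0.95, 0.7])
log_mean = np.log(aa_probs).mean()
geo_mean = aa_probs.prod() ** (1 / len(aa_probs))
assert np.isclose(np.exp(log_mean), geo_mean)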
@@ -1186,10 +1193,10 @@ def _calc_match_score( ) cols = torch.arange(0, batch_all_aa_scores.shape[1]).expand_as(rows) - per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indicies] + per_aa_scores = batch_all_aa_scores[rows, cols, truth_aa_indices] per_aa_scores[per_aa_scores == 0] += 1e-10 - score_mask = truth_aa_indicies != 0 + score_mask = truth_aa_indices != 0 per_aa_scores[~score_mask] = 0 log_per_aa_scores = torch.log(per_aa_scores) all_scores = torch.where( diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 284acbe8..865df71b 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -151,7 +151,7 @@ def db_search( test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="db") + self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.predict_dataloader()) def train( diff --git a/tests/conftest.py b/tests/conftest.py index cac1a873..b2244308 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,7 +16,37 @@ def mgf_small(tmp_path): return _create_mgf(peptides, mgf_file) -def _create_mgf(peptides, mgf_file, random_state=42): +@pytest.fixture +def tiny_fasta_file(tmp_path, fasta_raw_data): + fasta_file = tmp_path / "tiny_fasta.fasta" + with fasta_file.open("w+") as fasta_ref: + fasta_ref.write(fasta_raw_data) + + return fasta_file + + +@pytest.fixture +def fasta_raw_data(): + return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + + +@pytest.fixture +def mgf_db_search(tmp_path): + """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" + peptides = [ + "ATSIPAR", + "VTLSCR", + "LLIYGASTR", + "EIVMTQSPPTLSLSPGER", + "MEAPAQLLFLLLLWLPDTTR", + "ASQSVSSSYLTWYQQKPGQAPR", + "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", + ] + mgf_file = tmp_path / "db_search.mgf" + return _create_mgf(peptides, mgf_file, c_mod=True) + + +def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): """ Create a fake MGF file from one or more peptides. @@ -28,20 +58,25 @@ def _create_mgf(peptides, mgf_file, random_state=42): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- mgf_file : Path """ rng = np.random.default_rng(random_state) - entries = [_create_mgf_entry(p, rng.choice([2, 3])) for p in peptides] + entries = [ + _create_mgf_entry(p, rng.choice([2, 3]), c_mod) for p in peptides + ] with mgf_file.open("w+") as mgf_ref: mgf_ref.write("\n".join(entries)) return mgf_file -def _create_mgf_entry(peptide, charge=2): +def _create_mgf_entry(peptide, charge=2, c_mod=False): """ Create a MassIVE-KB style MGF entry for a single PSM. @@ -51,13 +86,21 @@ def _create_mgf_entry(peptide, charge=2): A peptide sequence. charge : int, optional The peptide charge state. + c_mod : bool, optional + Whether to use the constant carbamidomethylation + of C in mass calculations. Returns ------- str The PSM entry in an MGF file format. 
""" - precursor_mz = calculate_mass(peptide, charge=int(charge)) + if not c_mod: + precursor_mz = calculate_mass(peptide, charge=int(charge)) + else: + aa_mass = std_aa_mass + aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) @@ -263,119 +306,6 @@ def tiny_config(tmp_path): return cfg_file -@pytest.fixture -def tiny_fasta_file(tmp_path, fasta_raw_data): - fasta_file = tmp_path / "tiny_fasta.fasta" - with fasta_file.open("w+") as fasta_ref: - fasta_ref.write(fasta_raw_data) - - return fasta_file - - -@pytest.fixture -def fasta_raw_data(): - return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" - - -@pytest.fixture -def mgf_db_search(tmp_path): - """An MGF file with 2 unannotated spectra and scan numbers.""" - peptides = [ - "ATSIPAR", - "VTLSCR", - "LLIYGASTR", - "EIVMTQSPPTLSLSPGER", - "MEAPAQLLFLLLLWLPDTTR", - "ASQSVSSSYLTWYQQKPGQAPR", - "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", - ] - mgf_file = tmp_path / "db_search.mgf" - return _create_unannotated_mgf(peptides, mgf_file, c_mod=True) - - -@pytest.fixture -def mgf_small_unannotated(tmp_path): - """An MGF file with 2 unannotated spectra and scan numbers.""" - peptides = ["LESLIEK", "PEPTIDEK", "LESTIEK"] - mgf_file = tmp_path / "small_unannotated.mgf" - return _create_unannotated_mgf(peptides, mgf_file) - - -def _create_unannotated_mgf(peptides, mgf_file, random_state=999, c_mod=False): - """ - Create a fake MGF file from one or more peptides. - This file will have no SEQ= parameter, but will have a SCANS= parameter. - - Parameters - ---------- - peptides : str or list of str - The peptides for which to create spectra. - mgf_file : Path - The MGF file to create. - random_state : int or numpy.random.Generator, optional - The random seed. The charge states are chosen to be 2 or 3 randomly. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. - - Returns - ------- - mgf_file : Path - """ - rng = np.random.default_rng(random_state) - entries = [ - _create_unannotated_mgf_entry(p, idx, rng.choice([2, 3]), c_mod=c_mod) - for idx, p in enumerate(peptides) - ] - with mgf_file.open("w+") as mgf_ref: - mgf_ref.write("\n".join(entries)) - - return mgf_file - - -def _create_unannotated_mgf_entry(peptide, scan_num, charge, c_mod=False): - """ - Create a MassIVE-KB style MGF entry for a single PSM. - Each entry will have no SEQ= parameter, but will have a SCANS= parameter. - - Parameters - ---------- - peptide : str - A peptide sequence. - scan_num : int - The scan number. - charge : int, optional - The peptide charge state. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. - - Returns - ------- - str - The PSM entry in an MGF file format. 
- """ - if not c_mod: - precursor_mz = calculate_mass(peptide, charge=int(charge)) - else: - aa_mass = std_aa_mass - aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass - precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) - mzs, intensities = _peptide_to_peaks(peptide, charge) - frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) - - mgf = [ - "BEGIN IONS", - f"TITLE=title::{scan_num}", - f"PEPMASS={precursor_mz}", - f"CHARGE={charge}+", - f"SCANS={scan_num}", - f"{frags}", - "END IONS", - ] - return "\n".join(mgf) - - @pytest.fixture def tide_dir_small(tmp_path): """A directory with a very small TIDE search result.""" From 5f0675f032579e2976718c619969bdfd47cc68c5 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 3 Jul 2024 14:20:56 -0700 Subject: [PATCH 05/21] reordered and renamed variables for consistency --- casanovo/denovo/model.py | 45 ++++++++++++++++++--------------- casanovo/denovo/model_runner.py | 10 ++++---- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 312e7f92..8bb0dbee 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1021,30 +1021,34 @@ def predict_step(self, batch, *args): Returns ------- - predictions: List[Tuple[int, str, float, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray]] Model predictions for the given batch of spectra containing spectrum - scan number, peptide sequence, Casanovo-DB score, - amino acid-level confidence scores, and precursor information. + ids, precursor charge and m/z, candidate peptide sequences, peptide + scores, and amino acid-level scores. """ batch_res = [] for ( - indexes, + spectrum_i, peptides, precursors, encoded_ms, ) in self.smart_batch_gen(batch): pred, truth = self.decoder(peptides, precursors, *encoded_ms) pred = self.softmax(pred) - score_result, per_aa_score = _calc_match_score( + peptide_scores, aa_scores = _calc_match_score( pred, truth, self.decoder.reverse ) + precursor_info = precursors.cpu().detach().numpy() + precursor_charge = precursor_info[:, 1] + precursor_mz = precursor_info[:, 2] batch_res.append( ( - indexes, + spectrum_i, + precursor_charge, + precursor_mz, peptides, - score_result.cpu().detach().numpy(), - per_aa_score.cpu().detach().numpy(), - precursors.cpu().detach().numpy(), + peptide_scores.cpu().detach().numpy(), + aa_scores.cpu().detach().numpy(), ) ) return batch_res @@ -1121,26 +1125,25 @@ def on_predict_batch_end( if self.out_writer is None: return for ( - indexes, + spectrum_i, + precursor_charge, + precursor_mz, peptides, - score_result, - per_aa_score, - precursors, + peptide_scores, + aa_scores, ) in outputs: - prec_charge = precursors[:, 1] - prec_mz = precursors[:, 2] calc_mz = [ self.peptide_mass_calculator.mass(peptide, charge) - for peptide, charge in zip(peptides, prec_charge) + for peptide, charge in zip(peptides, precursor_charge) ] for row in zip( peptides, - score_result, - prec_charge, - prec_mz, + peptide_scores, + precursor_charge, + precursor_mz, calc_mz, - indexes, - per_aa_score, + spectrum_i, + aa_scores, ): self.out_writer.psms.append(row) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 865df71b..1457df38 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -333,12 +333,12 @@ def initialize_model( if self.model_filename is None: # Train a model from scratch if no model file is provided. 
+ if db_search: + logger.error("DB search mode requires a model file") + raise ValueError( + "A model file must be provided for DB search mode" + ) if train: - if db_search: - logger.error("Db search mode requires a model file.") - raise ValueError( - "A model file must be provided for DB search mode" - ) self.model = Spec2Pep(**model_params) return # Else we're not training, so a model file must be provided. From b4fd8ff05eaebcf62351627c8ceee2fee3bc23a1 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Thu, 4 Jul 2024 14:39:36 -0700 Subject: [PATCH 06/21] casanovo-db full working version with code simplification --- casanovo/data/db_utils.py | 16 +++- casanovo/data/ms_io.py | 80 +--------------- casanovo/denovo/dataloaders.py | 111 ++++++++++++++++++++++ casanovo/denovo/model.py | 159 +++++++++++--------------------- casanovo/denovo/model_runner.py | 32 ++++--- 5 files changed, 198 insertions(+), 200 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 341a6162..921c75bd 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -4,9 +4,12 @@ import depthcharge.masses from pyteomics import fasta, parser import bisect +import logging from typing import List, Tuple +logger = logging.getLogger("casanovo") + # CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 @@ -96,17 +99,22 @@ def digest_fasta( semi=semi, ) protein = header.split()[0] - peptide_list.extend([(pep, protein) for pep in pep_set]) + for pep in pep_set: + if len(pep) < min_length or len(pep) > max_length: + continue + if "X" in pep or "U" in pep: + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) else: raise ValueError(f"Digestion type {digestion} not recognized.") # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mass_calculator.masses.update({"X": 0.0}) # TODO: REMOVE? mod_peptide_list = [] for pep, prot in peptide_list: - if len(pep) < min_length or len(pep) > max_length: - continue peptide_isoforms = parser.isoforms( pep, variable_mods=var_mods, diff --git a/casanovo/data/ms_io.py b/casanovo/data/ms_io.py index a701b627..b27f083b 100644 --- a/casanovo/data/ms_io.py +++ b/casanovo/data/ms_io.py @@ -22,13 +22,10 @@ class MztabWriter: ---------- filename : str The name of the mzTab file. - is_db_variant : bool - Whether the mzTab file is for a Casanovo-DB search. """ - def __init__(self, filename: str, is_db_variant: bool = False): + def __init__(self, filename: str): self.filename = filename - self.is_db_variant = is_db_variant self.metadata = [ ("mzTab-version", "1.0.0"), ("mzTab-mode", "Summary"), @@ -150,9 +147,6 @@ def save(self) -> None: """ Export the spectrum identifications to the mzTab file. """ - if self.is_db_variant: - self.save_db_variant() - return with open(self.filename, "w", newline="") as f: writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) # Write metadata. @@ -192,7 +186,7 @@ def save(self) -> None: "PSM", psm[0], # sequence i, # PSM_ID - "null", # accession + "null" if len(psm) < 8 else psm[7], # accession "null", # unique "null", # database "null", # database_version @@ -215,73 +209,3 @@ def save(self) -> None: psm[6], # opt_ms_run[1]_aa_scores ] ) - - def save_db_variant(self) -> None: - """ - Export the Casanovo-DB search results to the mzTab file. - - Outputs PSMs in the order they were scored - (i.e. the order in the .mgf file). 
- """ - with open(self.filename, "w", newline="") as f: - writer = csv.writer(f, delimiter="\t", lineterminator=os.linesep) - # Write metadata. - for row in self.metadata: - writer.writerow(["MTD", *row]) - # Write PSMs. - writer.writerow( - [ - "PSH", - "sequence", - "PSM_ID", - "accession", - "unique", - "database", - "database_version", - "search_engine", - "search_engine_score[1]", - "modifications", - "retention_time", - "charge", - "exp_mass_to_charge", - "calc_mass_to_charge", - "spectra_ref", - "pre", - "post", - "start", - "end", - "opt_ms_run[1]_aa_scores", - ] - ) - for i, psm in enumerate(self.psms): - writer.writerow( - [ - "PSM", - psm[0], # sequence - f"{psm[5]}:{i}", # PSM_ID (spectrum # :candidate #) - "null", # accession - "null", # unique - "null", # database - "null", # database_version - "null", # search_engine - psm[1], # search_engine_score[1] - "null", # modifications - "null", # retention_time - int(psm[2]), # charge - psm[3], # exp_mass_to_charge - psm[4], # calc_mass_to_charge - psm[5], # spectra_ref - "null", # pre - "null", # post - "null", # start - "null", # end - ",".join( - list( - map( - "{:.5f}".format, - psm[6][psm[6] != 0], - ) - ) - ), # opt_ms_run[1]_aa_scores - ] - ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 97bfb2fc..80a4f7dc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -3,6 +3,8 @@ import functools import os from typing import List, Optional, Tuple +from functools import partial +import logging import lightning.pytorch as pl import numpy as np @@ -13,6 +15,9 @@ AnnotatedSpectrumDataset, SpectrumDataset, ) +from ..data import db_utils + +logger = logging.getLogger("casanovo") class DeNovoDataModule(pl.LightningDataModule): @@ -176,6 +181,22 @@ def predict_dataloader(self) -> torch.utils.data.DataLoader: """Get the predict DataLoader.""" return self._make_loader(self.test_dataset, self.eval_batch_size) + def db_dataloader(self) -> torch.utils.data.DataLoader: + """Get a special dataloader for DB search""" + return torch.utils.data.DataLoader( + self.test_dataset, + batch_size=self.eval_batch_size, + collate_fn=partial( + prepare_psm_batch, + digest=self.digest, + precursor_tolerance=self.precursor_tolerance, + isotope_error=self.isotope_error, + ), + pin_memory=True, + num_workers=self.n_workers, + shuffle=False, + ) + def prepare_batch( batch: List[Tuple[torch.Tensor, float, int, str]] @@ -214,3 +235,93 @@ def prepare_batch( [precursor_masses, precursor_charges, precursor_mzs] ).T.float() return spectra, precursors, np.asarray(spectrum_ids) + + +def prepare_psm_batch( + batch: List[Tuple[torch.Tensor, float, int, str]], + digest: List[Tuple[str, float, str]], + precursor_tolerance: float, + isotope_error: str, +): + """ + Collate MS/MS spectra into a batch for DB search. + + The MS/MS spectra will be padded so that they fit nicely as a tensor. + However, the padded elements are ignored during the subsequent steps. + + Parameters + ---------- + batch : List[Tuple[torch.Tensor, float, int, str]] + A batch of data from an AnnotatedSpectrumDataset, consisting of for each + spectrum (i) a tensor with the m/z and intensity peak values, (ii), the + precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. + digest : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, and associated protein + from digesting a .fasta file. Sorted by mass in ascending order. Uses neutral masses. 
+ precursor_tolerance : float + The precursor mass tolerance in parts-per-million. + isotope_error : str + The isotope error levels to consider. + + Returns + ------- + all_spectra : torch.Tensor of shape (batch_size, n_peaks, 2) + The padded mass spectra tensor with the m/z and intensity peak values + for each spectrum. + all_precursors : torch.Tensor of shape (batch_size, 3) + A tensor with the precursor neutral mass, precursor charge, and + precursor m/z. + all_spectrum_ids : np.ndarray + The spectrum identifiers. + all_peptides : List[str] + The candidate peptides for each spectrum. + all_proteins : List[str] + The associated proteins for each candidate peptide. + """ + spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) + spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) + + precursor_mzs = torch.tensor(precursor_mzs) + precursor_charges = torch.tensor(precursor_charges) + precursor_masses = (precursor_mzs - 1.007276) * precursor_charges + precursors = torch.vstack( + [precursor_masses, precursor_charges, precursor_mzs] + ).T.float() + + all_spectra = [] + all_precursors = [] + all_spectrum_ids = [] + all_peptides = [] + all_proteins = [] + for idx in range(len(batch)): + digest_data = db_utils.get_candidates( + precursor_mzs[idx], + precursor_charges[idx], + digest, + precursor_tolerance, + isotope_error, + ) + try: + spec_peptides, _, pep_protein = list(zip(*digest_data)) + all_spectra.append( + spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) + ) + all_precursors.append( + precursors[idx].unsqueeze(0).repeat(len(spec_peptides), 1) + ) + all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) + all_peptides.extend(spec_peptides) + all_proteins.extend(pep_protein) + except ValueError: + logger.warning( + "No candidates found for spectrum %s", spectrum_ids[idx] + ) + continue + + return ( + torch.cat(all_spectra, dim=0), + torch.cat(all_precursors, dim=0), + all_spectrum_ids, + all_peptides, + all_proteins, + ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 8bb0dbee..2256946c 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1008,6 +1008,7 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.total_psms = 0 def predict_step(self, batch, *args): """ @@ -1015,137 +1016,85 @@ def predict_step(self, batch, *args): Parameters ---------- - batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] + batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str], List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors. + spectrum identifiers, (iv) candidate peptides, (v) associated proteins. Returns ------- - predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray]] + predictions: List[Tuple[int, int, float, str, np.ndarray, np.ndarray, str]] Model predictions for the given batch of spectra containing spectrum ids, precursor charge and m/z, candidate peptide sequences, peptide - scores, and amino acid-level scores. + scores, amino acid-level scores, and associated proteins. 
""" - batch_res = [] + predictions = [] + pred, truth = self.decoder(batch[3], batch[1], *self.encoder(batch[0])) + pred = self.softmax(pred) + all_scores, per_aa_scores = _calc_match_score( + pred, truth, self.decoder.reverse + ) for ( + precursor_charge, + precursor_mz, spectrum_i, - peptides, - precursors, - encoded_ms, - ) in self.smart_batch_gen(batch): - pred, truth = self.decoder(peptides, precursors, *encoded_ms) - pred = self.softmax(pred) - peptide_scores, aa_scores = _calc_match_score( - pred, truth, self.decoder.reverse - ) - precursor_info = precursors.cpu().detach().numpy() - precursor_charge = precursor_info[:, 1] - precursor_mz = precursor_info[:, 2] - batch_res.append( + peptide_score, + aa_scores, + peptide, + protein, + ) in zip( + batch[1][:, 1].cpu().detach().numpy(), + batch[1][:, 2].cpu().detach().numpy(), + batch[2], + all_scores.cpu().detach().numpy(), + per_aa_scores.cpu().detach().numpy(), + batch[3], + batch[4], + ): + predictions.append( ( spectrum_i, precursor_charge, precursor_mz, - peptides, - peptide_scores.cpu().detach().numpy(), - aa_scores.cpu().detach().numpy(), - ) - ) - return batch_res - - def smart_batch_gen(self, spectrum_batch): - """ - Transforms a batch of spectra into multiple equally-sized batches of PSMs. - - Parameters - ---------- - spectrum batch : Tuple[torch.Tensor, torch.Tensor, torch.Tensor] - A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers as torch Tensors. - - Yields - ------- - psm_batch: Tuple[List[int], List[str], torch.Tensor, Tuple[torch.Tensor, torch.Tensor]] - A batch of PSMs containing the spectrum index, peptide sequence, - precursor information, and encoded MS/MS spectra. - """ - all_psm = [] - batch_size = len(spectrum_batch[0]) - enc = self.encoder(spectrum_batch[0]) - enc = list(zip(*enc)) - precursors = spectrum_batch[1] - indexes = spectrum_batch[2] - for idx in range(batch_size): - digest_data = db_utils.get_candidates( - precursors[idx][2], - precursors[idx][1], - self.digest, - self.precursor_tolerance, - self.isotope_error, - ) - try: - spec_peptides, pep_masses, pep_protein = list( - zip(*digest_data) - ) - except ValueError: - logger.info("No peptides found for spectrum %s", indexes[idx]) - continue - spec_precursors = [precursors[idx]] * len(spec_peptides) - spec_enc = [enc[idx]] * len(spec_peptides) - spec_idx = [indexes[idx]] * len(spec_peptides) - all_psm.extend( - list( - zip( - spec_enc, - spec_precursors, - spec_peptides, - spec_idx, - ) + peptide, + peptide_score, + aa_scores, + protein, ) ) - # Continually grab num_pairs items from all_psm until list is exhausted - while len(all_psm) > 0: - psm_batch = all_psm[:batch_size] - all_psm = all_psm[batch_size:] - psm_batch = list(zip(*psm_batch)) - encoded_ms = ( - torch.stack([a[0] for a in psm_batch[0]]), - torch.stack([a[1] for a in psm_batch[0]]), - ) - prec_data = torch.stack(psm_batch[1]) - pep_str = list(psm_batch[2]) - indexes = [a[1] for a in psm_batch[3]] - yield (indexes, pep_str, prec_data, encoded_ms) + self.total_psms += len(predictions) + return predictions def on_predict_batch_end( self, outputs: List[Tuple[np.ndarray, List[str], torch.Tensor]], *args, ) -> None: - if self.out_writer is None: - return + """ + Write the database search results to the output file. 
+ """ for ( spectrum_i, - precursor_charge, + charge, precursor_mz, - peptides, - peptide_scores, + peptide, + peptide_score, aa_scores, + protein, ) in outputs: - calc_mz = [ - self.peptide_mass_calculator.mass(peptide, charge) - for peptide, charge in zip(peptides, precursor_charge) - ] - for row in zip( - peptides, - peptide_scores, - precursor_charge, - precursor_mz, - calc_mz, - spectrum_i, - aa_scores, - ): - self.out_writer.psms.append(row) + if len(peptide) == 0: + continue + self.out_writer.psms.append( + ( + peptide, + tuple(spectrum_i), + peptide_score, + charge, + precursor_mz, + self.peptide_mass_calculator.mass(peptide, charge), + ",".join(list(map("{:.5f}".format, aa_scores))), + protein, + ), + ) def _calc_match_score( diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 1457df38..3286f4b8 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -10,6 +10,8 @@ from pathlib import Path from typing import Iterable, List, Optional, Union +import time + import lightning.pytorch as pl import numpy as np import torch @@ -124,19 +126,21 @@ def db_search( ------- self """ - self.writer = ms_io.MztabWriter( - Path(output).with_suffix(".mztab"), is_db_variant=True - ) + self.writer = ms_io.MztabWriter(Path(output).with_suffix(".mztab")) self.writer.set_metadata( self.config, model=str(self.model_filename), config_filename=self.config.file, ) - self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer - self.model.digest = db_utils.digest_fasta( + test_index = self._get_index(peak_path, False, "db search") + self.writer.set_ms_run(test_index.ms_files) + + self.initialize_data_module(test_index=test_index) + self.loaders.setup(stage="test", annotated=False) + self.loaders.digest = db_utils.digest_fasta( fasta_path, enzyme, digestion, @@ -145,14 +149,16 @@ def db_search( min_length, max_length, ) - self.model.precursor_tolerance = precursor_tolerance - self.model.isotope_error = isotope_error - - test_index = self._get_index(peak_path, False, "db search") - self.writer.set_ms_run(test_index.ms_files) - self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="test", annotated=False) - self.trainer.predict(self.model, self.loaders.predict_dataloader()) + self.loaders.precursor_tolerance = precursor_tolerance + self.loaders.isotope_error = isotope_error + + t1 = time.time() + self.trainer.predict(self.model, self.loaders.db_dataloader()) + t2 = time.time() + logger.info("Database search took %.3f seconds", t2 - t1) + logger.info("Scored %s PSMs", self.model.total_psms) + logger.info("%.3f PSMs per second", self.model.total_psms / (t2 - t1)) + logger.info("%s seconds per PSM", (t2 - t1) / self.model.total_psms) def train( self, From 35ba7d497cbc0c044ca5e13fd8e6e09162f77590 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 4 Jul 2024 21:50:44 +0000 Subject: [PATCH 07/21] Generate new screengrabs with rich-codex --- docs/images/configure-help.svg | 154 +++++++++++++++------- docs/images/evaluate-help.svg | 182 ++++++++++++++------------ docs/images/help.svg | 224 ++++++++++++++------------------ docs/images/sequence-help.svg | 182 ++++++++++++++------------ docs/images/train-help.svg | 228 ++++++++++++++------------------- 5 files changed, 493 insertions(+), 477 deletions(-) diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg index 0822927a..4092bce3 100644 --- a/docs/images/configure-help.svg +++ 
b/docs/images/configure-help.svg
[Auto-generated rich-codex screengrab SVGs: the diffs for docs/images/configure-help.svg, evaluate-help.svg, help.svg, sequence-help.svg, and train-help.svg replace the previous renderings of each command's CLI help text. Every regenerated image instead captures a Python traceback raised while importing depthcharge (via tensorboard), ending in: AttributeError: `np.string_` was removed in the NumPy 2.0 release. Use `np.bytes_` instead.]

From f8a1a8964f929b793cd58844072d76656b4ac0f1 Mon Sep 17 00:00:00 2001
From: VarunAnanth2003
Date: Mon, 8 Jul 2024 12:14:52 -0700
Subject: [PATCH 08/21] fix batching issues

---
 casanovo/denovo/model.py        | 71 ++++++++++++++++++---------------
 casanovo/denovo/model_runner.py |  1 +
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py
index 2256946c..3a069dcd 100644
--- a/casanovo/denovo/model.py
+++ b/casanovo/denovo/model.py
@@ -1009,6 +1009,7 @@ class DbSpec2Pep(Spec2Pep):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.total_psms = 0
+        self.psm_batch_size = 1024
 
     def predict_step(self, batch, *args):
         """
@@ -1028,39 +1029,45 @@ def predict_step(self, batch, *args):
         scores, amino acid-level scores, and associated proteins.
""" predictions = [] - pred, truth = self.decoder(batch[3], batch[1], *self.encoder(batch[0])) - pred = self.softmax(pred) - all_scores, per_aa_scores = _calc_match_score( - pred, truth, self.decoder.reverse - ) - for ( - precursor_charge, - precursor_mz, - spectrum_i, - peptide_score, - aa_scores, - peptide, - protein, - ) in zip( - batch[1][:, 1].cpu().detach().numpy(), - batch[1][:, 2].cpu().detach().numpy(), - batch[2], - all_scores.cpu().detach().numpy(), - per_aa_scores.cpu().detach().numpy(), - batch[3], - batch[4], - ): - predictions.append( - ( - spectrum_i, - precursor_charge, - precursor_mz, - peptide, - peptide_score, - aa_scores, - protein, - ) + while len(batch[0]) > 0: + next_batch = [b[self.psm_batch_size :] for b in batch] + batch = [b[: self.psm_batch_size] for b in batch] + pred, truth = self.decoder( + batch[3], batch[1], *self.encoder(batch[0]) ) + pred = self.softmax(pred) + all_scores, per_aa_scores = _calc_match_score( + pred, truth, self.decoder.reverse + ) + for ( + precursor_charge, + precursor_mz, + spectrum_i, + peptide_score, + aa_scores, + peptide, + protein, + ) in zip( + batch[1][:, 1].cpu().detach().numpy(), + batch[1][:, 2].cpu().detach().numpy(), + batch[2], + all_scores.cpu().detach().numpy(), + per_aa_scores.cpu().detach().numpy(), + batch[3], + batch[4], + ): + predictions.append( + ( + spectrum_i, + precursor_charge, + precursor_mz, + peptide, + peptide_score, + aa_scores, + protein, + ) + ) + batch = next_batch self.total_psms += len(predictions) return predictions diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 3286f4b8..a6b59ed9 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -135,6 +135,7 @@ def db_search( self.initialize_trainer(train=True) self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer + self.model.psm_batch_size = self.config.predict_batch_size test_index = self._get_index(peak_path, False, "db search") self.writer.set_ms_run(test_index.ms_files) From 7cb8e141ccab5b865a3af00711d290cd6cab788d Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 12 Aug 2024 14:50:18 -0700 Subject: [PATCH 09/21] small fixes regarding documentation, import syntax, etc. --- casanovo/casanovo.py | 39 ++++++---- casanovo/data/db_utils.py | 71 +++++++++-------- casanovo/denovo/dataloaders.py | 10 +-- casanovo/denovo/model.py | 31 ++++---- casanovo/denovo/model_runner.py | 24 ++---- tests/conftest.py | 11 +-- tests/unit_tests/test_unit.py | 132 +++++++++++--------------------- 7 files changed, 137 insertions(+), 181 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 8ae9a81b..4b9b4e38 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -130,7 +130,7 @@ def sequence( ) -> None: """De novo sequence peptides from tandem mass spectra. - PEAK_PATH must be one or more mzMl, mzXML, or MGF files from which + PEAK_PATH must be one or more mzML, mzXML, or MGF files from which to sequence peptides. """ output = setup_logging(output, verbosity) @@ -205,7 +205,7 @@ def sequence( ) @click.option( "--digestion", - help="Digestion: full, partial", + help="Full: standard digestion. 
Semi: Include products of semi-specific cleavage", type=click.Choice( ["full", "partial"], case_sensitive=False, @@ -214,37 +214,41 @@ def sequence( ) @click.option( "--missed_cleavages", - help="Number of allowed missed cleavages", + help="Number of allowed missed cleavages when digesting protein", type=int, default=0, ) @click.option( "--max_mods", - help="Maximum number of modifications per peptide", + help="Maximum number of amino acid modifications per peptide", type=int, default=0, ) @click.option( - "--min_length", - help="Minimum peptide length", + "--min_peptide_length", + help="Minimum peptide length to consider", type=int, default=6, ) @click.option( - "--max_length", - help="Maximum peptide length", + "--max_peptide_length", + help="Maximum peptide length to consider", type=int, default=50, ) @click.option( "--precursor_tolerance", - help="Precursor tolerance window size (ppm)", - type=int, + help="Precursor tolerance window size (units: ppm)", + type=float, default=20, ) @click.option( "--isotope_error", - help="Isotope error levels to consider (list of ints, e.g: 1,2)", + help="Isotope error levels to consider. \ + Creates multiple mass windows to consider per spectrum \ + to account for observed mass not matching monoisotopic mass \ + due to the instrument assigning the 13C isotope \ + peak as the precursor (list of ints, e.g: 1,2)", type=str, default="0", ) @@ -255,9 +259,9 @@ def db_search( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, - precursor_tolerance: int, + min_peptide_length: int, + max_peptide_length: int, + precursor_tolerance: float, isotope_error: str, model: Optional[str], config: Optional[str], @@ -266,7 +270,8 @@ def db_search( ) -> None: """Perform a database search on MS/MS data using Casanovo-DB. - PEAK_PATH must be one MGF file. FASTA_PATH must be one FASTA file. + PEAK_PATH must be one or more mzML, mzXML, or MGF files. + FASTA_PATH must be one FASTA file. """ output = setup_logging(output, verbosity) config, model = setup_model(model, config, output, False) @@ -284,8 +289,8 @@ def db_search( digestion, missed_cleavages, max_mods, - min_length, - max_length, + min_peptide_length, + max_peptide_length, precursor_tolerance, isotope_error, output, diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 921c75bd..1af09a47 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,15 +1,16 @@ """Unique methods used within db-search mode""" -import os -import depthcharge.masses -from pyteomics import fasta, parser import bisect import logging - +import os from typing import List, Tuple +import depthcharge.masses +from pyteomics import fasta, parser + logger = logging.getLogger("casanovo") + # CONSTANTS HYDROGEN = 1.007825035 OXYGEN = 15.99491463 @@ -51,8 +52,8 @@ def digest_fasta( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, + min_peptide_length: int, + max_peptide_length: int, ): """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -70,9 +71,9 @@ def digest_fasta( The number of missed cleavages to allow. max_mods : int The maximum number of modifications to allow per peptide. - min_length : int + min_peptide_length : int The minimum length of peptides to consider. - max_length : int + max_peptide_length : int The maximum length of peptides to consider. Returns @@ -81,35 +82,36 @@ def digest_fasta( A list of tuples containing the peptide sequence, mass, and associated protein. 
Sorted by neutral mass in ascending order. """ - - # Verify the eistence of the file: + # Verify the existence of the file: if not os.path.isfile(fasta_filename): - print(f"File {fasta_filename} does not exist.") + logger.error("File %s does not exist.", fasta_filename) raise FileNotFoundError(f"File {fasta_filename} does not exist.") fasta_data = fasta.read(fasta_filename) peptide_list = [] - if digestion in ["full", "partial"]: - semi = True if digestion == "partial" else False - for header, seq in fasta_data: - pep_set = parser.cleave( - seq, - rule=parser.expasy_rules[enzyme], - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if len(pep) < min_length or len(pep) > max_length: - continue - if "X" in pep or "U" in pep: - logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) - else: + if digestion not in ["full", "partial"]: + logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") + semi = digestion == "partial" + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if len(pep) < min_peptide_length or len(pep) > max_peptide_length: + continue + if any( + aa in pep for aa in "BJOUXZ" + ): # Check for incorrect AA letters + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") @@ -136,7 +138,7 @@ def get_candidates( precursor_mz: float, charge: int, peptide_list: List[Tuple[str, float, str]], - precursor_tolerance: int, + precursor_tolerance: float, isotope_error: str, ): """ @@ -156,7 +158,6 @@ def get_candidates( isotope_error : str The isotope error levels to consider. """ - candidates = set() isotope_error = [int(x) for x in isotope_error.split(",")] @@ -219,7 +220,9 @@ def _to_raw_mass(mz_mass, charge): def get_mass_indices(masses, m_low, m_high): - """Grabs mass indices from a list of mass values that fall within a specified range. + """Grabs mass indices that fall within a specified range. + + Pulls from masses, a list of mass values. Requires that the mass values are sorted in ascending order. 
Parameters diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 80a4f7dc..14a0ff99 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -2,20 +2,20 @@ import functools import os -from typing import List, Optional, Tuple -from functools import partial import logging +from typing import List, Optional, Tuple +from depthcharge.data import AnnotatedSpectrumIndex import lightning.pytorch as pl import numpy as np import torch -from depthcharge.data import AnnotatedSpectrumIndex +from ..data import db_utils from ..data.datasets import ( AnnotatedSpectrumDataset, SpectrumDataset, ) -from ..data import db_utils + logger = logging.getLogger("casanovo") @@ -186,7 +186,7 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=partial( + collate_fn=functools.partial( prepare_psm_batch, digest=self.digest, precursor_tolerance=self.precursor_tolerance, diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 3a069dcd..79848682 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -16,7 +16,7 @@ from . import evaluate from .. import config -from ..data import ms_io, db_utils +from ..data import ms_io logger = logging.getLogger("casanovo") @@ -991,7 +991,8 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. + Subclass of Spec2Pep for the use of Casanovo as an \ + MS/MS database search score function. Uses teacher forcing to 'query' Casanovo for its score for each AA within a candidate peptide, and takes the geometric average of these scores @@ -1008,7 +1009,6 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.total_psms = 0 self.psm_batch_size = 1024 def predict_step(self, batch, *args): @@ -1029,11 +1029,14 @@ def predict_step(self, batch, *args): scores, amino acid-level scores, and associated proteins. 
""" predictions = [] - while len(batch[0]) > 0: - next_batch = [b[self.psm_batch_size :] for b in batch] - batch = [b[: self.psm_batch_size] for b in batch] + for start_idx in range(0, len(batch[0]), self.psm_batch_size): + current_batch = [ + b[start_idx : start_idx + self.psm_batch_size] for b in batch + ] pred, truth = self.decoder( - batch[3], batch[1], *self.encoder(batch[0]) + current_batch[3], + current_batch[1], + *self.encoder(current_batch[0]), ) pred = self.softmax(pred) all_scores, per_aa_scores = _calc_match_score( @@ -1048,13 +1051,13 @@ def predict_step(self, batch, *args): peptide, protein, ) in zip( - batch[1][:, 1].cpu().detach().numpy(), - batch[1][:, 2].cpu().detach().numpy(), - batch[2], + current_batch[1][:, 1].cpu().detach().numpy(), + current_batch[1][:, 2].cpu().detach().numpy(), + current_batch[2], all_scores.cpu().detach().numpy(), per_aa_scores.cpu().detach().numpy(), - batch[3], - batch[4], + current_batch[3], + current_batch[4], ): predictions.append( ( @@ -1067,8 +1070,6 @@ def predict_step(self, batch, *args): protein, ) ) - batch = next_batch - self.total_psms += len(predictions) return predictions def on_predict_batch_end( @@ -1088,8 +1089,6 @@ def on_predict_batch_end( aa_scores, protein, ) in outputs: - if len(peptide) == 0: - continue self.out_writer.psms.append( ( peptide, diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index a6b59ed9..c2b71098 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -10,8 +10,6 @@ from pathlib import Path from typing import Iterable, List, Optional, Union -import time - import lightning.pytorch as pl import numpy as np import torch @@ -20,7 +18,7 @@ from lightning.pytorch.callbacks import ModelCheckpoint from ..config import Config -from ..data import ms_io, db_utils +from ..data import db_utils, ms_io from ..denovo.dataloaders import DeNovoDataModule from ..denovo.model import Spec2Pep, DbSpec2Pep @@ -89,8 +87,8 @@ def db_search( digestion: str, missed_cleavages: int, max_mods: int, - min_length: int, - max_length: int, + min_peptide_length: int, + max_peptide_length: int, precursor_tolerance: float, isotope_error: str, output: str, @@ -100,7 +98,7 @@ def db_search( Parameters ---------- peak_path : Iterable[str] - The path to the .mgf data file for database search. + The paths to the .mgf data files for database search. fasta_path : str The path to the FASTA file for database search. enzyme : str @@ -111,9 +109,9 @@ def db_search( The number of missed cleavages allowed. max_mods : int The maximum number of modifications allowed per peptide. - min_length : int + min_peptide_length : int The minimum peptide length. - max_length : int + max_peptide_length : int The maximum peptide length. precursor_tolerance : float The precursor mass tolerance in ppm. 
@@ -147,19 +145,13 @@ def db_search( digestion, missed_cleavages, max_mods, - min_length, - max_length, + min_peptide_length, + max_peptide_length, ) self.loaders.precursor_tolerance = precursor_tolerance self.loaders.isotope_error = isotope_error - t1 = time.time() self.trainer.predict(self.model, self.loaders.db_dataloader()) - t2 = time.time() - logger.info("Database search took %.3f seconds", t2 - t1) - logger.info("Scored %s PSMs", self.model.total_psms) - logger.info("%.3f PSMs per second", self.model.total_psms / (t2 - t1)) - logger.info("%s seconds per PSM", (t2 - t1) / self.model.total_psms) def train( self, diff --git a/tests/conftest.py b/tests/conftest.py index b2244308..60afcd83 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,19 +17,16 @@ def mgf_small(tmp_path): @pytest.fixture -def tiny_fasta_file(tmp_path, fasta_raw_data): +def tiny_fasta_file(tmp_path): fasta_file = tmp_path / "tiny_fasta.fasta" with fasta_file.open("w+") as fasta_ref: - fasta_ref.write(fasta_raw_data) + fasta_ref.write( + ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" + ) return fasta_file -@pytest.fixture -def fasta_raw_data(): - return ">foo\nMEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGERVTLSCRASQSVSSSYLTWYQQKPGQAPRLLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP" - - @pytest.fixture def mgf_db_search(tmp_path): """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index e3707917..419cf3ef 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -2,6 +2,7 @@ import heapq import os import platform +import re import shutil import tempfile @@ -10,11 +11,10 @@ import numpy as np import pytest import torch -import re from casanovo import casanovo from casanovo import utils -from casanovo.data import ms_io, db_utils +from casanovo.data import db_utils, ms_io from casanovo.data.datasets import SpectrumDataset, AnnotatedSpectrumDataset from casanovo.denovo.evaluate import aa_match_batch, aa_match_metrics from casanovo.denovo.model import Spec2Pep, _aa_pep_score, _calc_match_score @@ -220,10 +220,7 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 -def test_digest_fasta_cleave(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) +def test_digest_fasta_cleave(tiny_fasta_file): # No missed cleavages expected_normal = [ @@ -275,49 +272,24 @@ def test_digest_fasta_cleave(fasta_raw_data): "EIVMTQSPPTLSLSPGERVTLSC+57.021RASQSVSSSYLTWYQQKPGQAPR", "LLIYGASTRATSIPARFSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] + for missed_cleavages, expected in zip( + (0, 1, 3), + (expected_normal, expected_1missedcleavage, expected_3missedcleavage), + ): + peptide_list = db_utils.digest_fasta( + fasta_filename=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=missed_cleavages, + max_mods=0, + min_peptide_length=6, + max_peptide_length=50, + ) + peptide_list = [x[0] for x in peptide_list] + assert peptide_list == expected - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=0, - max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_normal - - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=1, - 
max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_1missedcleavage - - peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", - enzyme="trypsin", - digestion="full", - missed_cleavages=3, - max_mods=0, - min_length=6, - max_length=50, - ) - peptide_list = [x[0] for x in peptide_list] - assert peptide_list == expected_3missedcleavage - - -def test_digest_fasta_mods(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) +def test_digest_fasta_mods(tiny_fasta_file): # 1 modification allowed # fixed: C+57.02146 # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 @@ -373,13 +345,13 @@ def test_digest_fasta_mods(fasta_raw_data): ] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=1, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] peptide_list = [ @@ -392,11 +364,7 @@ def test_digest_fasta_mods(fasta_raw_data): assert peptide_list == expected_1mod -def test_length_restrictions(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_length_restrictions(tiny_fasta_file): # length between 20 and 50 expected_long = [ "MEAPAQLLFLLLLWLPDTTR", @@ -408,35 +376,31 @@ def test_length_restrictions(fasta_raw_data): expected_short = ["ATSIPAR", "VTLSC+57.021R"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=0, - min_length=20, - max_length=50, + min_peptide_length=20, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_long peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=8, + min_peptide_length=6, + max_peptide_length=8, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_short -def test_digest_fasta_enzyme(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_digest_fasta_enzyme(tiny_fasta_file): # arg-c enzyme expected_argc = [ "ATSIPAR", @@ -452,35 +416,31 @@ def test_digest_fasta_enzyme(fasta_raw_data): expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="arg-c", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_argc peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="asp-n", digestion="full", missed_cleavages=0, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_aspn -def test_get_candidates(fasta_raw_data): - - with open("temp_fasta", "w") as file: - file.write(fasta_raw_data) - +def test_get_candidates(tiny_fasta_file): # precursor_window is 10000 expected_smallwindow = ["LLIYGASTR"] @@ -491,13 +451,13 @@ def test_get_candidates(fasta_raw_data): expected_widewindow = 
["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( @@ -511,13 +471,13 @@ def test_get_candidates(fasta_raw_data): assert expected_smallwindow == candidates peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( @@ -531,13 +491,13 @@ def test_get_candidates(fasta_raw_data): assert expected_midwindow == candidates peptide_list = db_utils.digest_fasta( - fasta_filename="temp_fasta", + fasta_filename=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, max_mods=0, - min_length=6, - max_length=50, + min_peptide_length=6, + max_peptide_length=50, ) candidates = db_utils.get_candidates( From b2f08ac307f50c4dabc458745cd79b3ec2058f35 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 19 Aug 2024 19:09:26 -0700 Subject: [PATCH 10/21] add proteindatabase --- casanovo/casanovo.py | 110 -------- casanovo/config.yaml | 36 ++- casanovo/data/datasets.py | 2 +- casanovo/data/db_utils.py | 442 +++++++++++++++++--------------- casanovo/denovo/dataloaders.py | 28 +- casanovo/denovo/model_runner.py | 45 +--- tests/conftest.py | 5 + tests/test_integration.py | 2 - tests/unit_tests/test_unit.py | 200 +++++++++------ 9 files changed, 404 insertions(+), 466 deletions(-) diff --git a/casanovo/casanovo.py b/casanovo/casanovo.py index 4b9b4e38..b153512d 100644 --- a/casanovo/casanovo.py +++ b/casanovo/casanovo.py @@ -158,111 +158,9 @@ def sequence( nargs=1, type=click.Path(exists=True, dir_okay=False), ) -@click.option( - "--enzyme", - help="Enzyme for in silico digestion, \ - See pyteomics.parser.expasy_rules for valid enzymes", - type=click.Choice( - [ - "arg-c", - "asp-n", - "bnps-skatole", - "caspase 1", - "caspase 2", - "caspase 3", - "caspase 4", - "caspase 5", - "caspase 6", - "caspase 7", - "caspase 8", - "caspase 9", - "caspase 10", - "chymotrypsin high specificity", - "chymotrypsin low specificity", - "clostripain", - "cnbr", - "enterokinase", - "factor xa", - "formic acid", - "glutamyl endopeptidase", - "granzyme b", - "hydroxylamine", - "iodosobenzoic acid", - "lysc", - "ntcb", - "pepsin ph1.3", - "pepsin ph2.0", - "proline endopeptidase", - "proteinase k", - "staphylococcal peptidase i", - "thermolysin", - "thrombin", - "trypsin", - "trypsin_exception", - ] - ), - default="trypsin", -) -@click.option( - "--digestion", - help="Full: standard digestion. 
Semi: Include products of semi-specific cleavage", - type=click.Choice( - ["full", "partial"], - case_sensitive=False, - ), - default="full", -) -@click.option( - "--missed_cleavages", - help="Number of allowed missed cleavages when digesting protein", - type=int, - default=0, -) -@click.option( - "--max_mods", - help="Maximum number of amino acid modifications per peptide", - type=int, - default=0, -) -@click.option( - "--min_peptide_length", - help="Minimum peptide length to consider", - type=int, - default=6, -) -@click.option( - "--max_peptide_length", - help="Maximum peptide length to consider", - type=int, - default=50, -) -@click.option( - "--precursor_tolerance", - help="Precursor tolerance window size (units: ppm)", - type=float, - default=20, -) -@click.option( - "--isotope_error", - help="Isotope error levels to consider. \ - Creates multiple mass windows to consider per spectrum \ - to account for observed mass not matching monoisotopic mass \ - due to the instrument assigning the 13C isotope \ - peak as the precursor (list of ints, e.g: 1,2)", - type=str, - default="0", -) def db_search( peak_path: Tuple[str], fasta_path: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, - precursor_tolerance: float, - isotope_error: str, model: Optional[str], config: Optional[str], output: Optional[str], @@ -285,14 +183,6 @@ def db_search( runner.db_search( peak_path, fasta_path, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_peptide_length, - max_peptide_length, - precursor_tolerance, - isotope_error, output, ) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index c7186ff7..860cfabb 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -5,18 +5,26 @@ ### # The following parameters can be modified when running inference or when -# fine-tuning an existing Casanovo model. +# fine-tuning an existing Casanovo model. They also affect database search +# parameters when running Casanovo in DB-search mode. ### # Max absolute difference allowed with respect to observed precursor m/z. -# Predictions outside the tolerance range are assigned a negative peptide score. +# denovo: Predictions outside the tolerance range are assigned a negative peptide score. +# db-search: Used to create mas windows for candidate generation. precursor_mass_tol: 50 # ppm # Isotopes to consider when comparing predicted and observed precursor m/z's. isotope_error_range: [0, 1] -# The minimum length of predicted peptides. +# The minimum length of predicted/scored peptides. min_peptide_len: 6 -# Number of spectra in one inference batch. +# Number of spectra or psms in one inference batch. predict_batch_size: 1024 + + +### +# The following parameters are unique to Casanovo's inference/finetuning mode. +### + # Number of beams used in beam search. n_beams: 1 # Number of PSMs for each spectrum. @@ -29,6 +37,26 @@ accelerator: "auto" # number will be automatically selected for based on the chosen accelerator. devices: + +### +# The following parameters are unique to Casanovo's database search mode. +### + +# Enzyme for in silico digestion, used to generate candidate peptides. +# See pyteomics.parser.expasy_rules for valid enzymes +enzyme: "trypsin" +# Digestion type for candidate peptide generation. +# Full: standard digestion. 
Semi: Include products of semi-specific cleavage +digestion: "full" +# Number of allowed missed cleavages when digesting protein +missed_cleavages: 0 +# Maximum number of amino acid modifications per peptide. +# None generates all possible isoforms as candidates. +max_mods: +# Maximum peptide length to consider +max_peptide_len: 50 + + ### # The following parameters should only be modified if you are training a new # Casanovo model from scratch. diff --git a/casanovo/data/datasets.py b/casanovo/data/datasets.py index 6244e88f..3f05811f 100644 --- a/casanovo/data/datasets.py +++ b/casanovo/data/datasets.py @@ -1,6 +1,6 @@ """A PyTorch Dataset class for annotated spectra.""" -from typing import Optional, Tuple +from typing import List, Optional, Tuple import depthcharge import numpy as np diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 1af09a47..a7b5e850 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -6,15 +6,12 @@ from typing import List, Tuple import depthcharge.masses +from numba import jit from pyteomics import fasta, parser logger = logging.getLogger("casanovo") - # CONSTANTS -HYDROGEN = 1.007825035 -OXYGEN = 15.99491463 -H2O = 2 * HYDROGEN + OXYGEN PROTON = 1.00727646677 ISOTOPE_SPACING = 1.003355 @@ -29,216 +26,243 @@ fixed_mods = {"carbm": ["C"]} -def convert_from_modx(seq: str): - """Converts peptide sequence from modX format to Casanovo-acceptable modifications. - - Args: - seq (str): Peptide in modX format - """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq - - -def digest_fasta( - fasta_filename: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, -): - """ - Digests a FASTA file and returns the peptides, their masses, and associated protein. - - Parameters - ---------- - fasta_filename : str - Path to the FASTA file. - enzyme : str - The enzyme to use for digestion. - See pyteomics.parser.expasy_rules for valid enzymes. - digestion : str - The type of digestion to perform. Either 'full' or 'partial'. - missed_cleavages : int - The number of missed cleavages to allow. - max_mods : int - The maximum number of modifications to allow per peptide. - min_peptide_length : int - The minimum length of peptides to consider. - max_peptide_length : int - The maximum length of peptides to consider. - - Returns - ------- - mod_peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, - and associated protein. Sorted by neutral mass in ascending order. 
- """ - # Verify the existence of the file: - if not os.path.isfile(fasta_filename): - logger.error("File %s does not exist.", fasta_filename) - raise FileNotFoundError(f"File {fasta_filename} does not exist.") - - fasta_data = fasta.read(fasta_filename) - peptide_list = [] - if digestion not in ["full", "partial"]: - logger.error("Digestion type %s not recognized.", digestion) - raise ValueError(f"Digestion type {digestion} not recognized.") - semi = digestion == "partial" - for header, seq in fasta_data: - pep_set = parser.cleave( - seq, - rule=parser.expasy_rules[enzyme], - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if len(pep) < min_peptide_length or len(pep) > max_peptide_length: - continue - if any( - aa in pep for aa in "BJOUXZ" - ): # Check for incorrect AA letters - logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) - - # Generate modified peptides - mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mod_peptide_list = [] - for pep, prot in peptide_list: - peptide_isoforms = parser.isoforms( - pep, - variable_mods=var_mods, - fixed_mods=fixed_mods, - max_mods=max_mods, - ) - peptide_isoforms = list(map(convert_from_modx, peptide_isoforms)) - mod_peptide_list.extend( - (mod_pep, mass_calculator.mass(mod_pep), prot) - for mod_pep in peptide_isoforms - ) - - # Sort the peptides by mass and return. - mod_peptide_list.sort(key=lambda x: x[1]) - return mod_peptide_list - - -def get_candidates( - precursor_mz: float, - charge: int, - peptide_list: List[Tuple[str, float, str]], - precursor_tolerance: float, - isotope_error: str, -): +class ProteinDatabase: """ - Returns a list of candidate peptides that fall within the specified mass range. + TODO Parameters ---------- - precursor_mz : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. - peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, and associated protein. - Must be sorted by mass in ascending order. Uses neutral masses. - precursor_tolerance : float - The precursor mass tolerance in parts-per-million. - isotope_error : str - The isotope error levels to consider. + TODO """ - candidates = set() - isotope_error = [int(x) for x in isotope_error.split(",")] - for e in isotope_error: - iso_shift = ISOTOPE_SPACING * e - upper_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( - 1 + (precursor_tolerance / 1e6) - ) - lower_bound = (_to_raw_mass(precursor_mz, charge) - iso_shift) * ( - 1 - (precursor_tolerance / 1e6) + def __init__( + self, + fasta_path: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + min_peptide_len: int, + max_peptide_len: int, + max_mods: int, + precursor_tolerance: float, + isotope_error: List[int], + ): + self.digest = self._digest_fasta( + fasta_path, + enzyme, + digestion, + missed_cleavages, + max_mods, + min_peptide_len, + max_peptide_len, ) - - start, end = get_mass_indices( - [x[1] for x in peptide_list], lower_bound, upper_bound + self.precursor_tolerance = precursor_tolerance + self.isotope_error = isotope_error + + def get_candidates( + self, + precursor_mz: float, + charge: int, + ): + """ + Returns a list of candidate peptides that fall within the specified mass range. + + Parameters + ---------- + precursor_mz : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. 
+ """ + candidates = set() + + for e in self.isotope_error: + iso_shift = ISOTOPE_SPACING * e + upper_bound = ( + ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + ) * (1 + (self.precursor_tolerance / 1e6)) + lower_bound = ( + ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + ) * (1 - (self.precursor_tolerance / 1e6)) + + start, end = ProteinDatabase._get_mass_indices( + [x[1] for x in self.digest], lower_bound, upper_bound + ) + + candidates.update(self.digest[start:end]) + + candidates = list(candidates) + candidates.sort(key=lambda x: x[1]) + return candidates + + def _digest_fasta( + self, + fasta_filename: str, + enzyme: str, + digestion: str, + missed_cleavages: int, + max_mods: int, + min_peptide_length: int, + max_peptide_length: int, + ): + """ + Digests a FASTA file and returns the peptides, their masses, and associated protein. + + Parameters + ---------- + fasta_filename : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + max_mods : int + The maximum number of modifications to allow per peptide. + min_peptide_length : int + The minimum length of peptides to consider. + max_peptide_length : int + The maximum length of peptides to consider. + + Returns + ------- + mod_peptide_list : List[Tuple[str, float, str]] + A list of tuples containing the peptide sequence, mass, + and associated protein. Sorted by neutral mass in ascending order. + """ + # Verify the existence of the file: + if not os.path.isfile(fasta_filename): + logger.error("File %s does not exist.", fasta_filename) + raise FileNotFoundError(f"File {fasta_filename} does not exist.") + + fasta_data = fasta.read(fasta_filename) + peptide_list = [] + if digestion not in ["full", "partial"]: + logger.error("Digestion type %s not recognized.", digestion) + raise ValueError(f"Digestion type {digestion} not recognized.") + semi = digestion == "partial" + for header, seq in fasta_data: + pep_set = parser.cleave( + seq, + rule=parser.expasy_rules[enzyme], + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) < min_peptide_length + or len(pep) > max_peptide_length + ): + continue + if any( + aa in pep for aa in "BJOUXZ" + ): # Check for incorrect AA letters + logger.warn( + "Skipping peptide with ambiguous amino acids: %s", pep + ) + continue + peptide_list.append((pep, protein)) + + # Generate modified peptides + mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") + mod_peptide_list = [] + for pep, prot in peptide_list: + peptide_isoforms = parser.isoforms( + pep, + variable_mods=var_mods, + fixed_mods=fixed_mods, + max_mods=max_mods, + ) + peptide_isoforms = list( + map(ProteinDatabase._convert_from_modx, peptide_isoforms) + ) + mod_peptide_list.extend( + (mod_pep, mass_calculator.mass(mod_pep), prot) + for mod_pep in peptide_isoforms + ) + + # Sort the peptides by mass and return. + mod_peptide_list.sort(key=lambda x: x[1]) + logger.info( + "Digestion complete. %d peptides generated.", len(mod_peptide_list) ) - - candidates.update(peptide_list[start:end]) - - candidates = list(candidates) - candidates.sort(key=lambda x: x[1]) - return candidates - - -def _to_mz(precursor_mass, charge): - """ - Convert precursor neutral mass to m/z value. 
- - Parameters - ---------- - precursor_mass : float - The precursor neutral mass. - charge : int - The precursor charge. - - Returns - ------- - mz : float - The calculated precursor mass-to-charge ratio. - """ - return (precursor_mass + (charge * PROTON)) / charge - - -def _to_raw_mass(mz_mass, charge): - """ - Convert precursor m/z value to neutral mass. - - Parameters - ---------- - mz_mass : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. - - Returns - ------- - mass : float - The calculated precursor neutral mass. - """ - return charge * (mz_mass - PROTON) - - -def get_mass_indices(masses, m_low, m_high): - """Grabs mass indices that fall within a specified range. - - Pulls from masses, a list of mass values. - Requires that the mass values are sorted in ascending order. - - Parameters - ---------- - masses : List[int] - List of mass values - m_low : int - Lower bound of mass range (inclusive) - m_high : int - Upper bound of mass range (inclusive) - - Return - ------ - indices : Tuple[int, int] - Indices of mass values that fall within the specified range - """ - start = bisect.bisect_left(masses, m_low) - end = bisect.bisect_right(masses, m_high) - return start, end + return mod_peptide_list + + def _to_mz(precursor_mass, charge): + """ + Convert precursor neutral mass to m/z value. + + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ + return (precursor_mass + (charge * PROTON)) / charge + + def _to_raw_mass(mz_mass, charge): + """ + Convert precursor m/z value to neutral mass. + + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. + """ + return charge * (mz_mass - PROTON) + + def _get_mass_indices(masses, m_low, m_high): + """Grabs mass indices that fall within a specified range. + + Pulls from masses, a list of mass values. + Requires that the mass values are sorted in ascending order. + + Parameters + ---------- + masses : List[int] + List of mass values + m_low : int + Lower bound of mass range (inclusive) + m_high : int + Upper bound of mass range (inclusive) + + Return + ------ + indices : Tuple[int, int] + Indices of mass values that fall within the specified range + """ + start = bisect.bisect_left(masses, m_low) + end = bisect.bisect_right(masses, m_high) + return start, end + + def _convert_from_modx(seq: str): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. 
+ + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 14a0ff99..4d5524f4 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -89,6 +89,7 @@ def __init__( self.train_dataset = None self.valid_dataset = None self.test_dataset = None + self.pdb = None def setup(self, stage: str = None, annotated: bool = True) -> None: """ @@ -96,7 +97,7 @@ def setup(self, stage: str = None, annotated: bool = True) -> None: Parameters ---------- - stage : str {"fit", "validate", "test", "db"} + stage : str {"fit", "validate", "test"} The stage indicating which Datasets to prepare. All are prepared by default. annotated: bool @@ -186,12 +187,7 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=functools.partial( - prepare_psm_batch, - digest=self.digest, - precursor_tolerance=self.precursor_tolerance, - isotope_error=self.isotope_error, - ), + collate_fn=functools.partial(prepare_psm_batch, pdb=self.pdb), pin_memory=True, num_workers=self.n_workers, shuffle=False, @@ -239,9 +235,7 @@ def prepare_batch( def prepare_psm_batch( batch: List[Tuple[torch.Tensor, float, int, str]], - digest: List[Tuple[str, float, str]], - precursor_tolerance: float, - isotope_error: str, + pdb: db_utils.ProteinDatabase, ): """ Collate MS/MS spectra into a batch for DB search. @@ -255,13 +249,8 @@ def prepare_psm_batch( A batch of data from an AnnotatedSpectrumDataset, consisting of for each spectrum (i) a tensor with the m/z and intensity peak values, (ii), the precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. - digest : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, and associated protein - from digesting a .fasta file. Sorted by mass in ascending order. Uses neutral masses. - precursor_tolerance : float - The precursor mass tolerance in parts-per-million. - isotope_error : str - The isotope error levels to consider. + pdb : db_utils.ProteinDatabase + The protein database to use for candidate peptide retrieval. Returns ------- @@ -294,12 +283,9 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = db_utils.get_candidates( + digest_data = pdb.get_candidates( precursor_mzs[idx], precursor_charges[idx], - digest, - precursor_tolerance, - isotope_error, ) try: spec_peptides, _, pep_protein = list(zip(*digest_data)) diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index c2b71098..b90f06b0 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -83,14 +83,6 @@ def db_search( self, peak_path: Iterable[str], fasta_path: str, - enzyme: str, - digestion: str, - missed_cleavages: int, - max_mods: int, - min_peptide_length: int, - max_peptide_length: int, - precursor_tolerance: float, - isotope_error: str, output: str, ) -> None: """Perform database search with Casanovo. @@ -101,22 +93,6 @@ def db_search( The paths to the .mgf data files for database search. 
fasta_path : str The path to the FASTA file for database search. - enzyme : str - The enzyme used for digestion. - digestion : str - The digestion type, full or partial. - missed_cleavages : int - The number of missed cleavages allowed. - max_mods : int - The maximum number of modifications allowed per peptide. - min_peptide_length : int - The minimum peptide length. - max_peptide_length : int - The maximum peptide length. - precursor_tolerance : float - The precursor mass tolerance in ppm. - isotope_error : str - Isotope error levels to consider, in comma-delineated string form. output : str Where should the output be saved? @@ -138,19 +114,18 @@ def db_search( self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.setup(stage="test", annotated=False) - self.loaders.digest = db_utils.digest_fasta( + self.loaders.pdb = db_utils.ProteinDatabase( fasta_path, - enzyme, - digestion, - missed_cleavages, - max_mods, - min_peptide_length, - max_peptide_length, + self.config.enzyme, + self.config.digestion, + self.config.missed_cleavages, + self.config.min_peptide_len, + self.config.max_peptide_len, + self.config.max_mods, + self.config.precursor_mass_tol, + self.config.isotope_error_range, ) - self.loaders.precursor_tolerance = precursor_tolerance - self.loaders.isotope_error = isotope_error - + self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) def train( diff --git a/tests/conftest.py b/tests/conftest.py index 60afcd83..f20d7879 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -242,6 +242,11 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, + "max_peptide_len": 50, + "enzyme": "trypsin", + "digestion": "full", + "missed_cleavages": 0, + "max_mods": None, "predict_batch_size": 1024, "n_beams": 1, "top_match": 1, diff --git a/tests/test_integration.py b/tests/test_integration.py index 4bd55174..61f735c3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -24,8 +24,6 @@ def test_db_search( tiny_config, "--output", str(output_path), - "--precursor_tolerance", - str(100), str(mgf_db_search), str(tiny_fasta_file), ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 419cf3ef..7a37e771 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -276,15 +276,18 @@ def test_digest_fasta_cleave(tiny_fasta_file): (0, 1, 3), (expected_normal, expected_1missedcleavage, expected_3missedcleavage), ): - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=missed_cleavages, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected @@ -343,16 +346,18 @@ def test_digest_fasta_mods(tiny_fasta_file): "+42.011FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", "+43.006FSGSGSGTDFTLTISSLQPEDFAVYYC+57.021QQDYNLP", ] - - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=1, - min_peptide_length=6, - max_peptide_length=50, + 
precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] peptide_list = [ x @@ -375,27 +380,33 @@ def test_length_restrictions(tiny_fasta_file): # length between 6 and 8 expected_short = ["ATSIPAR", "VTLSC+57.021R"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=20, + max_peptide_len=50, max_mods=0, - min_peptide_length=20, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_long - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=8, max_mods=0, - min_peptide_length=6, - max_peptide_length=8, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_short @@ -415,27 +426,33 @@ def test_digest_fasta_enzyme(tiny_fasta_file): # asp-n enzyme expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="arg-c", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_argc - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="asp-n", digestion="full", missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, + precursor_tolerance=20, + isotope_error=[0], ) + peptide_list = pdb.digest peptide_list = [x[0] for x in peptide_list] assert peptide_list == expected_aspn @@ -450,68 +467,53 @@ def test_get_candidates(tiny_fasta_file): # precursor window is 600000 expected_widewindow = ["ATSIPAR", "VTLSC+57.021R", "LLIYGASTR"] - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=10000, - isotope_error="0", + isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_smallwindow == candidates - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=150000, - isotope_error="0", + 
isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_midwindow == candidates - peptide_list = db_utils.digest_fasta( - fasta_filename=str(tiny_fasta_file), + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), enzyme="trypsin", digestion="full", missed_cleavages=1, + min_peptide_len=6, + max_peptide_len=50, max_mods=0, - min_peptide_length=6, - max_peptide_length=50, - ) - - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, precursor_tolerance=600000, - isotope_error="0", + isotope_error=[0], ) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_widewindow == candidates -def test_get_candidates_isotope_error(): +def test_get_candidates_isotope_error(tiny_fasta_file): # Tide isotope error windows for 496.2, 2+: # 0: [980.481617, 1000.289326] @@ -556,53 +558,83 @@ def test_get_candidates_isotope_error(): expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="0", + isotope_error=[0], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope0 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="1", + isotope_error=[1], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope1 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="2", + isotope_error=[2], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope2 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + max_mods=0, precursor_tolerance=10000, - isotope_error="3", + isotope_error=[3], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope3 == candidates - candidates = db_utils.get_candidates( - precursor_mz=496.2, - charge=2, - peptide_list=peptide_list, + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="full", + missed_cleavages=0, + min_peptide_len=0, + max_peptide_len=0, + 
max_mods=0, precursor_tolerance=10000, - isotope_error="0,1,2,3", + isotope_error=[0, 1, 2, 3], ) + pdb.digest = peptide_list + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) candidates = [x[0] for x in candidates] assert expected_isotope0123 == candidates From 3d0b0b9b6f3c4efedd7034aab4ecc62de2a9a4ca Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 Aug 2024 02:12:46 +0000 Subject: [PATCH 11/21] Generate new screengrabs with rich-codex --- docs/images/configure-help.svg | 160 +++++++--------------- docs/images/evaluate-help.svg | 191 +++++++++++++------------- docs/images/help.svg | 223 ++++++++++++++++++------------- docs/images/sequence-help.svg | 191 +++++++++++++------------- docs/images/train-help.svg | 237 ++++++++++++++++++++------------- 5 files changed, 509 insertions(+), 493 deletions(-) diff --git a/docs/images/configure-help.svg b/docs/images/configure-help.svg index 4092bce3..b1fcce10 100644 --- a/docs/images/configure-help.svg +++ b/docs/images/configure-help.svg @@ -1,4 +1,4 @@ - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - + - - $ casanovo configure --help -Traceback (most recent call last): -  File "/opt/hostedtoolcache/Python/3.10.14/x64/bin/casanovo", line 5, in <module> -    from casanovo.casanovo import main -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/casanovo/casanovo.py", line 32, in <module> -    import depthcharge -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/__init__.py", line 3, in <module> -    from . import components -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/__init__.py", line 2, in <module> -    from .transformers import SpectrumEncoder, PeptideDecoder -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/transformers.py", line 8, in <module> -    from .. 
[SVG screengrab diffs: the regenerated docs/images/configure-help.svg, evaluate-help.svg,
help.svg, sequence-help.svg, and train-help.svg replace a previously captured NumPy 2.0
"np.string_ was removed" AttributeError traceback with the current CLI help output; the
top-level help screen now also lists the new command "db-search  Perform a database
search on MS/MS data using Casanovo-DB."]
+╰──────────────────────────────────────────────────────────────────────────────╯ + diff --git a/docs/images/train-help.svg b/docs/images/train-help.svg index a71b8915..58251215 100644 --- a/docs/images/train-help.svg +++ b/docs/images/train-help.svg @@ -1,4 +1,4 @@ - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - - $ casanovo train --help -Traceback (most recent call last): -  File "/opt/hostedtoolcache/Python/3.10.14/x64/bin/casanovo", line 5, in <module> -    from casanovo.casanovo import main -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/casanovo/casanovo.py", line 32, in <module> -    import depthcharge -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/__init__.py", line 3, in <module> -    from . import components -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/__init__.py", line 2, in <module> -    from .transformers import SpectrumEncoder, PeptideDecoder -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/components/transformers.py", line 8, in <module> -    from .. import utils -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/depthcharge/utils.py", line 5, in <module> -    from tensorboard.backend.event_processing.event_accumulator import ( -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/backend/event_processing/event_accumulator.py", line 24, in <module> -    from tensorboard.backend.event_processing import event_file_loader -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/backend/event_processing/event_file_loader.py", line 21, in <module> -    from tensorboard import dataclass_compat -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/dataclass_compat.py", line 33, in <module> -    from tensorboard.plugins.hparams import metadata as hparams_metadata -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/plugins/hparams/metadata.py", line 32, in <module> -    NULL_TENSOR = tensor_util.make_tensor_proto( -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/util/tensor_util.py", line 405, in make_tensor_proto -    numpy_dtype = dtypes.as_dtype(nparray.dtype) -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py", line 677, in as_dtype -    if type_value.type == np.string_ or type_value.type == np.unicode_: -  File "/opt/hostedtoolcache/Python/3.10.14/x64/lib/python3.10/site-packages/numpy/__init__.py", line 397, in __getattr__ -    raise AttributeError( -AttributeError: `np.string_` was removed in the NumPy 2.0 release. Use `np.bytes_` instead.. Did you mean: 'strings'? + + $ casanovo train --help + +Usage:casanovo train [OPTIONSTRAIN_PEAK_PATH...                              + + Train a Casanovo model on your own data.                                        + TRAIN_PEAK_PATH must be one or more annoated MGF files, such as those provided  + by MassIVE-KB, from which to train a new Casnovo model.                         
+ +╭─ Arguments ──────────────────────────────────────────────────────────────────╮ +*  TRAIN_PEAK_PATH    FILE[required] +╰──────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ────────────────────────────────────────────────────────────────────╮ +*--validation_peak_pa…-pFILE                    An annotated MGF file   +                                                       for validation, like    +                                                       from MassIVE-KB. Use    +                                                       this option multiple    +                                                       times to specify        +                                                       multiple files.         +[required]             +--model-mFILE                    The model weights       +                                                       (.ckpt file). If not    +                                                       provided, Casanovo      +                                                       will try to download    +                                                       the latest release.     +--output-oFILE                    The mzTab file to       +                                                       which results will be   +                                                       written.                +--config-cFILE                    The YAML configuration  +                                                       file overriding the     +                                                       default options.        +--verbosity-v[debug|info|warning|er  Set the verbosity of    +ror]  console logging         +                                                       messages. Log files     +                                                       are always set to       +                                                       'debug'.                +--help-h  Show this message and   +                                                       exit.                   +╰──────────────────────────────────────────────────────────────────────────────╯ + From 812226e396f667f2d9e628e1aabd76546f8c18a1 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 20 Aug 2024 20:21:29 -0700 Subject: [PATCH 12/21] finish proteindatabase --- casanovo/data/db_utils.py | 101 +++++++++++++++++---------------- casanovo/denovo/dataloaders.py | 6 +- tests/unit_tests/test_unit.py | 100 +++++++++++++++----------------- 3 files changed, 101 insertions(+), 106 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index a7b5e850..d249e0c7 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,12 +1,12 @@ """Unique methods used within db-search mode""" -import bisect import logging import os -from typing import List, Tuple +from typing import List import depthcharge.masses from numba import jit +import pandas as pd from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -28,11 +28,29 @@ class ProteinDatabase: """ - TODO + Store digested .fasta data and return candidate peptides for a given precursor mass. Parameters ---------- - TODO + fasta_path : str + Path to the FASTA file. + enzyme : str + The enzyme to use for digestion. + See pyteomics.parser.expasy_rules for valid enzymes. + digestion : str + The type of digestion to perform. Either 'full' or 'partial'. + missed_cleavages : int + The number of missed cleavages to allow. + min_peptide_len : int + The minimum length of peptides to consider. 
+ max_peptide_len : int + The maximum length of peptides to consider. + max_mods : int + The maximum number of modifications to allow per peptide. + precursor_tolerance : float + The precursor mass tolerance in ppm. + isotope_error : List[int] + Isotopes to consider when comparing predicted and observed precursor m/z's. """ def __init__( @@ -73,27 +91,34 @@ def get_candidates( The precursor mass-to-charge ratio. charge : int The precursor charge. + + Returns + ------- + candidates : List[Tuple[str, str]] + A list of candidate peptides and associated + protein. """ - candidates = set() + candidates = [] for e in self.isotope_error: iso_shift = ISOTOPE_SPACING * e - upper_bound = ( + upper_bound = float( ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 + (self.precursor_tolerance / 1e6)) - lower_bound = ( + lower_bound = float( ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 - (self.precursor_tolerance / 1e6)) - start, end = ProteinDatabase._get_mass_indices( - [x[1] for x in self.digest], lower_bound, upper_bound - ) + window = self.digest[ + (self.digest["calc_mass"] >= lower_bound) + & (self.digest["calc_mass"] <= upper_bound) + ] + candidates.append(window[["peptide", "calc_mass", "protein"]]) - candidates.update(self.digest[start:end]) - - candidates = list(candidates) - candidates.sort(key=lambda x: x[1]) - return candidates + candidates = pd.concat(candidates) + candidates.drop_duplicates(inplace=True) + candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) + return list(candidates["peptide"]), list(candidates["protein"]) def _digest_fasta( self, @@ -128,9 +153,9 @@ def _digest_fasta( Returns ------- - mod_peptide_list : List[Tuple[str, float, str]] - A list of tuples containing the peptide sequence, mass, - and associated protein. Sorted by neutral mass in ascending order. + mod_peptide_list : pd.DataFrame + A Pandas DataFrame with peptide, mass, + and protein columns. Sorted by neutral mass in ascending order. """ # Verify the existence of the file: if not os.path.isfile(fasta_filename): @@ -180,17 +205,20 @@ def _digest_fasta( map(ProteinDatabase._convert_from_modx, peptide_isoforms) ) mod_peptide_list.extend( - (mod_pep, mass_calculator.mass(mod_pep), prot) + [mod_pep, mass_calculator.mass(mod_pep), prot] for mod_pep in peptide_isoforms ) - # Sort the peptides by mass and return. - mod_peptide_list.sort(key=lambda x: x[1]) - logger.info( - "Digestion complete. %d peptides generated.", len(mod_peptide_list) + # Create a DataFrame for easy sorting and filtering + pdb_df = pd.DataFrame( + mod_peptide_list, columns=["peptide", "calc_mass", "protein"] ) - return mod_peptide_list + pdb_df.sort_values(by=["calc_mass", "peptide"], inplace=True) + + logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) + return pdb_df + @jit def _to_mz(precursor_mass, charge): """ Convert precursor neutral mass to m/z value. @@ -209,6 +237,7 @@ def _to_mz(precursor_mass, charge): """ return (precursor_mass + (charge * PROTON)) / charge + @jit def _to_raw_mass(mz_mass, charge): """ Convert precursor m/z value to neutral mass. @@ -227,30 +256,6 @@ def _to_raw_mass(mz_mass, charge): """ return charge * (mz_mass - PROTON) - def _get_mass_indices(masses, m_low, m_high): - """Grabs mass indices that fall within a specified range. - - Pulls from masses, a list of mass values. - Requires that the mass values are sorted in ascending order. 
- - Parameters - ---------- - masses : List[int] - List of mass values - m_low : int - Lower bound of mass range (inclusive) - m_high : int - Upper bound of mass range (inclusive) - - Return - ------ - indices : Tuple[int, int] - Indices of mass values that fall within the specified range - """ - start = bisect.bisect_left(masses, m_low) - end = bisect.bisect_right(masses, m_high) - return start, end - def _convert_from_modx(seq: str): """Converts peptide sequence from modX format to Casanovo-acceptable modifications. diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4d5524f4..2d9e200b 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -284,11 +284,11 @@ def prepare_psm_batch( all_proteins = [] for idx in range(len(batch)): digest_data = pdb.get_candidates( - precursor_mzs[idx], - precursor_charges[idx], + float(precursor_mzs[idx]), + float(precursor_charges[idx]), ) try: - spec_peptides, _, pep_protein = list(zip(*digest_data)) + spec_peptides, pep_protein = digest_data all_spectra.append( spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) ) diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 7a37e771..2473a168 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -9,6 +9,7 @@ import einops import github import numpy as np +import pandas as pd import pytest import torch @@ -287,8 +288,7 @@ def test_digest_fasta_cleave(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected @@ -357,8 +357,7 @@ def test_digest_fasta_mods(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) peptide_list = [ x for x in peptide_list @@ -391,8 +390,7 @@ def test_length_restrictions(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_long pdb = db_utils.ProteinDatabase( @@ -406,8 +404,7 @@ def test_length_restrictions(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_short @@ -437,8 +434,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_argc pdb = db_utils.ProteinDatabase( @@ -452,8 +448,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file): precursor_tolerance=20, isotope_error=[0], ) - peptide_list = pdb.digest - peptide_list = [x[0] for x in peptide_list] + peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_aspn @@ -478,8 +473,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=10000, isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_smallwindow == candidates pdb = db_utils.ProteinDatabase( @@ -493,8 +487,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=150000, 
isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_midwindow == candidates pdb = db_utils.ProteinDatabase( @@ -508,8 +501,7 @@ def test_get_candidates(tiny_fasta_file): precursor_tolerance=600000, isotope_error=[0], ) - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == candidates @@ -522,35 +514,38 @@ def test_get_candidates_isotope_error(tiny_fasta_file): # 3: [977.510108, 997.257787] peptide_list = [ - ("A", 1001), - ("B", 1000), - ("C", 999), - ("D", 998), - ("E", 997), - ("F", 996), - ("G", 995), - ("H", 994), - ("I", 993), - ("J", 992), - ("K", 991), - ("L", 990), - ("M", 989), - ("N", 988), - ("O", 987), - ("P", 986), - ("Q", 985), - ("R", 984), - ("S", 983), - ("T", 982), - ("U", 981), - ("V", 980), - ("W", 979), - ("X", 978), - ("Y", 977), - ("Z", 976), + ("A", 1001, "foo"), + ("B", 1000, "foo"), + ("C", 999, "foo"), + ("D", 998, "foo"), + ("E", 997, "foo"), + ("F", 996, "foo"), + ("G", 995, "foo"), + ("H", 994, "foo"), + ("I", 993, "foo"), + ("J", 992, "foo"), + ("K", 991, "foo"), + ("L", 990, "foo"), + ("M", 989, "foo"), + ("N", 988, "foo"), + ("O", 987, "foo"), + ("P", 986, "foo"), + ("Q", 985, "foo"), + ("R", 984, "foo"), + ("S", 983, "foo"), + ("T", 982, "foo"), + ("U", 981, "foo"), + ("V", 980, "foo"), + ("W", 979, "foo"), + ("X", 978, "foo"), + ("Y", 977, "foo"), + ("Z", 976, "foo"), ] - peptide_list.sort(key=lambda x: x[1]) + peptide_list = pd.DataFrame( + peptide_list, columns=["peptide", "calc_mass", "protein"] + ) + peptide_list.sort_values("calc_mass", inplace=True) expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") @@ -570,8 +565,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[0], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == candidates pdb = db_utils.ProteinDatabase( @@ -586,8 +580,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[1], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope1 == candidates pdb = db_utils.ProteinDatabase( @@ -602,8 +595,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[2], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope2 == candidates pdb = db_utils.ProteinDatabase( @@ -618,8 +610,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[3], ) pdb.digest = peptide_list - candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope3 == candidates pdb = db_utils.ProteinDatabase( @@ -634,8 +625,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file): isotope_error=[0, 1, 2, 3], ) pdb.digest = peptide_list - candidates = 
pdb.get_candidates(precursor_mz=496.2, charge=2) - candidates = [x[0] for x in candidates] + candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == candidates From cfd39e80b4898077f92cacc6491a5c891c5a9454 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 23 Aug 2024 14:12:50 -0700 Subject: [PATCH 13/21] all comments addressed --- casanovo/config.yaml | 7 +++- casanovo/data/db_utils.py | 68 +++++++++++++++++++++++++++------ casanovo/denovo/model_runner.py | 1 + tests/conftest.py | 4 ++ tests/unit_tests/test_unit.py | 56 +++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 13 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 860cfabb..87795db8 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -46,7 +46,7 @@ devices: # See pyteomics.parser.expasy_rules for valid enzymes enzyme: "trypsin" # Digestion type for candidate peptide generation. -# Full: standard digestion. Semi: Include products of semi-specific cleavage +# full: standard digestion. semi: Include products of semi-specific cleavage digestion: "full" # Number of allowed missed cleavages when digesting protein missed_cleavages: 0 @@ -55,6 +55,11 @@ missed_cleavages: 0 max_mods: # Maximum peptide length to consider max_peptide_len: 50 +# Toggle allowed modifications on/off +# Permanent fixed mod (don't include): C+57.021 +# Allowed variable mods: M+15.995, N+0.984, Q+0.984, +# Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 +allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" ### diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index d249e0c7..2bdf3828 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -15,16 +15,6 @@ PROTON = 1.00727646677 ISOTOPE_SPACING = 1.003355 -var_mods = { - "d": ["N", "Q"], - "ox": ["M"], - "ace-": True, - "carb-": True, - "nh3x-": True, - "carbnh3x-": True, -} -fixed_mods = {"carbm": ["C"]} - class ProteinDatabase: """ @@ -51,6 +41,8 @@ class ProteinDatabase: The precursor mass tolerance in ppm. isotope_error : List[int] Isotopes to consider when comparing predicted and observed precursor m/z's. + allowed_mods : List[str] + A list of allowed modifications to consider. """ def __init__( @@ -64,7 +56,11 @@ def __init__( max_mods: int, precursor_tolerance: float, isotope_error: List[int], + allowed_mods: List[str], ): + self.fixed_mods, self.var_mods = self._construct_mods_dict( + allowed_mods + ) self.digest = self._digest_fasta( fasta_path, enzyme, @@ -197,8 +193,8 @@ def _digest_fasta( for pep, prot in peptide_list: peptide_isoforms = parser.isoforms( pep, - variable_mods=var_mods, - fixed_mods=fixed_mods, + variable_mods=self.var_mods, + fixed_mods=self.fixed_mods, max_mods=max_mods, ) peptide_isoforms = list( @@ -218,6 +214,54 @@ def _digest_fasta( logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) return pdb_df + def _construct_mods_dict(self, allowed_mods): + """ + Constructs dictionaries of fixed and variable modifications. + + Parameters + ---------- + allowed_mods : str + A comma-separated list of allowed modifications. + + Returns + ------- + fixed_mods : dict + A dictionary of fixed modifications. + var_mods : dict + A dictionary of variable modifications. 
+ """ + fixed_mods = {"carbm": ["C"]} + var_mods = {} + + if allowed_mods is "" or None: + return fixed_mods, var_mods + for mod in allowed_mods.split(","): + if mod == "M+15.995": + if "ox" not in var_mods: + var_mods["ox"] = [] + var_mods["ox"].append("M") + elif mod == "N+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("N") + elif mod == "Q+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("Q") + elif mod == "+42.011": + var_mods["ace-"] = True + elif mod == "+43.006": + var_mods["carb-"] = True + elif mod == "-17.027": + var_mods["nh3x-"] = True + elif mod == "+43.006-17.027": + var_mods["carbnh3x-"] = True + else: + logger.error("Modification %s not recognized.", mod) + raise ValueError(f"Modification {mod} not recognized.") + + return fixed_mods, var_mods + @jit def _to_mz(precursor_mass, charge): """ diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index b90f06b0..789c960b 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -124,6 +124,7 @@ def db_search( self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, + self.config.allowed_mods, ) self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) diff --git a/tests/conftest.py b/tests/conftest.py index f20d7879..452316c8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -299,6 +299,10 @@ def tiny_config(tmp_path): "-17.027": -17.026549, "+43.006-17.027": 25.980265, }, + "allowed_mods": ( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), } cfg_file = tmp_path / "config.yml" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 2473a168..a31e2024 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -287,6 +287,10 @@ def test_digest_fasta_cleave(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected @@ -356,6 +360,10 @@ def test_digest_fasta_mods(tiny_fasta_file): max_mods=1, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) peptide_list = [ @@ -389,6 +397,10 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_long @@ -403,6 +415,10 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_short @@ -433,6 +449,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_argc @@ -447,6 +467,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + 
"+42.011,+43.006,-17.027,+43.006-17.027" + ), ) peptide_list = list(pdb.digest["peptide"]) assert peptide_list == expected_aspn @@ -472,6 +496,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_smallwindow == candidates @@ -486,6 +514,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=150000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_midwindow == candidates @@ -500,6 +532,10 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=600000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == candidates @@ -563,6 +599,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -578,6 +618,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[1], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -593,6 +637,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[2], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -608,6 +656,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[3], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) @@ -623,6 +675,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 1, 2, 3], + allowed_mods=( + "M+15.995,N+0.984,Q+0.984," + "+42.011,+43.006,-17.027,+43.006-17.027" + ), ) pdb.digest = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) From 106c4ecc524c202a7624d6fa025afc82adac1a0c Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Wed, 28 Aug 2024 16:41:24 -0700 Subject: [PATCH 14/21] new comments addressed --- casanovo/config.yaml | 22 +-- casanovo/data/db_utils.py | 276 +++++++++++++++++--------------- casanovo/denovo/dataloaders.py | 22 +-- casanovo/denovo/model.py | 2 +- casanovo/denovo/model_runner.py | 2 +- tests/conftest.py | 25 ++- tests/test_integration.py | 4 +- tests/unit_tests/test_unit.py | 74 ++++----- 8 files changed, 209 insertions(+), 218 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 87795db8..6c9063f5 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -11,13 +11,13 @@ # Max absolute difference allowed with respect to observed precursor m/z. 
# denovo: Predictions outside the tolerance range are assigned a negative peptide score. -# db-search: Used to create mas windows for candidate generation. +# db-search: Select candidate peptides within the specified precursor m/z tolerance. precursor_mass_tol: 50 # ppm # Isotopes to consider when comparing predicted and observed precursor m/z's. isotope_error_range: [0, 1] -# The minimum length of predicted/scored peptides. +# The minimum length of considered peptides. min_peptide_len: 6 -# Number of spectra or psms in one inference batch. +# Number of spectra in one inference batch. predict_batch_size: 1024 @@ -43,21 +43,21 @@ devices: ### # Enzyme for in silico digestion, used to generate candidate peptides. -# See pyteomics.parser.expasy_rules for valid enzymes +# See pyteomics.parser.expasy_rules for valid enzymes. enzyme: "trypsin" # Digestion type for candidate peptide generation. -# full: standard digestion. semi: Include products of semi-specific cleavage +# full: standard digestion. semi: Include products of semi-specific cleavage. digestion: "full" -# Number of allowed missed cleavages when digesting protein +# Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 -# Maximum number of amino acid modifications per peptide. +# Maximum number of amino acid modifications per peptide, # None generates all possible isoforms as candidates. -max_mods: -# Maximum peptide length to consider +max_mods: 0 +# Maximum peptide length to consider. max_peptide_len: 50 -# Toggle allowed modifications on/off +# Select which modifications from the vocabulary can be used in candidate creation. # Permanent fixed mod (don't include): C+57.021 -# Allowed variable mods: M+15.995, N+0.984, Q+0.984, +# Allowed variable mods: M+15.995, N+0.984, Q+0.984 # Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 2bdf3828..c1d5e91e 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -2,11 +2,11 @@ import logging import os -from typing import List +from typing import List, Tuple import depthcharge.masses -from numba import jit import pandas as pd +from numba import njit from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -39,10 +39,10 @@ class ProteinDatabase: The maximum number of modifications to allow per peptide. precursor_tolerance : float The precursor mass tolerance in ppm. - isotope_error : List[int] - Isotopes to consider when comparing predicted and observed precursor m/z's. - allowed_mods : List[str] - A list of allowed modifications to consider. + isotope_error : Tuple[int, int] + Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. + allowed_mods : str + A comma separated string of allowed modifications to consider. 
""" def __init__( @@ -55,13 +55,11 @@ def __init__( max_peptide_len: int, max_mods: int, precursor_tolerance: float, - isotope_error: List[int], - allowed_mods: List[str], + isotope_error: Tuple[int, int], + allowed_mods: str, ): - self.fixed_mods, self.var_mods = self._construct_mods_dict( - allowed_mods - ) - self.digest = self._digest_fasta( + self.fixed_mods, self.var_mods = _construct_mods_dict(allowed_mods) + self.db_peptides = self._digest_fasta( fasta_path, enzyme, digestion, @@ -77,7 +75,7 @@ def get_candidates( self, precursor_mz: float, charge: int, - ): + ) -> List[Tuple[str, str]]: """ Returns a list of candidate peptides that fall within the specified mass range. @@ -96,18 +94,18 @@ def get_candidates( """ candidates = [] - for e in self.isotope_error: + for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e upper_bound = float( - ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + _to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 + (self.precursor_tolerance / 1e6)) lower_bound = float( - ProteinDatabase._to_raw_mass(precursor_mz, charge) - iso_shift + _to_raw_mass(precursor_mz, charge) - iso_shift ) * (1 - (self.precursor_tolerance / 1e6)) - window = self.digest[ - (self.digest["calc_mass"] >= lower_bound) - & (self.digest["calc_mass"] <= upper_bound) + window = self.db_peptides[ + (self.db_peptides["calc_mass"] >= lower_bound) + & (self.db_peptides["calc_mass"] <= upper_bound) ] candidates.append(window[["peptide", "calc_mass", "protein"]]) @@ -125,7 +123,7 @@ def _digest_fasta( max_mods: int, min_peptide_length: int, max_peptide_length: int, - ): + ) -> pd.DataFrame: """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -158,13 +156,18 @@ def _digest_fasta( logger.error("File %s does not exist.", fasta_filename) raise FileNotFoundError(f"File {fasta_filename} does not exist.") - fasta_data = fasta.read(fasta_filename) peptide_list = [] if digestion not in ["full", "partial"]: logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") + if enzyme not in parser.expasy_rules: + logger.error( + "Enzyme %s not recognized. 
Must be in pyteomics.parser.expasy_rules", + enzyme, + ) + raise ValueError(f"Enzyme {enzyme} not recognized.") semi = digestion == "partial" - for header, seq in fasta_data: + for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, rule=parser.expasy_rules[enzyme], @@ -182,136 +185,143 @@ def _digest_fasta( aa in pep for aa in "BJOUXZ" ): # Check for incorrect AA letters logger.warn( - "Skipping peptide with ambiguous amino acids: %s", pep + "Skipping peptide with unknown amino acids: %s", pep ) continue peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") - mod_peptide_list = [] - for pep, prot in peptide_list: - peptide_isoforms = parser.isoforms( - pep, - variable_mods=self.var_mods, - fixed_mods=self.fixed_mods, - max_mods=max_mods, - ) - peptide_isoforms = list( - map(ProteinDatabase._convert_from_modx, peptide_isoforms) - ) - mod_peptide_list.extend( - [mod_pep, mass_calculator.mass(mod_pep), prot] - for mod_pep in peptide_isoforms + peptide_isoforms = [ + ( + parser.isoforms( + pep, + variable_mods=self.var_mods, + fixed_mods=self.fixed_mods, + max_mods=max_mods, + ), + prot, ) - + for pep, prot in peptide_list + ] + mod_peptide_list = [ + (mod_pep, mass_calculator.mass(mod_pep), prot) + for isos, prot in peptide_isoforms + for mod_pep in map(_convert_from_modx, isos) + ] # Create a DataFrame for easy sorting and filtering - pdb_df = pd.DataFrame( + pep_table = pd.DataFrame( mod_peptide_list, columns=["peptide", "calc_mass", "protein"] ) - pdb_df.sort_values(by=["calc_mass", "peptide"], inplace=True) - - logger.info("Digestion complete. %d peptides generated.", len(pdb_df)) - return pdb_df + pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) - def _construct_mods_dict(self, allowed_mods): - """ - Constructs dictionaries of fixed and variable modifications. + logger.info( + "Digestion complete. %d peptides generated.", len(pep_table) + ) + return pep_table - Parameters - ---------- - allowed_mods : str - A comma-separated list of allowed modifications. - Returns - ------- - fixed_mods : dict - A dictionary of fixed modifications. - var_mods : dict - A dictionary of variable modifications. - """ - fixed_mods = {"carbm": ["C"]} - var_mods = {} - - if allowed_mods is "" or None: - return fixed_mods, var_mods - for mod in allowed_mods.split(","): - if mod == "M+15.995": - if "ox" not in var_mods: - var_mods["ox"] = [] - var_mods["ox"].append("M") - elif mod == "N+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("N") - elif mod == "Q+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("Q") - elif mod == "+42.011": - var_mods["ace-"] = True - elif mod == "+43.006": - var_mods["carb-"] = True - elif mod == "-17.027": - var_mods["nh3x-"] = True - elif mod == "+43.006-17.027": - var_mods["carbnh3x-"] = True - else: - logger.error("Modification %s not recognized.", mod) - raise ValueError(f"Modification {mod} not recognized.") +@njit +def _to_mz(precursor_mass, charge): + """ + Convert precursor neutral mass to m/z value. - return fixed_mods, var_mods + Parameters + ---------- + precursor_mass : float + The precursor neutral mass. + charge : int + The precursor charge. + + Returns + ------- + mz : float + The calculated precursor mass-to-charge ratio. + """ + return (precursor_mass + (charge * PROTON)) / charge - @jit - def _to_mz(precursor_mass, charge): - """ - Convert precursor neutral mass to m/z value. 
- Parameters - ---------- - precursor_mass : float - The precursor neutral mass. - charge : int - The precursor charge. +@njit +def _to_raw_mass(mz_mass, charge): + """ + Convert precursor m/z value to neutral mass. - Returns - ------- - mz : float - The calculated precursor mass-to-charge ratio. - """ - return (precursor_mass + (charge * PROTON)) / charge + Parameters + ---------- + mz_mass : float + The precursor mass-to-charge ratio. + charge : int + The precursor charge. + + Returns + ------- + mass : float + The calculated precursor neutral mass. + """ + return charge * (mz_mass - PROTON) - @jit - def _to_raw_mass(mz_mass, charge): - """ - Convert precursor m/z value to neutral mass. - Parameters - ---------- - mz_mass : float - The precursor mass-to-charge ratio. - charge : int - The precursor charge. +def _convert_from_modx(seq: str): + """Converts peptide sequence from modX format to Casanovo-acceptable modifications. - Returns - ------- - mass : float - The calculated precursor neutral mass. - """ - return charge * (mz_mass - PROTON) + Args: + seq (str): Peptide in modX format + """ + seq = seq.replace("carbmC", "C+57.021") # Fixed modification + seq = seq.replace("oxM", "M+15.995") + seq = seq.replace("dN", "N+0.984") + seq = seq.replace("dQ", "Q+0.984") + seq = seq.replace("ace-", "+42.011") + seq = seq.replace("carbnh3x-", "+43.006-17.027") + seq = seq.replace("carb-", "+43.006") + seq = seq.replace("nh3x-", "-17.027") + return seq + + +def _construct_mods_dict(allowed_mods): + """ + Constructs dictionaries of fixed and variable modifications. - def _convert_from_modx(seq: str): - """Converts peptide sequence from modX format to Casanovo-acceptable modifications. + Parameters + ---------- + allowed_mods : str + A comma-separated list of allowed modifications. + + Returns + ------- + fixed_mods : dict + A dictionary of fixed modifications. + var_mods : dict + A dictionary of variable modifications. 
+ """ + fixed_mods = {"carbm": ["C"]} + var_mods = {} - Args: - seq (str): Peptide in modX format - """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq + if not allowed_mods: + return fixed_mods, var_mods + for mod in allowed_mods.split(","): + if mod == "M+15.995": + if "ox" not in var_mods: + var_mods["ox"] = [] + var_mods["ox"].append("M") + elif mod == "N+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("N") + elif mod == "Q+0.984": + if "d" not in var_mods: + var_mods["d"] = [] + var_mods["d"].append("Q") + elif mod == "+42.011": + var_mods["ace-"] = True + elif mod == "+43.006": + var_mods["carb-"] = True + elif mod == "-17.027": + var_mods["nh3x-"] = True + elif mod == "+43.006-17.027": + var_mods["carbnh3x-"] = True + else: + logger.error("Modification %s not recognized.", mod) + raise ValueError(f"Modification {mod} not recognized.") + + return fixed_mods, var_mods diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 2d9e200b..a6ab8ddc 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -1,14 +1,14 @@ """Data loaders for the de novo sequencing task.""" import functools -import os import logging +import os from typing import List, Optional, Tuple -from depthcharge.data import AnnotatedSpectrumIndex import lightning.pytorch as pl import numpy as np import torch +from depthcharge.data import AnnotatedSpectrumIndex from ..data import db_utils from ..data.datasets import ( @@ -89,7 +89,7 @@ def __init__( self.train_dataset = None self.valid_dataset = None self.test_dataset = None - self.pdb = None + self.protein_database = None def setup(self, stage: str = None, annotated: bool = True) -> None: """ @@ -187,7 +187,9 @@ def db_dataloader(self) -> torch.utils.data.DataLoader: return torch.utils.data.DataLoader( self.test_dataset, batch_size=self.eval_batch_size, - collate_fn=functools.partial(prepare_psm_batch, pdb=self.pdb), + collate_fn=functools.partial( + prepare_psm_batch, protein_database=self.protein_database + ), pin_memory=True, num_workers=self.n_workers, shuffle=False, @@ -235,8 +237,8 @@ def prepare_batch( def prepare_psm_batch( batch: List[Tuple[torch.Tensor, float, int, str]], - pdb: db_utils.ProteinDatabase, -): + protein_database: db_utils.ProteinDatabase, +) -> Tuple[torch.Tensor, torch.Tensor, np.ndarray, List[str], List[str]]: """ Collate MS/MS spectra into a batch for DB search. @@ -249,7 +251,7 @@ def prepare_psm_batch( A batch of data from an AnnotatedSpectrumDataset, consisting of for each spectrum (i) a tensor with the m/z and intensity peak values, (ii), the precursor m/z, (iii) the precursor charge, (iv) the spectrum identifier. - pdb : db_utils.ProteinDatabase + protein_database : db_utils.ProteinDatabase The protein database to use for candidate peptide retrieval. 
Returns @@ -283,9 +285,9 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = pdb.get_candidates( - float(precursor_mzs[idx]), - float(precursor_charges[idx]), + digest_data = protein_database.get_candidates( + precursor_mzs[idx].type(torch.float64).item(), + precursor_charges[idx].type(torch.int64).item(), ) try: spec_peptides, pep_protein = digest_data diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 79848682..b38a27c0 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -991,7 +991,7 @@ def configure_optimizers( class DbSpec2Pep(Spec2Pep): """ - Subclass of Spec2Pep for the use of Casanovo as an \ + Subclass of Spec2Pep for the use of Casanovo as an MS/MS database search score function. Uses teacher forcing to 'query' Casanovo for its score for each AA diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 789c960b..6928560d 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -114,7 +114,7 @@ def db_search( self.writer.set_ms_run(test_index.ms_files) self.initialize_data_module(test_index=test_index) - self.loaders.pdb = db_utils.ProteinDatabase( + self.loaders.protein_database = db_utils.ProteinDatabase( fasta_path, self.config.enzyme, self.config.digestion, diff --git a/tests/conftest.py b/tests/conftest.py index 452316c8..90e522fe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def tiny_fasta_file(tmp_path): @pytest.fixture -def mgf_db_search(tmp_path): +def mgf_medium(tmp_path): """An MGF file with 7 spectra and scan numbers, C+57.021 mass modification considered""" peptides = [ "ATSIPAR", @@ -40,10 +40,10 @@ def mgf_db_search(tmp_path): "FSGSGSGTDFTLTISSLQPEDFAVYYCQQDYNLP", ] mgf_file = tmp_path / "db_search.mgf" - return _create_mgf(peptides, mgf_file, c_mod=True) + return _create_mgf(peptides, mgf_file, mod_aa_mass={"C": 160.030649}) -def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): +def _create_mgf(peptides, mgf_file, random_state=42, mod_aa_mass=None): """ Create a fake MGF file from one or more peptides. @@ -55,9 +55,9 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): The MGF file to create. random_state : int or numpy.random.Generator, optional The random seed. The charge states are chosen to be 2 or 3 randomly. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. + mod_aa_mass : dict, optional + A dictionary that specifies the modified masses of amino acids. + e.g. {"C": 160.030649} for carbamidomethylated C. Returns ------- @@ -65,7 +65,7 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): """ rng = np.random.default_rng(random_state) entries = [ - _create_mgf_entry(p, rng.choice([2, 3]), c_mod) for p in peptides + _create_mgf_entry(p, rng.choice([2, 3]), mod_aa_mass) for p in peptides ] with mgf_file.open("w+") as mgf_ref: mgf_ref.write("\n".join(entries)) @@ -73,7 +73,7 @@ def _create_mgf(peptides, mgf_file, random_state=42, c_mod=False): return mgf_file -def _create_mgf_entry(peptide, charge=2, c_mod=False): +def _create_mgf_entry(peptide, charge=2, mod_aa_mass=None): """ Create a MassIVE-KB style MGF entry for a single PSM. @@ -83,20 +83,19 @@ def _create_mgf_entry(peptide, charge=2, c_mod=False): A peptide sequence. charge : int, optional The peptide charge state. - c_mod : bool, optional - Whether to use the constant carbamidomethylation - of C in mass calculations. 
+ mod_aa_mass : dict, optional + A dictionary that specifies the modified masses of amino acids. Returns ------- str The PSM entry in an MGF file format. """ - if not c_mod: + if mod_aa_mass is None: precursor_mz = calculate_mass(peptide, charge=int(charge)) else: aa_mass = std_aa_mass - aa_mass.update({"C": 160.030649}) # Carbamidomethylated C mass + aa_mass.update(mod_aa_mass) precursor_mz = fast_mass(peptide, charge=int(charge), aa_mass=aa_mass) mzs, intensities = _peptide_to_peaks(peptide, charge) frags = "\n".join([f"{m} {i}" for m, i in zip(mzs, intensities)]) diff --git a/tests/test_integration.py b/tests/test_integration.py index 61f735c3..4275d792 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -8,7 +8,7 @@ def test_db_search( - mgf_db_search, tiny_fasta_file, tiny_config, tmp_path, monkeypatch + mgf_medium, tiny_fasta_file, tiny_config, tmp_path, monkeypatch ): # Run a command: monkeypatch.setattr(casanovo, "__version__", "4.1.0") @@ -24,7 +24,7 @@ def test_db_search( tiny_config, "--output", str(output_path), - str(mgf_db_search), + str(mgf_medium), str(tiny_fasta_file), ] diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index a31e2024..51d9a3c9 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -286,13 +286,13 @@ def test_digest_fasta_cleave(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected @@ -359,13 +359,13 @@ def test_digest_fasta_mods(tiny_fasta_file): max_peptide_len=50, max_mods=1, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) peptide_list = [ x for x in peptide_list @@ -396,13 +396,13 @@ def test_length_restrictions(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_long pdb = db_utils.ProteinDatabase( @@ -414,13 +414,13 @@ def test_length_restrictions(tiny_fasta_file): max_peptide_len=8, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_short @@ -448,13 +448,13 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc pdb = db_utils.ProteinDatabase( @@ -466,13 +466,13 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=20, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," 
"+42.011,+43.006,-17.027,+43.006-17.027" ), ) - peptide_list = list(pdb.digest["peptide"]) + peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn @@ -495,7 +495,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=10000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -513,7 +513,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=150000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -531,7 +531,7 @@ def test_get_candidates(tiny_fasta_file): max_peptide_len=50, max_mods=0, precursor_tolerance=600000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" @@ -584,9 +584,8 @@ def test_get_candidates_isotope_error(tiny_fasta_file): peptide_list.sort_values("calc_mass", inplace=True) expected_isotope0 = list("UTSRQPONMLKJIHGFEDCB") - expected_isotope1 = list("VUTSRQPONMLKJIHGFEDC") - expected_isotope2 = list("WVUTSRQPONMLKJIHGFED") - expected_isotope3 = list("XWVUTSRQPONMLKJIHGFE") + expected_isotope01 = list("VUTSRQPONMLKJIHGFEDCB") + expected_isotope012 = list("WVUTSRQPONMLKJIHGFEDCB") expected_isotope0123 = list("XWVUTSRQPONMLKJIHGFEDCB") pdb = db_utils.ProteinDatabase( @@ -598,13 +597,13 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[0], + isotope_error=[0, 0], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == candidates @@ -617,15 +616,15 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[1], + isotope_error=[0, 1], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope1 == candidates + assert expected_isotope01 == candidates pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -636,15 +635,15 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[2], + isotope_error=[0, 2], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope2 == candidates + assert expected_isotope012 == candidates pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -655,32 +654,13 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_peptide_len=0, max_mods=0, precursor_tolerance=10000, - isotope_error=[3], + isotope_error=[0, 3], allowed_mods=( "M+15.995,N+0.984,Q+0.984," "+42.011,+43.006,-17.027,+43.006-17.027" ), ) - pdb.digest = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope3 == candidates - - pdb = db_utils.ProteinDatabase( - fasta_path=str(tiny_fasta_file), - enzyme="trypsin", - digestion="full", - missed_cleavages=0, - min_peptide_len=0, - 
max_peptide_len=0, - max_mods=0, - precursor_tolerance=10000, - isotope_error=[0, 1, 2, 3], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" - ), - ) - pdb.digest = peptide_list + pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == candidates From 0dfdb2cb89514a0189e20cf19c231363567a7c72 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Mon, 2 Sep 2024 17:48:31 -0700 Subject: [PATCH 15/21] final adjustments added --- casanovo/config.yaml | 19 ++-- casanovo/data/db_utils.py | 158 +++++++++++++++++++------------- casanovo/denovo/dataloaders.py | 7 +- casanovo/denovo/model.py | 4 +- casanovo/denovo/model_runner.py | 17 ++-- tests/conftest.py | 43 ++++++++- tests/unit_tests/test_unit.py | 151 +++++++++++++++++++----------- 7 files changed, 254 insertions(+), 145 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 6c9063f5..af2f79d1 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -17,6 +17,8 @@ precursor_mass_tol: 50 # ppm isotope_error_range: [0, 1] # The minimum length of considered peptides. min_peptide_len: 6 +# The maximum length of considered peptides. +max_length: 100 # Number of spectra in one inference batch. predict_batch_size: 1024 @@ -47,19 +49,20 @@ devices: enzyme: "trypsin" # Digestion type for candidate peptide generation. # full: standard digestion. semi: Include products of semi-specific cleavage. +# Can also take a regex expression to specify custom digestion rules. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 # Maximum number of amino acid modifications per peptide, # None generates all possible isoforms as candidates. -max_mods: 0 -# Maximum peptide length to consider. -max_peptide_len: 50 +max_mods: 1 # Select which modifications from the vocabulary can be used in candidate creation. -# Permanent fixed mod (don't include): C+57.021 -# Allowed variable mods: M+15.995, N+0.984, Q+0.984 -# Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027 -allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027" +# Format: Comma-separated list of "aa:mod_residue", +# where aa is a standard amino acid or "X" for an N-terminal mod +# and mod_residue is a key from the "residues" dictionary. +# Example: "M:M+15.995,X:+43.006-17.027" +allowed_fixed_mods: "C:C+57.021" +allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ### @@ -111,8 +114,6 @@ dropout: 0.0 # Number of dimensions to use for encoding peak intensity. # Projected up to `dim_model` by default and summed with the peak m/z encoding. dim_intensity: -# Max decoded peptide length. -max_length: 100 # The number of iterations for the linear warm-up of the learning rate. warmup_iters: 100_000 # The number of iterations for the cosine half period of the learning rate. 
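For readers unfamiliar with pyteomics, the digestion-related settings above (enzyme, digestion, missed_cleavages) feed directly into the pyteomics calls used by the db_utils.py changes that follow. A minimal, illustrative sketch of that behaviour (not part of the patch; it assumes a pyteomics version whose parser.cleave accepts the "semi" keyword and uses a made-up example sequence):

    from pyteomics import parser

    protein = "MEAPAQLLFLLLLWLPDTTREIVMTQSPPTLSLSPGER"  # example sequence only

    # digestion: "full" -- standard tryptic peptides
    full = parser.cleave(
        protein,
        rule=parser.expasy_rules["trypsin"],
        missed_cleavages=0,
    )

    # digestion: "partial" -- also keep semi-specific cleavage products
    semi = parser.cleave(
        protein,
        rule=parser.expasy_rules["trypsin"],
        missed_cleavages=0,
        semi=True,
    )

    # With the standard trypsin rule this prints
    # ['EIVMTQSPPTLSLSPGER', 'MEAPAQLLFLLLLWLPDTTR'].
    print(sorted(full))
    # The "partial" setting can only add peptides on top of the fully
    # specific ones.
    print(len(full), len(semi))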
diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c1d5e91e..c9201538 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -1,13 +1,17 @@ """Unique methods used within db-search mode""" +import functools import logging import os +import re +import string from typing import List, Tuple import depthcharge.masses import pandas as pd +import pyteomics.fasta as fasta +import pyteomics.parser as parser from numba import njit -from pyteomics import fasta, parser logger = logging.getLogger("casanovo") @@ -41,8 +45,12 @@ class ProteinDatabase: The precursor mass tolerance in ppm. isotope_error : Tuple[int, int] Isotope range [min, max] to consider when comparing predicted and observed precursor m/z's. - allowed_mods : str - A comma separated string of allowed modifications to consider. + allowed_fixed_mods : str + A comma separated string of fixed modifications to consider. + allowed_var_mods : str + A comma separated string of variable modifications to consider. + residues : dict + A dictionary of amino acid masses. """ def __init__( @@ -56,9 +64,14 @@ def __init__( max_mods: int, precursor_tolerance: float, isotope_error: Tuple[int, int], - allowed_mods: str, + allowed_fixed_mods: str, + allowed_var_mods: str, + residues: dict, ): - self.fixed_mods, self.var_mods = _construct_mods_dict(allowed_mods) + self.residues = residues + self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( + allowed_fixed_mods, allowed_var_mods + ) self.db_peptides = self._digest_fasta( fasta_path, enzyme, @@ -88,20 +101,22 @@ def get_candidates( Returns ------- - candidates : List[Tuple[str, str]] - A list of candidate peptides and associated - protein. + candidates : pd.Series + A series of candidate peptides. """ candidates = [] for e in range(self.isotope_error[0], self.isotope_error[1] + 1): iso_shift = ISOTOPE_SPACING * e - upper_bound = float( - _to_raw_mass(precursor_mz, charge) - iso_shift - ) * (1 + (self.precursor_tolerance / 1e6)) - lower_bound = float( + shift_raw_mass = float( _to_raw_mass(precursor_mz, charge) - iso_shift - ) * (1 - (self.precursor_tolerance / 1e6)) + ) + upper_bound = shift_raw_mass * ( + 1 + (self.precursor_tolerance / 1e6) + ) + lower_bound = shift_raw_mass * ( + 1 - (self.precursor_tolerance / 1e6) + ) window = self.db_peptides[ (self.db_peptides["calc_mass"] >= lower_bound) @@ -112,7 +127,25 @@ def get_candidates( candidates = pd.concat(candidates) candidates.drop_duplicates(inplace=True) candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) - return list(candidates["peptide"]), list(candidates["protein"]) + return candidates["peptide"], candidates["protein"] + + def get_associated_protein(self, peptide: str) -> str: + """ + Returns the associated protein for a given peptide. + + Parameters + ---------- + peptide : str + The peptide sequence. + + Returns + ------- + protein : str + The associated protein. + """ + return self.db_peptides[self.db_peptides["peptide"] == peptide][ + "protein" + ].values[0] def _digest_fasta( self, @@ -161,16 +194,18 @@ def _digest_fasta( logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") if enzyme not in parser.expasy_rules: - logger.error( - "Enzyme %s not recognized. Must be in pyteomics.parser.expasy_rules", + logger.info( + "Enzyme %s not recognized. 
Interpreting as cleavage rule.", enzyme, ) - raise ValueError(f"Enzyme {enzyme} not recognized.") semi = digestion == "partial" + valid_aa = set( + [re.sub(r"[^A-Z]+", "", res) for res in self.residues.keys()] + ) for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, - rule=parser.expasy_rules[enzyme], + rule=enzyme, missed_cleavages=missed_cleavages, semi=semi, ) @@ -181,9 +216,8 @@ def _digest_fasta( or len(pep) > max_peptide_length ): continue - if any( - aa in pep for aa in "BJOUXZ" - ): # Check for incorrect AA letters + + if any(aa not in valid_aa for aa in pep): logger.warn( "Skipping peptide with unknown amino acids: %s", pep ) @@ -207,7 +241,10 @@ def _digest_fasta( mod_peptide_list = [ (mod_pep, mass_calculator.mass(mod_pep), prot) for isos, prot in peptide_isoforms - for mod_pep in map(_convert_from_modx, isos) + for mod_pep in map( + functools.partial(_convert_from_modx, swap_map=self.swap_map), + isos, + ) ] # Create a DataFrame for easy sorting and filtering pep_table = pd.DataFrame( @@ -261,31 +298,29 @@ def _to_raw_mass(mz_mass, charge): return charge * (mz_mass - PROTON) -def _convert_from_modx(seq: str): +def _convert_from_modx(seq: str, swap_map: dict) -> str: """Converts peptide sequence from modX format to Casanovo-acceptable modifications. Args: - seq (str): Peptide in modX format + seq : str + Peptide in modX format + swap_map : dict + Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. """ - seq = seq.replace("carbmC", "C+57.021") # Fixed modification - seq = seq.replace("oxM", "M+15.995") - seq = seq.replace("dN", "N+0.984") - seq = seq.replace("dQ", "Q+0.984") - seq = seq.replace("ace-", "+42.011") - seq = seq.replace("carbnh3x-", "+43.006-17.027") - seq = seq.replace("carb-", "+43.006") - seq = seq.replace("nh3x-", "-17.027") - return seq - - -def _construct_mods_dict(allowed_mods): + regex = re.compile("(%s)" % "|".join(map(re.escape, swap_map.keys()))) + return regex.sub(lambda x: swap_map[x.group()], seq) + + +def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): """ Constructs dictionaries of fixed and variable modifications. Parameters ---------- - allowed_mods : str - A comma-separated list of allowed modifications. + allowed_fixed_mods : str + A comma separated string of fixed modifications to consider. + allowed_var_mods : str + A comma separated string of variable modifications to consider. Returns ------- @@ -293,35 +328,26 @@ def _construct_mods_dict(allowed_mods): A dictionary of fixed modifications. var_mods : dict A dictionary of variable modifications. + swap_map : dict + A dictionary that allows for swapping of modX to Casanovo-acceptable modifications. 
""" - fixed_mods = {"carbm": ["C"]} - var_mods = {} + swap_map = {} + fixed_mods = {} + for idx, mod in enumerate(allowed_fixed_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[idx] + fixed_mods[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - if not allowed_mods: - return fixed_mods, var_mods - for mod in allowed_mods.split(","): - if mod == "M+15.995": - if "ox" not in var_mods: - var_mods["ox"] = [] - var_mods["ox"].append("M") - elif mod == "N+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("N") - elif mod == "Q+0.984": - if "d" not in var_mods: - var_mods["d"] = [] - var_mods["d"].append("Q") - elif mod == "+42.011": - var_mods["ace-"] = True - elif mod == "+43.006": - var_mods["carb-"] = True - elif mod == "-17.027": - var_mods["nh3x-"] = True - elif mod == "+43.006-17.027": - var_mods["carbnh3x-"] = True + var_mods = {} + for idx, mod in enumerate(allowed_var_mods.split(",")): + aa, mod_aa = mod.split(":") + mod_id = string.ascii_lowercase[idx] + if aa == "X": + var_mods[f"{mod_id}-"] = True + swap_map[f"{mod_id}-"] = f"{mod_aa}" else: - logger.error("Modification %s not recognized.", mod) - raise ValueError(f"Modification {mod} not recognized.") + var_mods[mod_id] = [aa] + swap_map[f"{mod_id}{aa}"] = f"{mod_aa}" - return fixed_mods, var_mods + return fixed_mods, var_mods, swap_map diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index a6ab8ddc..6e8c93b3 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -267,7 +267,7 @@ def prepare_psm_batch( all_peptides : List[str] The candidate peptides for each spectrum. all_proteins : List[str] - The associated proteins for each candidate peptide. + The proteins associated with each candidate peptide. """ spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) @@ -285,12 +285,11 @@ def prepare_psm_batch( all_peptides = [] all_proteins = [] for idx in range(len(batch)): - digest_data = protein_database.get_candidates( + spec_peptides, spec_proteins = protein_database.get_candidates( precursor_mzs[idx].type(torch.float64).item(), precursor_charges[idx].type(torch.int64).item(), ) try: - spec_peptides, pep_protein = digest_data all_spectra.append( spectra[idx].unsqueeze(0).repeat(len(spec_peptides), 1, 1) ) @@ -299,7 +298,7 @@ def prepare_psm_batch( ) all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) all_peptides.extend(spec_peptides) - all_proteins.extend(pep_protein) + all_proteins.extend(spec_proteins) except ValueError: logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index b38a27c0..dc7e5f7b 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1017,9 +1017,9 @@ def predict_step(self, batch, *args): Parameters ---------- - batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str], List[str]] + batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers, (iv) candidate peptides, (v) associated proteins. + spectrum identifiers, (iv) candidate peptides, (v) associated protein. 
Returns ------- diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 6928560d..395320e5 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -110,22 +110,25 @@ def db_search( self.initialize_model(train=False, db_search=True) self.model.out_writer = self.writer self.model.psm_batch_size = self.config.predict_batch_size - test_index = self._get_index(peak_path, False, "db search") - self.writer.set_ms_run(test_index.ms_files) - - self.initialize_data_module(test_index=test_index) - self.loaders.protein_database = db_utils.ProteinDatabase( + self.model.protein_database = db_utils.ProteinDatabase( fasta_path, self.config.enzyme, self.config.digestion, self.config.missed_cleavages, self.config.min_peptide_len, - self.config.max_peptide_len, + self.config.max_length, self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, - self.config.allowed_mods, + self.config.allowed_fixed_mods, + self.config.allowed_var_mods, + self.config.residues, ) + test_index = self._get_index(peak_path, False, "db search") + self.writer.set_ms_run(test_index.ms_files) + + self.initialize_data_module(test_index=test_index) + self.loaders.protein_database = self.model.protein_database self.loaders.setup(stage="test", annotated=False) self.trainer.predict(self.model, self.loaders.db_dataloader()) diff --git a/tests/conftest.py b/tests/conftest.py index 90e522fe..3b94896a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -241,7 +241,7 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, - "max_peptide_len": 50, + "max_length": 100, "enzyme": "trypsin", "digestion": "full", "missed_cleavages": 0, @@ -263,7 +263,6 @@ def tiny_config(tmp_path): "dim_model": 512, "dropout": 0.0, "dim_intensity": None, - "max_length": 100, "learning_rate": 5e-4, "weight_decay": 1e-5, "train_batch_size": 32, @@ -298,9 +297,10 @@ def tiny_config(tmp_path): "-17.027": -17.026549, "+43.006-17.027": 25.980265, }, - "allowed_mods": ( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + "allowed_fixed_mods": "C:C+57.021", + "allowed_var_mods": ( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), } @@ -311,6 +311,39 @@ def tiny_config(tmp_path): return cfg_file +@pytest.fixture +def residues_dict(): + return { + "G": 57.021464, + "A": 71.037114, + "S": 87.032028, + "P": 97.052764, + "V": 99.068414, + "T": 101.047670, + "C+57.021": 160.030649, + "L": 113.084064, + "I": 113.084064, + "N": 114.042927, + "D": 115.026943, + "Q": 128.058578, + "K": 128.094963, + "E": 129.042593, + "M": 131.040485, + "H": 137.058912, + "F": 147.068414, + "R": 156.101111, + "Y": 163.063329, + "W": 186.079313, + "M+15.995": 147.035400, + "N+0.984": 115.026943, + "Q+0.984": 129.042594, + "+42.011": 42.010565, + "+43.006": 43.005814, + "-17.027": -17.026549, + "+43.006-17.027": 25.980265, + } + + @pytest.fixture def tide_dir_small(tmp_path): """A directory with a very small TIDE search result.""" diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 51d9a3c9..c06ec788 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -221,7 +221,7 @@ def test_calc_match_score(): assert np.sum(masked_per_aa_scores.numpy()[3]) == 3 -def test_digest_fasta_cleave(tiny_fasta_file): +def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): # No missed cleavages expected_normal = [ @@ -287,16 +287,18 @@ def 
test_digest_fasta_cleave(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected -def test_digest_fasta_mods(tiny_fasta_file): +def test_digest_fasta_mods(tiny_fasta_file, residues_dict): # 1 modification allowed # fixed: C+57.02146 # variable: 1M+15.994915,1N+0.984016,1Q+0.984016 @@ -360,10 +362,12 @@ def test_digest_fasta_mods(tiny_fasta_file): max_mods=1, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) peptide_list = [ @@ -376,7 +380,7 @@ def test_digest_fasta_mods(tiny_fasta_file): assert peptide_list == expected_1mod -def test_length_restrictions(tiny_fasta_file): +def test_length_restrictions(tiny_fasta_file, residues_dict): # length between 20 and 50 expected_long = [ "MEAPAQLLFLLLLWLPDTTR", @@ -397,10 +401,12 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_long @@ -415,16 +421,18 @@ def test_length_restrictions(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_short -def test_digest_fasta_enzyme(tiny_fasta_file): +def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): # arg-c enzyme expected_argc = [ "ATSIPAR", @@ -449,10 +457,12 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc @@ -467,16 +477,39 @@ def test_digest_fasta_enzyme(tiny_fasta_file): max_mods=0, precursor_tolerance=20, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn + # Tesr regex rule instead of named enzyme + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), 
+ enzyme="R", + digestion="full", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=50, + max_mods=0, + precursor_tolerance=20, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_argc + -def test_get_candidates(tiny_fasta_file): +def test_get_candidates(tiny_fasta_file, residues_dict): # precursor_window is 10000 expected_smallwindow = ["LLIYGASTR"] @@ -496,13 +529,15 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_smallwindow == candidates + assert expected_smallwindow == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -514,13 +549,15 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=150000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_midwindow == candidates + assert expected_midwindow == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -532,16 +569,18 @@ def test_get_candidates(tiny_fasta_file): max_mods=0, precursor_tolerance=600000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_widewindow == candidates + assert expected_widewindow == list(candidates) -def test_get_candidates_isotope_error(tiny_fasta_file): +def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): # Tide isotope error windows for 496.2, 2+: # 0: [980.481617, 1000.289326] @@ -598,14 +637,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 0], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope0 == candidates + assert expected_isotope0 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -617,14 +658,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 1], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + 
"X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope01 == candidates + assert expected_isotope01 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -636,14 +679,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 2], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope012 == candidates + assert expected_isotope012 == list(candidates) pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), @@ -655,14 +700,16 @@ def test_get_candidates_isotope_error(tiny_fasta_file): max_mods=0, precursor_tolerance=10000, isotope_error=[0, 3], - allowed_mods=( - "M+15.995,N+0.984,Q+0.984," - "+42.011,+43.006,-17.027,+43.006-17.027" + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" ), + residues=residues_dict, ) pdb.db_peptides = peptide_list candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) - assert expected_isotope0123 == candidates + assert expected_isotope0123 == list(candidates) def test_beam_search_decode(): From 4a5b238133aaa1db27f584f52d9328b2f90c35f4 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 10:29:23 -0700 Subject: [PATCH 16/21] minor changes regarding formatting and small efficiency boosts --- casanovo/config.yaml | 8 +++--- casanovo/data/db_utils.py | 52 ++++++++++++++++++++-------------- casanovo/denovo/dataloaders.py | 13 ++++----- casanovo/denovo/model.py | 2 +- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index af2f79d1..17cba6a4 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -46,23 +46,23 @@ devices: # Enzyme for in silico digestion, used to generate candidate peptides. # See pyteomics.parser.expasy_rules for valid enzymes. +# Can also take a regex expression to specify custom digestion rules. enzyme: "trypsin" # Digestion type for candidate peptide generation. # full: standard digestion. semi: Include products of semi-specific cleavage. -# Can also take a regex expression to specify custom digestion rules. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 -# Maximum number of amino acid modifications per peptide, +# Maximum number of variable amino acid modifications per peptide, # None generates all possible isoforms as candidates. max_mods: 1 # Select which modifications from the vocabulary can be used in candidate creation. # Format: Comma-separated list of "aa:mod_residue", -# where aa is a standard amino acid or "X" for an N-terminal mod +# where aa is a standard amino acid or "nterm" for an N-terminal mod # and mod_residue is a key from the "residues" dictionary. 
# Example: "M:M+15.995,X:+43.006-17.027" allowed_fixed_mods: "C:C+57.021" -allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" +allowed_var_mods: "M:M+15.995,N:N+0.984,Q:Q+0.984,nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ### diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index c9201538..86c2112d 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -5,6 +5,7 @@ import os import re import string +from collections import defaultdict from typing import List, Tuple import depthcharge.masses @@ -13,6 +14,7 @@ import pyteomics.parser as parser from numba import njit + logger = logging.getLogger("casanovo") # CONSTANTS @@ -72,6 +74,9 @@ def __init__( self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods ) + self.swap_regex = re.compile( + "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) + ) self.db_peptides = self._digest_fasta( fasta_path, enzyme, @@ -167,6 +172,7 @@ def _digest_fasta( enzyme : str The enzyme to use for digestion. See pyteomics.parser.expasy_rules for valid enzymes. + Can also be a regex pattern. digestion : str The type of digestion to perform. Either 'full' or 'partial'. missed_cleavages : int @@ -199,9 +205,7 @@ def _digest_fasta( enzyme, ) semi = digestion == "partial" - valid_aa = set( - [re.sub(r"[^A-Z]+", "", res) for res in self.residues.keys()] - ) + valid_aa = set(list(self.residues.keys()) + ["C"]) for header, seq in fasta.read(fasta_filename): pep_set = parser.cleave( seq, @@ -212,17 +216,16 @@ def _digest_fasta( protein = header.split()[0] for pep in pep_set: if ( - len(pep) < min_peptide_length - or len(pep) > max_peptide_length + len(pep) >= min_peptide_length + or len(pep) <= max_peptide_length ): - continue - - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", pep - ) - continue - peptide_list.append((pep, protein)) + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") @@ -242,7 +245,11 @@ def _digest_fasta( (mod_pep, mass_calculator.mass(mod_pep), prot) for isos, prot in peptide_isoforms for mod_pep in map( - functools.partial(_convert_from_modx, swap_map=self.swap_map), + functools.partial( + _convert_from_modx, + swap_map=self.swap_map, + swap_regex=self.swap_regex, + ), isos, ) ] @@ -259,7 +266,7 @@ def _digest_fasta( @njit -def _to_mz(precursor_mass, charge): +def _to_mz(precursor_mass: float, charge: int) -> float: """ Convert precursor neutral mass to m/z value. @@ -279,7 +286,7 @@ def _to_mz(precursor_mass, charge): @njit -def _to_raw_mass(mz_mass, charge): +def _to_raw_mass(mz_mass: float, charge: int) -> float: """ Convert precursor m/z value to neutral mass. @@ -298,7 +305,7 @@ def _to_raw_mass(mz_mass, charge): return charge * (mz_mass - PROTON) -def _convert_from_modx(seq: str, swap_map: dict) -> str: +def _convert_from_modx(seq: str, swap_map: dict, swap_regex: str) -> str: """Converts peptide sequence from modX format to Casanovo-acceptable modifications. Args: @@ -306,12 +313,15 @@ def _convert_from_modx(seq: str, swap_map: dict) -> str: Peptide in modX format swap_map : dict Dictionary that allows for swapping of modX to Casanovo-acceptable modifications. + swap_regex : str + Regular expression to match modX format. 
""" - regex = re.compile("(%s)" % "|".join(map(re.escape, swap_map.keys()))) - return regex.sub(lambda x: swap_map[x.group()], seq) + return swap_regex.sub(lambda x: swap_map[x.group()], seq) -def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): +def _construct_mods_dict( + allowed_fixed_mods: str, allowed_var_mods: str +) -> Tuple[dict, dict, dict]: """ Constructs dictionaries of fixed and variable modifications. @@ -343,7 +353,7 @@ def _construct_mods_dict(allowed_fixed_mods, allowed_var_mods): for idx, mod in enumerate(allowed_var_mods.split(",")): aa, mod_aa = mod.split(":") mod_id = string.ascii_lowercase[idx] - if aa == "X": + if aa == "nterm": var_mods[f"{mod_id}-"] = True swap_map[f"{mod_id}-"] = f"{mod_aa}" else: diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 6e8c93b3..4793e2f3 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -272,11 +272,11 @@ def prepare_psm_batch( spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) - precursor_mzs = torch.tensor(precursor_mzs) - precursor_charges = torch.tensor(precursor_charges) - precursor_masses = (precursor_mzs - 1.007276) * precursor_charges + precursor_mzs_t = torch.tensor(precursor_mzs) + precursor_charges_t = torch.tensor(precursor_charges) + precursor_masses_t = (precursor_mzs_t - 1.007276) * precursor_charges_t precursors = torch.vstack( - [precursor_masses, precursor_charges, precursor_mzs] + [precursor_masses_t, precursor_charges_t, precursor_mzs_t] ).T.float() all_spectra = [] @@ -286,8 +286,8 @@ def prepare_psm_batch( all_proteins = [] for idx in range(len(batch)): spec_peptides, spec_proteins = protein_database.get_candidates( - precursor_mzs[idx].type(torch.float64).item(), - precursor_charges[idx].type(torch.int64).item(), + precursor_mzs[idx], + precursor_charges[idx], ) try: all_spectra.append( @@ -303,7 +303,6 @@ def prepare_psm_batch( logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] ) - continue return ( torch.cat(all_spectra, dim=0), diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index dc7e5f7b..31757d81 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1009,7 +1009,7 @@ class DbSpec2Pep(Spec2Pep): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.psm_batch_size = 1024 + self.psm_batch_size = None def predict_step(self, batch, *args): """ From 4352bbdfb41aeeb61675c9a290f7bc83eae2f717 Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 11:24:18 -0700 Subject: [PATCH 17/21] changes before reformatting config --- casanovo/data/db_utils.py | 21 +++++++++++++-------- tests/conftest.py | 2 +- tests/unit_tests/test_unit.py | 28 ++++++++++++++-------------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 86c2112d..26f7152c 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -77,7 +77,7 @@ def __init__( self.swap_regex = re.compile( "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) ) - self.db_peptides = self._digest_fasta( + self.db_peptides, self.prot_map = self._digest_fasta( fasta_path, enzyme, digestion, @@ -146,11 +146,9 @@ def get_associated_protein(self, peptide: str) -> str: Returns ------- protein : str - The associated protein. + The associated protein(s). 
""" - return self.db_peptides[self.db_peptides["peptide"] == peptide][ - "protein" - ].values[0] + return ",".join(self.prot_map[peptide]) def _digest_fasta( self, @@ -186,9 +184,11 @@ def _digest_fasta( Returns ------- - mod_peptide_list : pd.DataFrame + pep_table : pd.DataFrame A Pandas DataFrame with peptide, mass, and protein columns. Sorted by neutral mass in ascending order. + prot_map : dict + A dictionary mapping peptides to associated proteins. """ # Verify the existence of the file: if not os.path.isfile(fasta_filename): @@ -217,7 +217,7 @@ def _digest_fasta( for pep in pep_set: if ( len(pep) >= min_peptide_length - or len(pep) <= max_peptide_length + and len(pep) <= max_peptide_length ): if any(aa not in valid_aa for aa in pep): logger.warn( @@ -259,10 +259,15 @@ def _digest_fasta( ) pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) + # Create a dictionary mapping for easy accession of associated proteins + prot_map = defaultdict(list) + for pep, _, prot in mod_peptide_list: + prot_map[pep].append(prot) + logger.info( "Digestion complete. %d peptides generated.", len(pep_table) ) - return pep_table + return pep_table, prot_map @njit diff --git a/tests/conftest.py b/tests/conftest.py index 3b94896a..bf02a3ab 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -300,7 +300,7 @@ def tiny_config(tmp_path): "allowed_fixed_mods": "C:C+57.021", "allowed_var_mods": ( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), } diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index c06ec788..d03d6f7f 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -290,7 +290,7 @@ def test_digest_fasta_cleave(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -365,7 +365,7 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -404,7 +404,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -424,7 +424,7 @@ def test_length_restrictions(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -460,7 +460,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -480,7 +480,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + 
"nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -501,7 +501,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -532,7 +532,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -552,7 +552,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -572,7 +572,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -640,7 +640,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -661,7 +661,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -682,7 +682,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) @@ -703,7 +703,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): allowed_fixed_mods="C:C+57.021", allowed_var_mods=( "M:M+15.995,N:N+0.984,Q:Q+0.984," - "X:+42.011,X:+43.006,X:-17.027,X:+43.006-17.027" + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" ), residues=residues_dict, ) From ddff67fb03b06d3b27f73ff58dfdd478cd8a826b Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 12:00:28 -0700 Subject: [PATCH 18/21] replace all occurences of "max_length" with "max_peptide_len" --- casanovo/config.py | 2 +- casanovo/config.yaml | 2 +- casanovo/denovo/model.py | 26 +++++++++++++------------- casanovo/denovo/model_runner.py | 6 +++--- tests/conftest.py | 2 +- tests/unit_tests/test_unit.py | 10 +++++----- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/casanovo/config.py b/casanovo/config.py index 792da35a..8577d087 100644 --- a/casanovo/config.py +++ b/casanovo/config.py @@ -59,7 +59,7 @@ class Config: n_layers=int, dropout=float, dim_intensity=int, - max_length=int, + max_peptide_len=int, residues=dict, n_log=int, tb_summarywriter=str, diff --git a/casanovo/config.yaml b/casanovo/config.yaml index 17cba6a4..e8732b20 100644 --- a/casanovo/config.yaml +++ 
b/casanovo/config.yaml @@ -18,7 +18,7 @@ isotope_error_range: [0, 1] # The minimum length of considered peptides. min_peptide_len: 6 # The maximum length of considered peptides. -max_length: 100 +max_peptide_len: 100 # Number of spectra in one inference batch. predict_batch_size: 1024 diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 31757d81..6fe34bfa 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -46,7 +46,7 @@ class Spec2Pep(pl.LightningModule, ModelMixin): (``dim_model - dim_intensity``) are reserved for encoding the m/z value. If ``None``, the intensity will be projected up to ``dim_model`` using a linear layer, then summed with the m/z encoding for each peak. - max_length : int + max_peptide_len : int The maximum peptide length to decode. residues : Union[Dict[str, float], str] The amino acid dictionary and their masses. By default ("canonical) this @@ -99,7 +99,7 @@ def __init__( n_layers: int = 9, dropout: float = 0.0, dim_intensity: Optional[int] = None, - max_length: int = 100, + max_peptide_len: int = 100, residues: Union[Dict[str, float], str] = "canonical", max_charge: int = 5, precursor_mass_tol: float = 50, @@ -158,7 +158,7 @@ def __init__( self.opt_kwargs = kwargs # Data properties. - self.max_length = max_length + self.max_peptide_len = max_peptide_len self.residues = residues self.precursor_mass_tol = precursor_mass_tol self.isotope_error_range = isotope_error_range @@ -241,7 +241,7 @@ def beam_search_decode( # Sizes. batch = spectra.shape[0] # B - length = self.max_length + 1 # L + length = self.max_peptide_len + 1 # L vocab = self.decoder.vocab_size + 1 # V beam = self.n_beams # S @@ -269,7 +269,7 @@ def beam_search_decode( scores = einops.rearrange(scores, "B L V S -> (B S) L V") # The main decoding loop. - for step in range(0, self.max_length): + for step in range(0, self.max_peptide_len): # Terminate beams exceeding the precursor m/z tolerance and track # all finished beams (either terminated or stop token predicted). ( @@ -323,10 +323,10 @@ def _finish_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. step : int @@ -491,10 +491,10 @@ def _cache_finished_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. step : int @@ -576,10 +576,10 @@ def _get_topk_beams( Parameters ---------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. 
finished_beams : torch.Tensor of shape (n_spectra * n_beams) @@ -592,10 +592,10 @@ def _get_topk_beams( Returns ------- - tokens : torch.Tensor of shape (n_spectra * n_beams, max_length) + tokens : torch.Tensor of shape (n_spectra * n_beams, max_peptide_len) Predicted amino acid tokens for all beams and all spectra. scores : torch.Tensor of shape - (n_spectra * n_beams, max_length, n_amino_acids) + (n_spectra * n_beams, max_peptide_len, n_amino_acids) Scores for the predicted amino acid tokens for all beams and all spectra. """ diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index 395320e5..efb380cb 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -116,7 +116,7 @@ def db_search( self.config.digestion, self.config.missed_cleavages, self.config.min_peptide_len, - self.config.max_length, + self.config.max_peptide_len, self.config.max_mods, self.config.precursor_mass_tol, self.config.isotope_error_range, @@ -271,7 +271,7 @@ def initialize_model( n_layers=self.config.n_layers, dropout=self.config.dropout, dim_intensity=self.config.dim_intensity, - max_length=self.config.max_length, + max_peptide_len=self.config.max_peptide_len, residues=self.config.residues, max_charge=self.config.max_charge, precursor_mass_tol=self.config.precursor_mass_tol, @@ -292,7 +292,7 @@ def initialize_model( # Reconfigurable non-architecture related parameters for a loaded model. loaded_model_params = dict( - max_length=self.config.max_length, + max_peptide_len=self.config.max_peptide_len, precursor_mass_tol=self.config.precursor_mass_tol, isotope_error_range=self.config.isotope_error_range, n_beams=self.config.n_beams, diff --git a/tests/conftest.py b/tests/conftest.py index bf02a3ab..95ef2d02 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -241,7 +241,7 @@ def tiny_config(tmp_path): "precursor_mass_tol": 5, "isotope_error_range": [0, 1], "min_peptide_len": 6, - "max_length": 100, + "max_peptide_len": 100, "enzyme": "trypsin", "digestion": "full", "missed_cleavages": 0, diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index d03d6f7f..63d492f8 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -722,7 +722,7 @@ def test_beam_search_decode(): # Sizes. batch = 1 # B - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V beam = model.n_beams # S step = 3 @@ -839,12 +839,12 @@ def test_beam_search_decode(): assert torch.equal(new_scores[:, step, :], expected_scores) # Test output if decoding loop isn't stopped with termination of all beams. - model.max_length = 0 + model.max_peptide_len = 0 # 1 spectrum with 5 peaks (2 values: m/z and intensity). spectra = torch.zeros(1, 5, 2) precursors = torch.tensor([[469.25364, 2.0, 235.63410]]) assert len(list(model.beam_search_decode(spectra, precursors))[0]) == 0 - model.max_length = 100 + model.max_peptide_len = 100 # Re-initialize scores and tokens to further test caching functionality. 
scores = torch.full( @@ -1004,7 +1004,7 @@ def test_beam_search_decode(): batch = 2 # B beam = model.n_beams # S model.decoder.reverse = True - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V step = 4 @@ -1045,7 +1045,7 @@ def test_beam_search_decode(): batch = 2 # B beam = model.n_beams # S model.decoder.reverse = True - length = model.max_length + 1 # L + length = model.max_peptide_len + 1 # L vocab = model.decoder.vocab_size + 1 # V step = 4 From a3548d00124c1242350a62fdbcb2f719484254fe Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Tue, 3 Sep 2024 13:37:46 -0700 Subject: [PATCH 19/21] added nonspecific digestion --- casanovo/config.py | 1 + casanovo/config.yaml | 4 +- casanovo/data/db_utils.py | 67 +++++++----- tests/unit_tests/test_unit.py | 185 ++++++++++++++++++++++++++++++++-- 4 files changed, 225 insertions(+), 32 deletions(-) diff --git a/casanovo/config.py b/casanovo/config.py index 8577d087..dc2a3d2c 100644 --- a/casanovo/config.py +++ b/casanovo/config.py @@ -18,6 +18,7 @@ _config_deprecated = dict( every_n_train_steps="val_check_interval", max_iters="cosine_schedule_period_iters", + max_length="max_peptide_len", ) diff --git a/casanovo/config.yaml b/casanovo/config.yaml index e8732b20..df6fa8bb 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -49,7 +49,9 @@ devices: # Can also take a regex expression to specify custom digestion rules. enzyme: "trypsin" # Digestion type for candidate peptide generation. -# full: standard digestion. semi: Include products of semi-specific cleavage. +# full: standard digestion. +# semi: Include products of semi-specific cleavage. +# non-specific: Include products of non-specific cleavage. digestion: "full" # Number of allowed missed cleavages when digesting protein. missed_cleavages: 0 diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 26f7152c..f9c669ed 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -172,7 +172,7 @@ def _digest_fasta( See pyteomics.parser.expasy_rules for valid enzymes. Can also be a regex pattern. digestion : str - The type of digestion to perform. Either 'full' or 'partial'. + The type of digestion to perform. Either 'full', 'partial' or 'non-specific'. missed_cleavages : int The number of missed cleavages to allow. max_mods : int @@ -196,7 +196,7 @@ def _digest_fasta( raise FileNotFoundError(f"File {fasta_filename} does not exist.") peptide_list = [] - if digestion not in ["full", "partial"]: + if digestion not in ["full", "partial", "non-specific"]: logger.error("Digestion type %s not recognized.", digestion) raise ValueError(f"Digestion type {digestion} not recognized.") if enzyme not in parser.expasy_rules: @@ -204,28 +204,49 @@ def _digest_fasta( "Enzyme %s not recognized. 
Interpreting as cleavage rule.", enzyme, ) - semi = digestion == "partial" valid_aa = set(list(self.residues.keys()) + ["C"]) - for header, seq in fasta.read(fasta_filename): - pep_set = parser.cleave( - seq, - rule=enzyme, - missed_cleavages=missed_cleavages, - semi=semi, - ) - protein = header.split()[0] - for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length - ): - if any(aa not in valid_aa for aa in pep): - logger.warn( - "Skipping peptide with unknown amino acids: %s", - pep, - ) - else: - peptide_list.append((pep, protein)) + if digestion == "non-specific": + for header, seq in fasta.read(fasta_filename): + pep_set = [] + # Generate all possible peptides + for i in range(len(seq)): + for j in range(i + 1, len(seq) + 1): + pep_set.append(seq[i:j]) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) >= min_peptide_length + and len(pep) <= max_peptide_length + ): + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) + else: + semi = digestion == "partial" + for header, seq in fasta.read(fasta_filename): + pep_set = parser.cleave( + seq, + rule=enzyme, + missed_cleavages=missed_cleavages, + semi=semi, + ) + protein = header.split()[0] + for pep in pep_set: + if ( + len(pep) >= min_peptide_length + and len(pep) <= max_peptide_length + ): + if any(aa not in valid_aa for aa in pep): + logger.warn( + "Skipping peptide with unknown amino acids: %s", + pep, + ) + else: + peptide_list.append((pep, protein)) # Generate modified peptides mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb") diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py index 63d492f8..594552af 100644 --- a/tests/unit_tests/test_unit.py +++ b/tests/unit_tests/test_unit.py @@ -327,12 +327,16 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): "+42.011EIVMTQSPPTLSLSPGER", "+43.006EIVMTQSPPTLSLSPGER", "-17.027MEAPAQLLFLLLLWLPDTTR", + "-17.027M+15.995EAPAQLLFLLLLWLPDTTR", # "MEAPAQLLFLLLLWLPDTTR", "MEAPAQ+0.984LLFLLLLWLPDTTR", "M+15.995EAPAQLLFLLLLWLPDTTR", "+43.006-17.027MEAPAQLLFLLLLWLPDTTR", + "+43.006-17.027M+15.995EAPAQLLFLLLLWLPDTTR", # "+42.011MEAPAQLLFLLLLWLPDTTR", "+43.006MEAPAQLLFLLLLWLPDTTR", + "+42.011M+15.995EAPAQLLFLLLLWLPDTTR", # + "+43.006M+15.995EAPAQLLFLLLLWLPDTTR", # "-17.027ASQSVSSSYLTWYQQKPGQAPR", "ASQSVSSSYLTWYQQKPGQAPR", "ASQ+0.984SVSSSYLTWYQQKPGQAPR", @@ -370,13 +374,6 @@ def test_digest_fasta_mods(tiny_fasta_file, residues_dict): residues=residues_dict, ) peptide_list = list(pdb.db_peptides["peptide"]) - peptide_list = [ - x - for x in peptide_list - if not re.search( - r"(\+42\.011|\+43\.006|\-17\.027|\+43\.006\-17\.027)+[A-Z]\+", x - ) - ] assert peptide_list == expected_1mod @@ -447,6 +444,136 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): # asp-n enzyme expected_aspn = ["DFAVYYC+57.021QQ", "DFTLTISSLQPE", "MEAPAQLLFLLLLWLP"] + expected_semispecific = [ + "FSGSGS", + "ATSIPA", + "ASQSVS", + "PGQAPR", + "TSIPAR", + "MEAPAQ", + "LLIYGA", + "YGASTR", + "LSPGER", + "LPDTTR", + "EIVMTQ", + "VTLSC+57.021R", + "QDYNLP", + ] + + expected_nonspecific = [ + "SGSGSG", + "GSGSGT", + "SGSGTD", + "FSGSGS", + "ATSIPA", + "GASTRA", + "LSLSPG", + "ASQSVS", + "GSGTDF", + "SLSPGE", + "QSVSSS", + "SQSVSS", + "KPGQAP", + "SPPTLS", + "ASTRAT", + "RFSGSG", + "IYGAST", + "APAQLL", + "PTLSLS", + "TLSLSP", + "TLTISS", + "STRATS", + "LIYGAS", + "ARFSGS", + "PGQAPR", + "SGTDFT", + "PPTLSL", 
+ "EAPAQL", + "QKPGQA", + "SVSSSY", + "TQSPPT", + "LTISSL", + "PARFSG", + "GQAPRL", + "QSPPTL", + "SPGERV", + "ISSLQP", + "RATSIP", + "TSIPAR", + "MEAPAQ", + "RASQSV", + "TISSLQ", + "TRATSI", + "LLIYGA", + "GTDFTL", + "YGASTR", + "VSSSYL", + "SSSYLT", + "LSPGER", + "PGERVT", + "MTQSPP", + "SSLQPE", + "VMTQSP", + "GERVTL", + "PEDFAV", + "IVMTQS", + "FTLTIS", + "APRLLI", + "QQKPGQ", + "SLQPED", + "PAQLLF", + "IPARFS", + "SIPARF", + "LSC+57.021RAS", + "TDFTLT", + "QAPRLL", + "LPDTTR", + "ERVTLS", + "AQLLFL", + "QPEDFA", + "TLSC+57.021RA", + "C+57.021RASQS", + "SC+57.021RASQ", + "DFTLTI", + "PDTTRE", + "TTREIV", + "EIVMTQ", + "YQQKPG", + "LFLLLL", + "LLFLLL", + "WLPDTT", + "DTTREI", + "RLLIYG", + "RVTLSC+57.021", + "VTLSC+57.021R", + "EDFAVY", + "LWLPDT", + "QLLFLL", + "LQPEDF", + "REIVMT", + "TREIVM", + "QDYNLP", + "LLLWLP", + "SSYLTW", + "LLWLPD", + "LLLLWL", + "PRLLIY", + "DFAVYY", + "QQDYNL", + "AVYYC+57.021Q", + "FLLLLW", + "FAVYYC+57.021", + "C+57.021QQDYN", + "SYLTWY", + "LTWYQQ", + "WYQQKP", + "TWYQQK", + "VYYC+57.021QQ", + "YLTWYQ", + "YC+57.021QQDY", + "YYC+57.021QQD", + ] + pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), enzyme="arg-c", @@ -487,7 +614,7 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_aspn - # Tesr regex rule instead of named enzyme + # Test regex rule instead of named enzyme pdb = db_utils.ProteinDatabase( fasta_path=str(tiny_fasta_file), enzyme="R", @@ -508,6 +635,48 @@ def test_digest_fasta_enzyme(tiny_fasta_file, residues_dict): peptide_list = list(pdb.db_peptides["peptide"]) assert peptide_list == expected_argc + # Test semispecific digest + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="partial", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=6, + max_mods=0, + precursor_tolerance=10000, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_semispecific + + # Test nonspecific digest + pdb = db_utils.ProteinDatabase( + fasta_path=str(tiny_fasta_file), + enzyme="trypsin", + digestion="non-specific", + missed_cleavages=0, + min_peptide_len=6, + max_peptide_len=6, + max_mods=0, + precursor_tolerance=10000, + isotope_error=[0, 0], + allowed_fixed_mods="C:C+57.021", + allowed_var_mods=( + "M:M+15.995,N:N+0.984,Q:Q+0.984," + "nterm:+42.011,nterm:+43.006,nterm:-17.027,nterm:+43.006-17.027" + ), + residues=residues_dict, + ) + peptide_list = list(pdb.db_peptides["peptide"]) + assert peptide_list == expected_nonspecific + def test_get_candidates(tiny_fasta_file, residues_dict): # precursor_window is 10000 From e8d4682241b9b4d10384e9dfd92fd04258103e3e Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 12:06:31 -0700 Subject: [PATCH 20/21] minor comments --- casanovo/data/db_utils.py | 35 +++++++++++++++++----------------- casanovo/denovo/dataloaders.py | 7 +------ casanovo/denovo/model.py | 6 ++---- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index f9c669ed..19b312e2 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -127,12 +127,12 @@ def get_candidates( (self.db_peptides["calc_mass"] >= lower_bound) & (self.db_peptides["calc_mass"] <= 
upper_bound) ] - candidates.append(window[["peptide", "calc_mass", "protein"]]) + candidates.append(window[["peptide", "calc_mass"]]) candidates = pd.concat(candidates) candidates.drop_duplicates(inplace=True) candidates.sort_values(by=["calc_mass", "peptide"], inplace=True) - return candidates["peptide"], candidates["protein"] + return candidates["peptide"] def get_associated_protein(self, peptide: str) -> str: """ @@ -159,7 +159,7 @@ def _digest_fasta( max_mods: int, min_peptide_length: int, max_peptide_length: int, - ) -> pd.DataFrame: + ) -> Tuple[pd.DataFrame, dict]: """ Digests a FASTA file and returns the peptides, their masses, and associated protein. @@ -185,8 +185,8 @@ def _digest_fasta( Returns ------- pep_table : pd.DataFrame - A Pandas DataFrame with peptide, mass, - and protein columns. Sorted by neutral mass in ascending order. + A Pandas DataFrame with peptide and mass columns. + Sorted by neutral mass in ascending order. prot_map : dict A dictionary mapping peptides to associated proteins. """ @@ -207,17 +207,14 @@ def _digest_fasta( valid_aa = set(list(self.residues.keys()) + ["C"]) if digestion == "non-specific": for header, seq in fasta.read(fasta_filename): - pep_set = [] + protein = header.split()[0] # Generate all possible peptides for i in range(len(seq)): - for j in range(i + 1, len(seq) + 1): - pep_set.append(seq[i:j]) - protein = header.split()[0] - for pep in pep_set: - if ( - len(pep) >= min_peptide_length - and len(pep) <= max_peptide_length + for j in range( + i + min_peptide_length, + min(i + max_peptide_length + 1, len(seq) + 1), ): + pep = seq[i:j] if any(aa not in valid_aa for aa in pep): logger.warn( "Skipping peptide with unknown amino acids: %s", @@ -274,17 +271,19 @@ def _digest_fasta( isos, ) ] - # Create a DataFrame for easy sorting and filtering - pep_table = pd.DataFrame( - mod_peptide_list, columns=["peptide", "calc_mass", "protein"] - ) - pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) # Create a dictionary mapping for easy accession of associated proteins prot_map = defaultdict(list) for pep, _, prot in mod_peptide_list: prot_map[pep].append(prot) + # Create a DataFrame for easy sorting and filtering + pep_table = pd.DataFrame( + [(pep, mass) for pep, mass, _ in mod_peptide_list], + columns=["peptide", "calc_mass"], + ) + pep_table.sort_values(by=["calc_mass", "peptide"], inplace=True) + logger.info( "Digestion complete. %d peptides generated.", len(pep_table) ) diff --git a/casanovo/denovo/dataloaders.py b/casanovo/denovo/dataloaders.py index 4793e2f3..2646329d 100644 --- a/casanovo/denovo/dataloaders.py +++ b/casanovo/denovo/dataloaders.py @@ -266,8 +266,6 @@ def prepare_psm_batch( The spectrum identifiers. all_peptides : List[str] The candidate peptides for each spectrum. - all_proteins : List[str] - The proteins associated with each candidate peptide. 
""" spectra, precursor_mzs, precursor_charges, spectrum_ids = list(zip(*batch)) spectra = torch.nn.utils.rnn.pad_sequence(spectra, batch_first=True) @@ -283,9 +281,8 @@ def prepare_psm_batch( all_precursors = [] all_spectrum_ids = [] all_peptides = [] - all_proteins = [] for idx in range(len(batch)): - spec_peptides, spec_proteins = protein_database.get_candidates( + spec_peptides = protein_database.get_candidates( precursor_mzs[idx], precursor_charges[idx], ) @@ -298,7 +295,6 @@ def prepare_psm_batch( ) all_spectrum_ids.extend([spectrum_ids[idx]] * len(spec_peptides)) all_peptides.extend(spec_peptides) - all_proteins.extend(spec_proteins) except ValueError: logger.warning( "No candidates found for spectrum %s", spectrum_ids[idx] @@ -309,5 +305,4 @@ def prepare_psm_batch( torch.cat(all_precursors, dim=0), all_spectrum_ids, all_peptides, - all_proteins, ) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 6fe34bfa..ca5557fc 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -1019,7 +1019,7 @@ def predict_step(self, batch, *args): ---------- batch : Tuple[torch.Tensor, torch.Tensor, np.array, List[str]] A batch of (i) MS/MS spectra, (ii) precursor information, (iii) - spectrum identifiers, (iv) candidate peptides, (v) associated protein. + spectrum identifiers, (iv) candidate peptides Returns ------- @@ -1049,7 +1049,6 @@ def predict_step(self, batch, *args): peptide_score, aa_scores, peptide, - protein, ) in zip( current_batch[1][:, 1].cpu().detach().numpy(), current_batch[1][:, 2].cpu().detach().numpy(), @@ -1057,7 +1056,6 @@ def predict_step(self, batch, *args): all_scores.cpu().detach().numpy(), per_aa_scores.cpu().detach().numpy(), current_batch[3], - current_batch[4], ): predictions.append( ( @@ -1067,7 +1065,7 @@ def predict_step(self, batch, *args): peptide, peptide_score, aa_scores, - protein, + self.protein_database.get_associated_protein(peptide), ) ) return predictions From 68b6926032814dcc4a6b650e1736c8ff92edf7cb Mon Sep 17 00:00:00 2001 From: VarunAnanth2003 Date: Fri, 13 Sep 2024 13:41:39 -0700 Subject: [PATCH 21/21] full branch comments addressed --- casanovo/data/db_utils.py | 197 ++++++++++++++++++++-------------- tests/unit_tests/test_unit.py | 14 +-- 2 files changed, 123 insertions(+), 88 deletions(-) diff --git a/casanovo/data/db_utils.py b/casanovo/data/db_utils.py index 19b312e2..34671eb1 100644 --- a/casanovo/data/db_utils.py +++ b/casanovo/data/db_utils.py @@ -6,7 +6,7 @@ import re import string from collections import defaultdict -from typing import List, Tuple +from typing import List, Tuple, Iterator import depthcharge.masses import pandas as pd @@ -70,22 +70,23 @@ def __init__( allowed_var_mods: str, residues: dict, ): - self.residues = residues self.fixed_mods, self.var_mods, self.swap_map = _construct_mods_dict( allowed_fixed_mods, allowed_var_mods ) + self.max_mods = max_mods self.swap_regex = re.compile( "(%s)" % "|".join(map(re.escape, self.swap_map.keys())) ) - self.db_peptides, self.prot_map = self._digest_fasta( + peptide_generator = _peptide_generator( fasta_path, enzyme, digestion, missed_cleavages, - max_mods, min_peptide_len, max_peptide_len, + set(list(residues.keys()) + ["C"]), ) + self.db_peptides, self.prot_map = self._digest_fasta(peptide_generator) self.precursor_tolerance = precursor_tolerance self.isotope_error = isotope_error @@ -152,35 +153,15 @@ def get_associated_protein(self, peptide: str) -> str: def _digest_fasta( self, - fasta_filename: str, - enzyme: str, - digestion: str, - missed_cleavages: 
int,
-        max_mods: int,
-        min_peptide_length: int,
-        max_peptide_length: int,
+        peptide_generator: Iterator[Tuple[str, str]],
     ) -> Tuple[pd.DataFrame, dict]:
         """
         Digests a FASTA file and returns the peptides, their masses, and associated protein.
 
         Parameters
         ----------
-        fasta_filename : str
-            Path to the FASTA file.
-        enzyme : str
-            The enzyme to use for digestion.
-            See pyteomics.parser.expasy_rules for valid enzymes.
-            Can also be a regex pattern.
-        digestion : str
-            The type of digestion to perform. Either 'full', 'partial' or 'non-specific'.
-        missed_cleavages : int
-            The number of missed cleavages to allow.
-        max_mods : int
-            The maximum number of modifications to allow per peptide.
-        min_peptide_length : int
-            The minimum length of peptides to consider.
-        max_peptide_length : int
-            The maximum length of peptides to consider.
+        peptide_generator : Iterator[Tuple[str, str]]
+            An iterator that yields peptides and associated proteins.
 
         Returns
         -------
@@ -190,60 +171,9 @@
         prot_map : dict
             A dictionary mapping peptides to associated proteins.
         """
-        # Verify the existence of the file:
-        if not os.path.isfile(fasta_filename):
-            logger.error("File %s does not exist.", fasta_filename)
-            raise FileNotFoundError(f"File {fasta_filename} does not exist.")
         peptide_list = []
-        if digestion not in ["full", "partial", "non-specific"]:
-            logger.error("Digestion type %s not recognized.", digestion)
-            raise ValueError(f"Digestion type {digestion} not recognized.")
-        if enzyme not in parser.expasy_rules:
-            logger.info(
-                "Enzyme %s not recognized. Interpreting as cleavage rule.",
-                enzyme,
-            )
-        valid_aa = set(list(self.residues.keys()) + ["C"])
-        if digestion == "non-specific":
-            for header, seq in fasta.read(fasta_filename):
-                protein = header.split()[0]
-                # Generate all possible peptides
-                for i in range(len(seq)):
-                    for j in range(
-                        i + min_peptide_length,
-                        min(i + max_peptide_length + 1, len(seq) + 1),
-                    ):
-                        pep = seq[i:j]
-                        if any(aa not in valid_aa for aa in pep):
-                            logger.warn(
-                                "Skipping peptide with unknown amino acids: %s",
-                                pep,
-                            )
-                        else:
-                            peptide_list.append((pep, protein))
-        else:
-            semi = digestion == "partial"
-            for header, seq in fasta.read(fasta_filename):
-                pep_set = parser.cleave(
-                    seq,
-                    rule=enzyme,
-                    missed_cleavages=missed_cleavages,
-                    semi=semi,
-                )
-                protein = header.split()[0]
-                for pep in pep_set:
-                    if (
-                        len(pep) >= min_peptide_length
-                        and len(pep) <= max_peptide_length
-                    ):
-                        if any(aa not in valid_aa for aa in pep):
-                            logger.warn(
-                                "Skipping peptide with unknown amino acids: %s",
-                                pep,
-                            )
-                        else:
-                            peptide_list.append((pep, protein))
+        for pep, prot in peptide_generator:
+            peptide_list.append((pep, prot))
 
         # Generate modified peptides
         mass_calculator = depthcharge.masses.PeptideMass(residues="massivekb")
@@ -253,7 +183,7 @@
                 pep,
                 variable_mods=self.var_mods,
                 fixed_mods=self.fixed_mods,
-                max_mods=max_mods,
+                max_mods=self.max_mods,
             ),
             prot,
         )
@@ -290,6 +220,98 @@
         return pep_table, prot_map
 
 
+def _peptide_generator(
+    fasta_filename: str,
+    enzyme: str,
+    digestion: str,
+    missed_cleavages: int,
+    min_peptide_length: int,
+    max_peptide_length: int,
+    valid_aa: set[str],
+) -> Iterator[Tuple[str, str]]:
+    """
+    Create a generator that yields peptides from a FASTA file
+    depending on the type of digestion specified.
+
+    Parameters
+    ----------
+    fasta_filename : str
+        Path to the FASTA file.
+    enzyme : str
+        The enzyme to use for digestion.
+        See pyteomics.parser.expasy_rules for valid enzymes.
+        Can also be a regex pattern.
+    digestion : str
+        The type of digestion to perform. Either 'full', 'partial' or 'non-specific'.
+    missed_cleavages : int
+        The number of missed cleavages to allow.
+    min_peptide_length : int
+        The minimum length of peptides to consider.
+    max_peptide_length : int
+        The maximum length of peptides to consider.
+    valid_aa : set[str]
+        A set of valid amino acids.
+
+    Yields
+    ------
+    pep : str
+        A peptide sequence, unmodified.
+    protein : str
+        The associated protein.
+    """
+    # Verify the existence of the file:
+    if not os.path.isfile(fasta_filename):
+        logger.error("File %s does not exist.", fasta_filename)
+        raise FileNotFoundError(f"File {fasta_filename} does not exist.")
+    if digestion not in ["full", "partial", "non-specific"]:
+        logger.error("Digestion type %s not recognized.", digestion)
+        raise ValueError(f"Digestion type {digestion} not recognized.")
+    if enzyme not in parser.expasy_rules:
+        logger.info(
+            "Enzyme %s not recognized. Interpreting as cleavage rule.",
+            enzyme,
+        )
+    if digestion == "non-specific":
+        for header, seq in fasta.read(fasta_filename):
+            protein = header.split()[0]
+            # Generate all possible peptides
+            for i in range(len(seq)):
+                for j in range(
+                    i + min_peptide_length,
+                    min(i + max_peptide_length + 1, len(seq) + 1),
+                ):
+                    pep = seq[i:j]
+                    if any(aa not in valid_aa for aa in pep):
+                        logger.warning(
+                            "Skipping peptide with unknown amino acids: %s",
+                            pep,
+                        )
+                    else:
+                        yield pep, protein
+    else:
+        semi = digestion == "partial"
+        for header, seq in fasta.read(fasta_filename):
+            pep_set = parser.cleave(
+                seq,
+                rule=enzyme,
+                missed_cleavages=missed_cleavages,
+                semi=semi,
+            )
+            protein = header.split()[0]
+            for pep in pep_set:
+                if (
+                    len(pep) >= min_peptide_length
+                    and len(pep) <= max_peptide_length
+                ):
+                    if any(aa not in valid_aa for aa in pep):
+                        logger.warning(
+                            "Skipping peptide with unknown amino acids: %s",
+                            pep,
+                        )
+                    else:
+                        yield pep, protein
+
+
 @njit
 def _to_mz(precursor_mass: float, charge: int) -> float:
     """
diff --git a/tests/unit_tests/test_unit.py b/tests/unit_tests/test_unit.py
index 594552af..a0b0935d 100644
--- a/tests/unit_tests/test_unit.py
+++ b/tests/unit_tests/test_unit.py
@@ -705,7 +705,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict):
         ),
         residues=residues_dict,
     )
-    candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
+    candidates = pdb.get_candidates(precursor_mz=496.2, charge=2)
     assert expected_smallwindow == list(candidates)
 
     pdb = db_utils.ProteinDatabase(
@@ -725,7 +725,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict):
         ),
         residues=residues_dict,
     )
-    candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
+    candidates = pdb.get_candidates(precursor_mz=496.2, charge=2)
     assert expected_midwindow == list(candidates)
 
     pdb = db_utils.ProteinDatabase(
@@ -745,7 +745,7 @@ def test_get_candidates(tiny_fasta_file, residues_dict):
         ),
         residues=residues_dict,
    )
-    candidates, _ = 
pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_widewindow == list(candidates) @@ -814,7 +814,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -835,7 +835,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope01 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -856,7 +856,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope012 == list(candidates) pdb = db_utils.ProteinDatabase( @@ -877,7 +877,7 @@ def test_get_candidates_isotope_error(tiny_fasta_file, residues_dict): residues=residues_dict, ) pdb.db_peptides = peptide_list - candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2) + candidates = pdb.get_candidates(precursor_mz=496.2, charge=2) assert expected_isotope0123 == list(candidates)
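
Note on the arithmetic these candidate-selection tests exercise: `ProteinDatabase.get_candidates` keeps the database peptides whose calculated neutral mass falls within a ppm tolerance of the observed precursor, once per allowed isotope error. The snippet below is a minimal, self-contained sketch of that window calculation, reusing the `PROTON` and `ISOTOPE_SPACING` constants defined in `db_utils.py`. The function name `candidate_mass_window`, the standalone layout, and the sign convention for the isotope shift are illustrative assumptions rather than Casanovo's actual API, and the real implementation's bounds handling may differ.

from typing import List, Tuple

# Constants mirroring those at the top of casanovo/data/db_utils.py.
PROTON = 1.00727646677
ISOTOPE_SPACING = 1.003355


def candidate_mass_window(
    precursor_mz: float,
    charge: int,
    tolerance_ppm: float,
    isotope_errors: List[int],
) -> List[Tuple[float, float]]:
    """Return one (lower, upper) neutral-mass bound per isotope error."""
    # Neutral (uncharged, monoisotopic) mass implied by the observed m/z.
    neutral_mass = (precursor_mz - PROTON) * charge
    windows = []
    for iso in isotope_errors:
        # Assume the instrument selected an isotope peak `iso` positions up,
        # so shift the observed mass down accordingly (the sign convention
        # in the actual implementation may differ).
        shifted = neutral_mass - iso * ISOTOPE_SPACING
        half_width = shifted * tolerance_ppm / 1e6
        windows.append((shifted - half_width, shifted + half_width))
    return windows


if __name__ == "__main__":
    # The same query the unit tests use: m/z 496.2 at charge 2, 10,000 ppm.
    for lo, hi in candidate_mass_window(496.2, 2, 10000, [0, 1, 2]):
        print(f"{lo:.4f} to {hi:.4f}")

Run with the tests' query (m/z 496.2 at charge 2 and a very wide 10,000 ppm tolerance), each additional isotope error contributes one more mass window, which is consistent with the expected candidate lists above only growing as the isotope-error range widens.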