From b7c2ff8179044edf5b5e29350d26b4781e64591f Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Mon, 16 Oct 2023 20:15:46 -0700 Subject: [PATCH 1/7] resolves issue #238: remove custom_encoder option --- CHANGELOG.md | 2 ++ casanovo/config.yaml | 3 --- casanovo/denovo/model.py | 22 +++++++--------------- casanovo/denovo/model_runner.py | 1 - 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3d0e272..6263dba8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +### Removed +- removed custom_encoder option from config.yaml and in model_runner ### Added - Checkpoints include model parameters, allowing for mismatches with the provided configuration file. diff --git a/casanovo/config.yaml b/casanovo/config.yaml index dedb1740..729e827d 100644 --- a/casanovo/config.yaml +++ b/casanovo/config.yaml @@ -79,9 +79,6 @@ dropout: 0.0 # Number of dimensions to use for encoding peak intensity # Projected up to ``dim_model`` by default and summed with the peak m/z encoding dim_intensity: -# Option to provide a pre-trained spectrum encoder when training -# Trained from scratch by default -custom_encoder: # Max decoded peptide length max_length: 100 # Number of warmup iterations for learning rate scheduler diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 23eeac92..ca737ccd 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -43,9 +43,6 @@ class Spec2Pep(pl.LightningModule, ModelMixin): (``dim_model - dim_intensity``) are reserved for encoding the m/z value. If ``None``, the intensity will be projected up to ``dim_model`` using a linear layer, then summed with the m/z encoding for each peak. - custom_encoder : Optional[Union[SpectrumEncoder, PairedSpectrumEncoder]] - A pretrained encoder to use. 
The ``dim_model`` of the encoder must be - the same as that specified by the ``dim_model`` parameter here. max_length : int The maximum peptide length to decode. residues: Union[Dict[str, float], str] @@ -97,7 +94,6 @@ def __init__( n_layers: int = 9, dropout: float = 0.0, dim_intensity: Optional[int] = None, - custom_encoder: Optional[SpectrumEncoder] = None, max_length: int = 100, residues: Union[Dict[str, float], str] = "canonical", max_charge: int = 5, @@ -119,17 +115,13 @@ def __init__( super().__init__() self.save_hyperparameters() - # Build the model. - if custom_encoder is not None: - self.encoder = custom_encoder - else: - self.encoder = SpectrumEncoder( - dim_model=dim_model, - n_head=n_head, - dim_feedforward=dim_feedforward, - n_layers=n_layers, - dropout=dropout, - dim_intensity=dim_intensity, + self.encoder = SpectrumEncoder( + dim_model=dim_model, + n_head=n_head, + dim_feedforward=dim_feedforward, + n_layers=n_layers, + dropout=dropout, + dim_intensity=dim_intensity, ) self.decoder = PeptideDecoder( dim_model=dim_model, diff --git a/casanovo/denovo/model_runner.py b/casanovo/denovo/model_runner.py index cbefd849..852860d3 100644 --- a/casanovo/denovo/model_runner.py +++ b/casanovo/denovo/model_runner.py @@ -212,7 +212,6 @@ def initialize_model(self, train: bool) -> None: n_layers=self.config.n_layers, dropout=self.config.dropout, dim_intensity=self.config.dim_intensity, - custom_encoder=self.config.custom_encoder, max_length=self.config.max_length, residues=self.config.residues, max_charge=self.config.max_charge, From 8136a4be92733ee7bcda5307b17e3285f16cf9e2 Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Mon, 16 Oct 2023 23:50:54 -0700 Subject: [PATCH 2/7] fixed lint issue --- casanovo/denovo/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index ca737ccd..c26529de 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -131,7 +131,7 @@ def __init__( 
dropout=dropout, residues=residues, max_charge=max_charge, - ) + ) self.softmax = torch.nn.Softmax(2) self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0) # Optimizer settings. From bd1366c6040bcd7cf9c81edd481b8bb68298f0f1 Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Mon, 16 Oct 2023 23:54:34 -0700 Subject: [PATCH 3/7] fixed lint issue --- casanovo/denovo/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index c26529de..a5fdc0d2 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -122,7 +122,7 @@ def __init__( n_layers=n_layers, dropout=dropout, dim_intensity=dim_intensity, - ) + ) self.decoder = PeptideDecoder( dim_model=dim_model, n_head=n_head, @@ -131,7 +131,7 @@ def __init__( dropout=dropout, residues=residues, max_charge=max_charge, - ) + ) self.softmax = torch.nn.Softmax(2) self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0) # Optimizer settings. From 29ec6ec58e77b354473977fd5e9854044a72e579 Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Tue, 17 Oct 2023 14:27:44 -0700 Subject: [PATCH 4/7] Revert "fixed lint issue" This reverts commit bd1366c6040bcd7cf9c81edd481b8bb68298f0f1. --- casanovo/denovo/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index a5fdc0d2..c26529de 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -122,7 +122,7 @@ def __init__( n_layers=n_layers, dropout=dropout, dim_intensity=dim_intensity, - ) + ) self.decoder = PeptideDecoder( dim_model=dim_model, n_head=n_head, @@ -131,7 +131,7 @@ def __init__( dropout=dropout, residues=residues, max_charge=max_charge, - ) + ) self.softmax = torch.nn.Softmax(2) self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0) # Optimizer settings. 
From 185ec2b54a13beca2c672ff7b546641431fe84eb Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Tue, 17 Oct 2023 14:49:22 -0700 Subject: [PATCH 5/7] lint --- casanovo/denovo/model.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index c26529de..72ff41be 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -122,7 +122,7 @@ def __init__( n_layers=n_layers, dropout=dropout, dim_intensity=dim_intensity, - ) + ) self.decoder = PeptideDecoder( dim_model=dim_model, n_head=n_head, @@ -131,7 +131,7 @@ def __init__( dropout=dropout, residues=residues, max_charge=max_charge, - ) + ) self.softmax = torch.nn.Softmax(2) self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0) # Optimizer settings. @@ -839,6 +839,7 @@ def on_train_epoch_end(self) -> None: "step": self.trainer.global_step, "train": train_loss.item(), } + print("training info:", metrics) self._history.append(metrics) self._log_history() @@ -861,6 +862,7 @@ def on_validation_epoch_end(self) -> None: .detach() .item() ) + print("validation info:", metrics) self._history.append(metrics) self._log_history() From 95aefee7ce10f21793cdccc3a0c6ac30c0243356 Mon Sep 17 00:00:00 2001 From: Isha Gokhale Date: Tue, 17 Oct 2023 14:53:41 -0700 Subject: [PATCH 6/7] lint issue --- casanovo/denovo/model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index 72ff41be..a5fdc0d2 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -839,7 +839,6 @@ def on_train_epoch_end(self) -> None: "step": self.trainer.global_step, "train": train_loss.item(), } - print("training info:", metrics) self._history.append(metrics) self._log_history() @@ -862,7 +861,6 @@ def on_validation_epoch_end(self) -> None: .detach() .item() ) - print("validation info:", metrics) self._history.append(metrics) self._log_history() From 5cdf662d977852a592e006422ed27ff956af3612 Mon Sep 17 00:00:00 2001 
From: Wout Bittremieux Date: Thu, 19 Oct 2023 19:14:15 +0200 Subject: [PATCH 7/7] Consistently format changelog. --- CHANGELOG.md | 14 ++++---------- casanovo/denovo/model.py | 1 + 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6263dba8..73dd7510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] -### Removed -- removed custom_encoder option from config.yaml and in model_runner ### Added - Checkpoints include model parameters, allowing for mismatches with the provided configuration file. @@ -27,22 +25,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - `every_n_train_steps` has been renamed to `val_check_interval` in accordance to the corresponding Pytorch Lightning parameter. - Training batches are randomly shuffled. -### Fixed - -- Casanovo runs on CPU and can passes all tests. -- Enable gradients during prediction and validation to avoid NaNs from occuring as a temporary workaround until a new Pytorch version is available. -- Upgrade to depthcharge v0.2.3 for `PeptideTransformerDecoder` hotfix. - ### Removed - Remove config option for a custom Pytorch Lightning logger. +- Remove superfluous `custom_encoder` config option. ### Fixed -- Casanovo now runs on CPU and can passes all tests. -- Upgrade to Depthcharge v0.2.0 to fix sinusoidal encoding. +- Casanovo runs on CPU and can pass all tests. - Correctly refer to input peak files by their full file path. - Specifying custom residues to retrain Casanovo is now possible. +- Upgrade to depthcharge v0.2.3 to fix sinusoidal encoding and for the `PeptideTransformerDecoder` hotfix. +- Enable gradients during prediction and validation to avoid NaNs from occurring as a temporary workaround until a new Pytorch version is available. 
## [3.3.0] - 2023-04-04 diff --git a/casanovo/denovo/model.py b/casanovo/denovo/model.py index a5fdc0d2..e3f5655d 100644 --- a/casanovo/denovo/model.py +++ b/casanovo/denovo/model.py @@ -115,6 +115,7 @@ def __init__( super().__init__() self.save_hyperparameters() + # Build the model. self.encoder = SpectrumEncoder( dim_model=dim_model, n_head=n_head,