Label smoothing in training (#261)
* Add option to change the learning rate scheduler and make it easier to add a new one.

* docs

* tests and formatting

* Add label smoothing

* Modify config file

* Minor fix config.yaml

* Run black

* Lint casanovo.py

* Revert "Merge branch 'add_lr_schedule_options' into label-smoothing"

This reverts commit 5716c7a, reversing
changes made to b044bc6.

* Add unit test

* Fix config test and add changelog

---------

Co-authored-by: Justin Sanders <[email protected]>
melihyilmaz and Justin Sanders authored Dec 12, 2023
1 parent e073415 commit 3b688e8
Showing 7 changed files with 33 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - `accelerator` parameter controls the accelerator (CPU, GPU, etc) that is used.
 - `devices` parameter controls the number of accelerators used.
 - `val_check_interval` parameter controls the frequency of both validation epochs and model checkpointing during training.
+- `train_label_smoothing` parameter controls the amount of label smoothing applied when calculating the training loss.
 
 ### Changed
 
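Background on the new parameter (not part of the diff): label smoothing replaces the one-hot training target with a mixture of that target and a uniform distribution over the output vocabulary, which penalizes over-confident predictions. A minimal sketch of the idea, using a made-up smoothing factor and class count rather than Casanovo's actual vocabulary:

```python
import torch

epsilon = 0.1      # hypothetical smoothing factor (the config default in this commit is 0.01)
num_classes = 5    # hypothetical vocabulary size
target = torch.tensor([2])

# One-hot target: all probability mass on the true class.
hard = torch.nn.functional.one_hot(target, num_classes).float()

# Smoothed target: (1 - epsilon) on the true class, epsilon spread uniformly.
smoothed = (1 - epsilon) * hard + epsilon / num_classes

print(hard)      # tensor([[0., 0., 1., 0., 0.]])
print(smoothed)  # tensor([[0.0200, 0.0200, 0.9200, 0.0200, 0.0200]])
```

With the default of 0.01, only 1% of the probability mass is redistributed, so the effect on the training loss is mild.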
1 change: 1 addition & 0 deletions casanovo/config.py
@@ -53,6 +53,7 @@ class Config:
         residues=dict,
         n_log=int,
         tb_summarywriter=str,
+        train_label_smoothing=float,
         warmup_iters=int,
         max_iters=int,
         learning_rate=float,
2 changes: 2 additions & 0 deletions casanovo/config.yaml
@@ -89,6 +89,8 @@ max_iters: 600_000
 learning_rate: 5e-4
 # Regularization term for weight updates
 weight_decay: 1e-5
+# Amount of label smoothing when computing the training loss
+train_label_smoothing: 0.01
 
 # TRAINING/INFERENCE OPTIONS
 # Number of spectra in one training batch
13 changes: 11 additions & 2 deletions casanovo/denovo/model.py
@@ -73,6 +73,8 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
     tb_summarywriter: Optional[str]
         Folder path to record performance metrics during training. If ``None``,
         don't use a ``SummaryWriter``.
+    train_label_smoothing: float
+        Smoothing factor when calculating the training loss.
     warmup_iters: int
         The number of warm up iterations for the learning rate scheduler.
     max_iters: int
@@ -106,6 +108,7 @@ def __init__(
         tb_summarywriter: Optional[
             torch.utils.tensorboard.SummaryWriter
         ] = None,
+        train_label_smoothing: float = 0.01,
         warmup_iters: int = 100_000,
         max_iters: int = 600_000,
         out_writer: Optional[ms_io.MztabWriter] = None,
@@ -134,7 +137,10 @@ def __init__(
             max_charge=max_charge,
         )
         self.softmax = torch.nn.Softmax(2)
-        self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
+        self.celoss = torch.nn.CrossEntropyLoss(
+            ignore_index=0, label_smoothing=train_label_smoothing
+        )
+        self.val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
         # Optimizer settings.
         self.warmup_iters = warmup_iters
         self.max_iters = max_iters
@@ -723,7 +729,10 @@ def training_step(
         """
         pred, truth = self._forward_step(*batch)
         pred = pred[:, :-1, :].reshape(-1, self.decoder.vocab_size + 1)
-        loss = self.celoss(pred, truth.flatten())
+        if mode == "train":
+            loss = self.celoss(pred, truth.flatten())
+        else:
+            loss = self.val_celoss(pred, truth.flatten())
         self.log(
             f"{mode}_CELoss",
             loss.detach(),
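The model change keeps two loss functions so that smoothing only affects training: `self.celoss` (smoothed) is used when `mode == "train"`, while `self.val_celoss` (unsmoothed) keeps the logged `{mode}_CELoss` for validation comparable across runs with different smoothing settings. A standalone sketch of that pattern, with toy logits rather than Casanovo's decoder output:

```python
import torch

# Mirror the pattern above: smoothed CE for training, plain CE for validation;
# index 0 is treated as padding in both.
train_celoss = torch.nn.CrossEntropyLoss(ignore_index=0, label_smoothing=0.01)
val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)

# Toy batch: 3 tokens over a 6-symbol vocabulary, predicted confidently and correctly.
logits = torch.full((3, 6), -5.0)
targets = torch.tensor([2, 4, 1])
logits[torch.arange(3), targets] = 5.0

train_loss = train_celoss(logits, targets)
val_loss = val_celoss(logits, targets)

# Smoothing demands some probability mass for the other symbols, so on confident,
# correct predictions the smoothed training loss is strictly larger.
assert train_loss > val_loss
```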
1 change: 1 addition & 0 deletions casanovo/denovo/model_runner.py
@@ -221,6 +221,7 @@ def initialize_model(self, train: bool) -> None:
             top_match=self.config.top_match,
             n_log=self.config.n_log,
             tb_summarywriter=self.config.tb_summarywriter,
+            train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
             max_iters=self.config.max_iters,
             lr=self.config.learning_rate,
1 change: 1 addition & 0 deletions tests/conftest.py
@@ -190,6 +190,7 @@ def tiny_config(tmp_path):
         "n_head": 2,
         "dim_feedforward": 10,
         "n_layers": 1,
+        "train_label_smoothing": 0.01,
         "warmup_iters": 1,
         "max_iters": 1,
         "max_epochs": 20,
19 changes: 16 additions & 3 deletions tests/unit_tests/test_unit.py
@@ -514,15 +514,28 @@ def test_spectrum_id_mzml(mzml_small, tmp_path):
 
 def test_train_val_step_functions():
     """Test train and validation step functions operating on batches."""
-    model = Spec2Pep(n_beams=1, residues="massivekb", min_peptide_len=4)
+    model = Spec2Pep(
+        n_beams=1,
+        residues="massivekb",
+        min_peptide_len=4,
+        train_label_smoothing=0.1,
+    )
     spectra = torch.zeros(1, 5, 2)
     precursors = torch.tensor([[469.25364, 2.0, 235.63410]])
     peptides = ["PEPK"]
     batch = (spectra, precursors, peptides)
 
+    train_step_loss = model.training_step(batch)
+    val_step_loss = model.validation_step(batch)
+
     # Check if valid loss value returned
-    assert model.training_step(batch) > 0
-    assert model.validation_step(batch) > 0
+    assert train_step_loss > 0
+    assert val_step_loss > 0
+
+    # Check if smoothing is applied in training and not in validation
+    assert model.celoss.label_smoothing == 0.1
+    assert model.val_celoss.label_smoothing == 0
+    assert val_step_loss != train_step_loss
 
 
 def test_run_map(mgf_small):
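The new test assertions rely on two facts about `torch.nn.CrossEntropyLoss`: the smoothing factor is exposed as a `label_smoothing` attribute, and a non-zero factor changes the loss value. With smoothing factor eps and C classes, the smoothed loss equals (1 - eps) times the ordinary cross-entropy plus eps times the cross-entropy against a uniform target, which can be checked numerically with toy tensors (not the test's model):

```python
import torch

eps, n_classes = 0.1, 10
logits = torch.randn(4, n_classes)
targets = torch.randint(0, n_classes, (4,))

smoothed_ce = torch.nn.CrossEntropyLoss(label_smoothing=eps)
assert smoothed_ce.label_smoothing == eps  # stored as a plain attribute, as the test asserts

log_probs = torch.log_softmax(logits, dim=1)
hard = -log_probs[torch.arange(4), targets]  # per-example cross-entropy vs. the true class
uniform = -log_probs.mean(dim=1)             # per-example cross-entropy vs. a uniform target
expected = ((1 - eps) * hard + eps * uniform).mean()

assert torch.isclose(smoothed_ce(logits, targets), expected)
```

Setting the factor to 0.0 recovers the previous behavior, in which training and validation losses on the same batch are identical.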
