Label smoothing in training (#261)
* Add option to change the learning rate scheduler and make it easier to add a new one.

* docs

* tests and formatting

* Add label smoothing

* Modify config file

* Minor fix config.yaml

* Run black

* Lint casanovo.py

* Revert "Merge branch 'add_lr_schedule_options' into label-smoothing"

This reverts commit 5716c7a, reversing
changes made to b044bc6.

* Add unit test

* Fix config test and add changelog

---------

Co-authored-by: Justin Sanders <[email protected]>
melihyilmaz and Justin Sanders authored Dec 12, 2023
1 parent e073415 commit 3b688e8
Showing 7 changed files with 33 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - `accelerator` parameter controls the accelerator (CPU, GPU, etc) that is used.
 - `devices` parameter controls the number of accelerators used.
 - `val_check_interval` parameter controls the frequency of both validation epochs and model checkpointing during training.
+- `train_label_smoothing` parameter controls the amount of label smoothing applied when calculating the training loss.
 
 ### Changed
 
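Background on the new parameter (not part of the diff): label smoothing replaces the one-hot training target with a mixture of that target and a uniform distribution over the output vocabulary, which penalizes over-confident predictions. A minimal sketch of the idea, using a made-up smoothing factor and class count rather than Casanovo's actual vocabulary:

```python
import torch

epsilon = 0.1      # hypothetical smoothing factor (the config default in this commit is 0.01)
num_classes = 5    # hypothetical vocabulary size
target = torch.tensor([2])

# One-hot target: all probability mass on the true class.
hard = torch.nn.functional.one_hot(target, num_classes).float()

# Smoothed target: (1 - epsilon) on the true class, epsilon spread uniformly.
smoothed = (1 - epsilon) * hard + epsilon / num_classes

print(hard)      # tensor([[0., 0., 1., 0., 0.]])
print(smoothed)  # tensor([[0.0200, 0.0200, 0.9200, 0.0200, 0.0200]])
```

With the default of 0.01, only 1% of the probability mass is redistributed, so the effect on the training loss is mild.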
1 change: 1 addition & 0 deletions casanovo/config.py
@@ -53,6 +53,7 @@ class Config:
         residues=dict,
         n_log=int,
         tb_summarywriter=str,
+        train_label_smoothing=float,
         warmup_iters=int,
         max_iters=int,
         learning_rate=float,
2 changes: 2 additions & 0 deletions casanovo/config.yaml
@@ -89,6 +89,8 @@ max_iters: 600_000
 learning_rate: 5e-4
 # Regularization term for weight updates
 weight_decay: 1e-5
+# Amount of label smoothing when computing the training loss
+train_label_smoothing: 0.01
 
 # TRAINING/INFERENCE OPTIONS
 # Number of spectra in one training batch
13 changes: 11 additions & 2 deletions casanovo/denovo/model.py
@@ -73,6 +73,8 @@ class Spec2Pep(pl.LightningModule, ModelMixin):
     tb_summarywriter: Optional[str]
         Folder path to record performance metrics during training. If ``None``,
         don't use a ``SummaryWriter``.
+    train_label_smoothing: float
+        Smoothing factor when calculating the training loss.
     warmup_iters: int
         The number of warm up iterations for the learning rate scheduler.
     max_iters: int
@@ -106,6 +108,7 @@ def __init__(
         tb_summarywriter: Optional[
             torch.utils.tensorboard.SummaryWriter
         ] = None,
+        train_label_smoothing: float = 0.01,
         warmup_iters: int = 100_000,
         max_iters: int = 600_000,
         out_writer: Optional[ms_io.MztabWriter] = None,
@@ -134,7 +137,10 @@ def __init__(
             max_charge=max_charge,
         )
         self.softmax = torch.nn.Softmax(2)
-        self.celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
+        self.celoss = torch.nn.CrossEntropyLoss(
+            ignore_index=0, label_smoothing=train_label_smoothing
+        )
+        self.val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)
         # Optimizer settings.
         self.warmup_iters = warmup_iters
         self.max_iters = max_iters
@@ -723,7 +729,10 @@ def training_step(
         """
         pred, truth = self._forward_step(*batch)
         pred = pred[:, :-1, :].reshape(-1, self.decoder.vocab_size + 1)
-        loss = self.celoss(pred, truth.flatten())
+        if mode == "train":
+            loss = self.celoss(pred, truth.flatten())
+        else:
+            loss = self.val_celoss(pred, truth.flatten())
         self.log(
             f"{mode}_CELoss",
             loss.detach(),
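The model change keeps two loss functions so that smoothing only affects training: `self.celoss` (smoothed) is used when `mode == "train"`, while `self.val_celoss` (unsmoothed) keeps the logged `{mode}_CELoss` for validation comparable across runs with different smoothing settings. A standalone sketch of that pattern, with toy logits rather than Casanovo's decoder output:

```python
import torch

# Mirror the pattern above: smoothed CE for training, plain CE for validation;
# index 0 is treated as padding in both.
train_celoss = torch.nn.CrossEntropyLoss(ignore_index=0, label_smoothing=0.01)
val_celoss = torch.nn.CrossEntropyLoss(ignore_index=0)

# Toy batch: 3 tokens over a 6-symbol vocabulary, predicted confidently and correctly.
logits = torch.full((3, 6), -5.0)
targets = torch.tensor([2, 4, 1])
logits[torch.arange(3), targets] = 5.0

train_loss = train_celoss(logits, targets)
val_loss = val_celoss(logits, targets)

# Smoothing demands some probability mass for the other symbols, so on confident,
# correct predictions the smoothed training loss is strictly larger.
assert train_loss > val_loss
```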
1 change: 1 addition & 0 deletions casanovo/denovo/model_runner.py
@@ -221,6 +221,7 @@ def initialize_model(self, train: bool) -> None:
             top_match=self.config.top_match,
             n_log=self.config.n_log,
             tb_summarywriter=self.config.tb_summarywriter,
+            train_label_smoothing=self.config.train_label_smoothing,
             warmup_iters=self.config.warmup_iters,
             max_iters=self.config.max_iters,
             lr=self.config.learning_rate,
1 change: 1 addition & 0 deletions tests/conftest.py
@@ -190,6 +190,7 @@ def tiny_config(tmp_path):
         "n_head": 2,
         "dim_feedforward": 10,
         "n_layers": 1,
+        "train_label_smoothing": 0.01,
         "warmup_iters": 1,
         "max_iters": 1,
         "max_epochs": 20,
19 changes: 16 additions & 3 deletions tests/unit_tests/test_unit.py
@@ -514,15 +514,28 @@ def test_spectrum_id_mzml(mzml_small, tmp_path):
 
 def test_train_val_step_functions():
     """Test train and validation step functions operating on batches."""
-    model = Spec2Pep(n_beams=1, residues="massivekb", min_peptide_len=4)
+    model = Spec2Pep(
+        n_beams=1,
+        residues="massivekb",
+        min_peptide_len=4,
+        train_label_smoothing=0.1,
+    )
     spectra = torch.zeros(1, 5, 2)
     precursors = torch.tensor([[469.25364, 2.0, 235.63410]])
     peptides = ["PEPK"]
     batch = (spectra, precursors, peptides)
 
+    train_step_loss = model.training_step(batch)
+    val_step_loss = model.validation_step(batch)
+
     # Check if valid loss value returned
-    assert model.training_step(batch) > 0
-    assert model.validation_step(batch) > 0
+    assert train_step_loss > 0
+    assert val_step_loss > 0
+
+    # Check if smoothing is applied in training and not in validation
+    assert model.celoss.label_smoothing == 0.1
+    assert model.val_celoss.label_smoothing == 0
+    assert val_step_loss != train_step_loss
 
 
 def test_run_map(mgf_small):
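The new test assertions rely on two facts about `torch.nn.CrossEntropyLoss`: the smoothing factor is exposed as a `label_smoothing` attribute, and a non-zero factor changes the loss value. With smoothing factor eps and C classes, the smoothed loss equals (1 - eps) times the ordinary cross-entropy plus eps times the cross-entropy against a uniform target, which can be checked numerically with toy tensors (not the test's model):

```python
import torch

eps, n_classes = 0.1, 10
logits = torch.randn(4, n_classes)
targets = torch.randint(0, n_classes, (4,))

smoothed_ce = torch.nn.CrossEntropyLoss(label_smoothing=eps)
assert smoothed_ce.label_smoothing == eps  # stored as a plain attribute, as the test asserts

log_probs = torch.log_softmax(logits, dim=1)
hard = -log_probs[torch.arange(4), targets]  # per-example cross-entropy vs. the true class
uniform = -log_probs.mean(dim=1)             # per-example cross-entropy vs. a uniform target
expected = ((1 - eps) * hard + eps * uniform).mean()

assert torch.isclose(smoothed_ce(logits, targets), expected)
```

Setting the factor to 0.0 recovers the previous behavior, in which training and validation losses on the same batch are identical.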
