Log optimizer and training metrics to CSV file (#376)
* csv logger

* optimizer metrics logger

* metrics logging unit tests

* config item retrieval, additional requested changes

* Generate new screengrabs with rich-codex

* changelog update

* Generate new screengrabs with rich-codex

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Lilferrit and github-actions[bot] authored Sep 20, 2024
1 parent aefa73c commit 34c456d
Showing 7 changed files with 85 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -10,12 +10,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

- During training, model checkpoints will be saved at the end of each training epoch in addition to the checkpoints saved at the end of every validation run.
- In addition to a local file, model weights can be specified via a URL. Upon initial download, the weights file is cached for future re-use.
- Training and optimizer metrics can now be logged to a CSV file by setting the `log_metrics` config option to true; the CSV file will be written to a sub-directory of the output directory named `csv_logs` (see the loading example after this diff).

### Changed

- Removed the `evaluate` sub-command; all model evaluation functionality has been moved to the `sequence` command via the new `--evaluate` flag.
- The `--output` option has been split into two options, `--output_dir` and `--output_root`.
- The `--validation_peak_path` option is now optional when training; if it is not set, the `train_peak_path` will also be used for validation.
- The `tb_summarywriter` config option is now a boolean; if set to true, the TensorBoard summary will be written to a sub-directory of the output directory named `tensorboard`.

### Fixed

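A minimal sketch of loading the resulting metrics file, assuming Lightning's `CSVLogger` default file name (`metrics.csv`) and a hypothetical output directory; the available columns depend on which metrics the model logs:

```python
from pathlib import Path

import pandas as pd

# Hypothetical output directory; with `log_metrics: true`, the logs land
# in <output_dir>/csv_logs/metrics.csv (Lightning's default file name).
output_dir = Path("casanovo_output")
metrics = pd.read_csv(output_dir / "csv_logs" / "metrics.csv")
print(metrics.columns.tolist())  # e.g. "step", "epoch", loss and lr-* columns
```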
2 changes: 1 addition & 1 deletion casanovo/casanovo.py
@@ -192,7 +192,7 @@ def sequence(

runner.predict(
peak_path,
-str((output_path / output_root).with_suffix(".mztab")),
+str((output_path / output_root_name).with_suffix(".mztab")),
evaluate=evaluate,
)
psms = runner.writer.psms
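The fix above renames the variable to `output_root_name`; for reference, a small illustration of how the mzTab path is assembled, with hypothetical values standing in for the CLI-derived ones:

```python
from pathlib import Path

# Hypothetical stand-ins for the CLI-derived values in casanovo.py.
output_path = Path("results")
output_root_name = "casanovo_run"

# with_suffix() replaces any existing suffix on the final path component,
# yielding "results/casanovo_run.mztab".
mztab_file = str((output_path / output_root_name).with_suffix(".mztab"))
```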
2 changes: 2 additions & 0 deletions casanovo/config.py
@@ -65,6 +65,8 @@ class Config:
residues=dict,
n_log=int,
tb_summarywriter=bool,
log_metrics=bool,
log_every_n_steps=int,
train_label_smoothing=float,
warmup_iters=int,
cosine_schedule_period_iters=int,
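The two new entries extend the mapping that `Config` uses to type-check user-supplied options. A rough sketch of how such a mapping can drive validation, in a simplified stand-alone form (the actual `Config` class may differ):

```python
# Sketch only: coerce and validate user config values against a
# name-to-type mapping like the one above.
_config_types = {
    "n_log": int,
    "tb_summarywriter": bool,
    "log_metrics": bool,
    "log_every_n_steps": int,
}

def validate_config(user_config: dict) -> dict:
    validated = {}
    for key, value in user_config.items():
        if key not in _config_types:
            raise KeyError(f"unknown config option: {key}")
        validated[key] = _config_types[key](value)
    return validated
```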
4 changes: 4 additions & 0 deletions casanovo/config.yaml
@@ -42,6 +42,10 @@ random_seed: 454
n_log: 1
# Whether to create tensorboard directory
tb_summarywriter: false
# Whether to create csv_logs directory
log_metrics: false
# How often to log optimizer parameters in steps
log_every_n_steps: 50
# Model validation and checkpointing frequency in training steps.
val_check_interval: 50_000

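These defaults back the `config.get(...)` calls used in the model runner below; a brief usage sketch, with a hypothetical config file path:

```python
from casanovo.config import Config

config = Config("config.yaml")  # hypothetical path to a user config file
log_metrics = config.get("log_metrics")              # false unless overridden
log_every_n_steps = config.get("log_every_n_steps")  # 50 unless overridden
```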
35 changes: 34 additions & 1 deletion casanovo/denovo/model_runner.py
@@ -13,11 +13,12 @@

import depthcharge.masses
import lightning.pytorch as pl
import lightning.pytorch.loggers
import numpy as np
import torch
from depthcharge.data import AnnotatedSpectrumIndex, SpectrumIndex
from lightning.pytorch.strategies import DDPStrategy
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

from .. import utils
from ..config import Config
@@ -63,6 +64,8 @@ def __init__(
self.config = config
self.model_filename = model_filename
self.output_dir = output_dir
self.output_rootname = output_rootname
self.overwrite_ckpt_check = overwrite_ckpt_check

# Initialized later:
self.tmp_dir = None
@@ -105,6 +108,7 @@ def __init__(
filename=best_filename,
enable_version_counter=False,
),
LearningRateMonitor(log_momentum=True, log_weight_decay=True),
]

def __enter__(self):
@@ -255,7 +259,36 @@ def initialize_trainer(self, train: bool) -> None:
strategy=self._get_strategy(),
val_check_interval=self.config.val_check_interval,
check_val_every_n_epoch=None,
log_every_n_steps=self.config.get("log_every_n_steps"),
)

if self.config.get("log_metrics"):
if not self.output_dir:
logger.warning(
"Output directory not set in model runner. "
"No loss file will be created."
)
else:
csv_log_dir = "csv_logs"
if self.overwrite_ckpt_check:
utils.check_dir_file_exists(
self.output_dir,
csv_log_dir,
)

additional_cfg.update(
{
"logger": lightning.pytorch.loggers.CSVLogger(
self.output_dir,
version=csv_log_dir,
name=None,
),
"log_every_n_steps": self.config.get(
"log_every_n_steps"
),
}
)

trainer_cfg.update(additional_cfg)

self.trainer = pl.Trainer(**trainer_cfg)
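Pulled out of the diff, the new logging wiring reduces to the following stand-alone pattern; the output directory and the commented-out `fit` call are placeholders, not part of the commit:

```python
import lightning.pytorch as pl
from lightning.pytorch.callbacks import LearningRateMonitor
from lightning.pytorch.loggers import CSVLogger

output_dir = "casanovo_output"  # placeholder; derived from --output_dir in Casanovo

trainer = pl.Trainer(
    # Log learning rate, momentum, and weight decay alongside the
    # training/validation metrics.
    callbacks=[LearningRateMonitor(log_momentum=True, log_weight_decay=True)],
    # name=None with version="csv_logs" writes the logs directly to
    # <output_dir>/csv_logs instead of a versioned sub-directory.
    logger=CSVLogger(output_dir, name=None, version="csv_logs"),
    log_every_n_steps=50,
)
# trainer.fit(model, train_dataloader, val_dataloader)
```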
4 changes: 3 additions & 1 deletion tests/conftest.py
@@ -224,7 +224,9 @@ def tiny_config(tmp_path):
"devices": None,
"random_seed": 454,
"n_log": 1,
"tb_summarywriter": None,
"tb_summarywriter": False,
"log_metrics": False,
"log_every_n_steps": 50,
"n_peaks": 150,
"min_mz": 50.0,
"max_mz": 2500.0,
39 changes: 39 additions & 0 deletions tests/unit_tests/test_runner.py
@@ -1,5 +1,6 @@
"""Unit tests specifically for the model_runner module."""

import shutil
from pathlib import Path

import pytest
@@ -282,3 +283,41 @@ def test_evaluate(
)

result_file.unlink()


def test_metrics_logging(tmp_path, mgf_small, tiny_config):
config = Config(tiny_config)
config._user_config["log_metrics"] = True
config._user_config["log_every_n_steps"] = 1
config.tb_summarywriter = True
config.max_epochs = 1

curr_model_path = tmp_path / "foo.epoch=0-step=1.ckpt"
best_model_path = tmp_path / "foo.best.ckpt"
tb_path = tmp_path / "tensorboard"
csv_path = tmp_path / "csv_logs"

with ModelRunner(
config, output_dir=tmp_path, output_rootname="foo"
) as runner:
runner.train([mgf_small], [mgf_small])

assert curr_model_path.is_file()
assert best_model_path.is_file()
assert tb_path.is_dir()
assert csv_path.is_dir()

curr_model_path.unlink()
best_model_path.unlink()
shutil.rmtree(tb_path)

with pytest.raises(FileExistsError):
with ModelRunner(
config, output_dir=tmp_path, output_rootname="foo"
) as runner:
runner.train([mgf_small], [mgf_small])

assert not curr_model_path.is_file()
assert not best_model_path.is_file()
assert not tb_path.is_dir()
assert csv_path.is_dir()
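
The `FileExistsError` asserted in the second context comes from the `utils.check_dir_file_exists` guard called in `initialize_trainer`. Its behavior, inferred from this test rather than from the actual implementation, is roughly:

```python
from pathlib import Path

# Assumption inferred from the test above, not the real implementation:
# refuse to overwrite an existing output artifact such as csv_logs.
def check_dir_file_exists(output_dir: Path, file_name: str) -> None:
    if (Path(output_dir) / file_name).exists():
        raise FileExistsError(f"{file_name} already exists in {output_dir}")
```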
