Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log optimizer and training metrics to CSV file #376

Merged
merged 8 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion casanovo/casanovo.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def sequence(

runner.predict(
peak_path,
str((output_path / output_root).with_suffix(".mztab")),
str((output_path / output_root_name).with_suffix(".mztab")),
bittremieux marked this conversation as resolved.
Show resolved Hide resolved
evaluate=evaluate,
)
psms = runner.writer.psms
Expand Down
2 changes: 2 additions & 0 deletions casanovo/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class Config:
residues=dict,
n_log=int,
tb_summarywriter=bool,
log_metrics=bool,
log_every_n_steps=int,
train_label_smoothing=float,
warmup_iters=int,
cosine_schedule_period_iters=int,
Expand Down
4 changes: 4 additions & 0 deletions casanovo/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ random_seed: 454
n_log: 1
# Whether to create tensorboard directory
tb_summarywriter: false
# Whether to log optimizer and training metrics to a CSV file in the csv_logs directory
log_metrics: false
# How often to log optimizer parameters, in training steps
log_every_n_steps: 50
# Model validation and checkpointing frequency in training steps.
val_check_interval: 50_000

Expand Down
35 changes: 34 additions & 1 deletion casanovo/denovo/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@

import depthcharge.masses
import lightning.pytorch as pl
import lightning.pytorch.loggers
import numpy as np
import torch
from depthcharge.data import AnnotatedSpectrumIndex, SpectrumIndex
from lightning.pytorch.strategies import DDPStrategy
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks import ModelCheckpoint, LearningRateMonitor

from .. import utils
from ..config import Config
Expand Down Expand Up @@ -63,6 +64,8 @@ def __init__(
self.config = config
self.model_filename = model_filename
self.output_dir = output_dir
self.output_rootname = output_rootname
self.overwrite_ckpt_check = overwrite_ckpt_check

# Initialized later:
self.tmp_dir = None
Expand Down Expand Up @@ -105,6 +108,7 @@ def __init__(
filename=best_filename,
enable_version_counter=False,
),
LearningRateMonitor(log_momentum=True, log_weight_decay=True),
]

def __enter__(self):
Expand Down Expand Up @@ -255,7 +259,36 @@ def initialize_trainer(self, train: bool) -> None:
strategy=self._get_strategy(),
val_check_interval=self.config.val_check_interval,
check_val_every_n_epoch=None,
log_every_n_steps=self.config.get("log_every_n_steps"),
)

if self.config.get("log_metrics"):
if not self.output_dir:
logger.warning(
"Output directory not set in model runner. "
"No loss file will be created."
)
else:
csv_log_dir = "csv_logs"
if self.overwrite_ckpt_check:
utils.check_dir_file_exists(
self.output_dir,
bittremieux marked this conversation as resolved.
Show resolved Hide resolved
csv_log_dir,
)

additional_cfg.update(
{
"logger": lightning.pytorch.loggers.CSVLogger(
self.output_dir,
version=csv_log_dir,
name=None,
),
"log_every_n_steps": self.config.get(
"log_every_n_steps"
),
}
)

trainer_cfg.update(additional_cfg)

self.trainer = pl.Trainer(**trainer_cfg)
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,9 @@ def tiny_config(tmp_path):
"devices": None,
"random_seed": 454,
"n_log": 1,
"tb_summarywriter": None,
"tb_summarywriter": False,
"log_metrics": False,
"log_every_n_steps": 50,
"n_peaks": 150,
"min_mz": 50.0,
"max_mz": 2500.0,
Expand Down
39 changes: 39 additions & 0 deletions tests/unit_tests/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Unit tests specifically for the model_runner module."""

import shutil
from pathlib import Path

import pytest
Expand Down Expand Up @@ -282,3 +283,41 @@ def test_evaluate(
)

result_file.unlink()


def test_metrics_logging(tmp_path, mgf_small, tiny_config):
    """Check the output produced when training with metrics logging enabled.

    The first training run must leave both checkpoints, the tensorboard
    directory and the ``csv_logs`` directory in ``tmp_path``. After the
    checkpoints and the tensorboard directory are removed (``csv_logs`` is
    kept), a second run into the same directory must raise
    ``FileExistsError`` and must not recreate any of the removed outputs.
    """
    config = Config(tiny_config)
    config._user_config["log_metrics"] = True
    config._user_config["log_every_n_steps"] = 1
    config.tb_summarywriter = True
    config.max_epochs = 1

    # Expected outputs of a run whose output root name is "foo".
    epoch_ckpt = tmp_path / "foo.epoch=0-step=1.ckpt"
    best_ckpt = tmp_path / "foo.best.ckpt"
    tensorboard_dir = tmp_path / "tensorboard"
    csv_dir = tmp_path / "csv_logs"

    def run_training():
        # Train and validate on the same small MGF file.
        with ModelRunner(
            config, output_dir=tmp_path, output_rootname="foo"
        ) as runner:
            runner.train([mgf_small], [mgf_small])

    run_training()

    assert epoch_ckpt.is_file()
    assert best_ckpt.is_file()
    assert tensorboard_dir.is_dir()
    assert csv_dir.is_dir()

    # Remove everything except the CSV log directory.
    # NOTE(review): presumably the leftover csv_logs directory is what
    # triggers the FileExistsError below; confirm against ModelRunner.
    epoch_ckpt.unlink()
    best_ckpt.unlink()
    shutil.rmtree(tensorboard_dir)

    with pytest.raises(FileExistsError):
        run_training()

    assert not epoch_ckpt.is_file()
    assert not best_ckpt.is_file()
    assert not tensorboard_dir.is_dir()
    assert csv_dir.is_dir()
Loading