From c3161a46032dac8f98fad40c19008a71e37943af Mon Sep 17 00:00:00 2001
From: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>
Date: Tue, 9 Apr 2024 10:48:56 -0700
Subject: [PATCH] Copy logs file

Signed-off-by: Thara Palanivel <130496890+tharapalanivel@users.noreply.github.com>
---
 build/launch_training.py | 8 +++++---
 tuning/sft_trainer.py    | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/build/launch_training.py b/build/launch_training.py
index db5cadf5c..96fee7872 100644
--- a/build/launch_training.py
+++ b/build/launch_training.py
@@ -107,9 +107,11 @@ def main():
         dirs_exist_ok=True,
     )
 
-    # copy over any loss logs
-    for file in glob.glob(f"{training_args.output_dir}/*loss.jsonl"):
-        shutil.copy(file, original_output_dir)
+    # copy over logs
+    shutil.copy(
+        os.path.join(training_args.output_dir, sft_trainer.TRAINING_LOGS_FILENAME),
+        original_output_dir,
+    )
 
 
 if __name__ == "__main__":
diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py
index d6e6973a8..5583a2dfa 100644
--- a/tuning/sft_trainer.py
+++ b/tuning/sft_trainer.py
@@ -48,6 +48,8 @@
 # Local
 from tuning.aim_loader import get_aimstack_callback
 
+TRAINING_LOGS_FILENAME = "training_logs.jsonl"
+
 
 class FileLoggingCallback(TrainerCallback):
     """Exports metrics, e.g., training loss to a file in the checkpoint directory."""
@@ -64,7 +66,7 @@ def on_log(self, args, state, control, logs=None, **kwargs):
         if not state.is_world_process_zero:
             return
 
-        log_file_path = os.path.join(args.output_dir, "training_logs.jsonl")
+        log_file_path = os.path.join(args.output_dir, TRAINING_LOGS_FILENAME)
         if logs is not None and "loss" in logs and "epoch" in logs:
            self._track_loss("loss", "training_loss", log_file_path, logs, state)
         elif logs is not None and "eval_loss" in logs and "epoch" in logs: