From 4ca0338995f826e440104f3949240f500f74fed4 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Mon, 11 Nov 2024 20:16:39 +0000
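Subject: [PATCH] Migrate to epochs: 1 in all configs

Set `epochs` to 1 (previously 3) in the 37 recipe configs listed below,
covering the dev, gemma, gemma2, llama2, llama3, llama3_1, llama3_2,
mistral, qwen2 and qwen2_5 directories. Each file has exactly one line
changed; no other settings are touched.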

---
 recipes/configs/dev/8B_full_experimental.yaml       | 2 +-
 recipes/configs/gemma/2B_full.yaml                  | 2 +-
 recipes/configs/gemma/2B_lora.yaml                  | 2 +-
 recipes/configs/gemma/2B_lora_single_device.yaml    | 2 +-
 recipes/configs/gemma/2B_qlora_single_device.yaml   | 2 +-
 recipes/configs/gemma/7B_lora.yaml                  | 2 +-
 recipes/configs/gemma/7B_qlora_single_device.yaml   | 2 +-
 recipes/configs/gemma2/27B_lora.yaml                | 2 +-
 recipes/configs/gemma2/27B_qlora_single_device.yaml | 2 +-
 recipes/configs/gemma2/2B_full.yaml                 | 2 +-
 recipes/configs/gemma2/2B_lora.yaml                 | 2 +-
 recipes/configs/gemma2/2B_lora_single_device.yaml   | 2 +-
 recipes/configs/gemma2/2B_qlora_single_device.yaml  | 2 +-
 recipes/configs/gemma2/9B_lora.yaml                 | 2 +-
 recipes/configs/gemma2/9B_qlora_single_device.yaml  | 2 +-
 recipes/configs/llama2/13B_full.yaml                | 2 +-
 recipes/configs/llama2/7B_full.yaml                 | 2 +-
 recipes/configs/llama2/7B_full_low_memory.yaml      | 2 +-
 recipes/configs/llama2/7B_qat_full.yaml             | 2 +-
 recipes/configs/llama3/70B_full.yaml                | 2 +-
 recipes/configs/llama3/8B_full.yaml                 | 2 +-
 recipes/configs/llama3/8B_full_single_device.yaml   | 2 +-
 recipes/configs/llama3/8B_qat_full.yaml             | 2 +-
 recipes/configs/llama3_1/70B_full.yaml              | 2 +-
 recipes/configs/llama3_1/8B_full.yaml               | 2 +-
 recipes/configs/llama3_1/8B_full_single_device.yaml | 2 +-
 recipes/configs/llama3_2/1B_full.yaml               | 2 +-
 recipes/configs/llama3_2/1B_full_single_device.yaml | 2 +-
 recipes/configs/llama3_2/3B_full.yaml               | 2 +-
 recipes/configs/llama3_2/3B_full_single_device.yaml | 2 +-
 recipes/configs/mistral/7B_full.yaml                | 2 +-
 recipes/configs/mistral/7B_full_low_memory.yaml     | 2 +-
 recipes/configs/mistral/7B_lora.yaml                | 2 +-
 recipes/configs/mistral/7B_lora_single_device.yaml  | 2 +-
 recipes/configs/mistral/7B_qlora_single_device.yaml | 2 +-
 recipes/configs/qwen2/1.5B_full.yaml                | 2 +-
 recipes/configs/qwen2_5/1_5B_full.yaml              | 2 +-
 37 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/recipes/configs/dev/8B_full_experimental.yaml b/recipes/configs/dev/8B_full_experimental.yaml
index 288c55e105..c3fb212093 100644
--- a/recipes/configs/dev/8B_full_experimental.yaml
+++ b/recipes/configs/dev/8B_full_experimental.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/gemma/2B_full.yaml b/recipes/configs/gemma/2B_full.yaml
index 0e34bb205c..5bd0f05a02 100644
--- a/recipes/configs/gemma/2B_full.yaml
+++ b/recipes/configs/gemma/2B_full.yaml
@@ -46,7 +46,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/gemma/2B_lora.yaml b/recipes/configs/gemma/2B_lora.yaml
index 9895736c35..d947b358b0 100644
--- a/recipes/configs/gemma/2B_lora.yaml
+++ b/recipes/configs/gemma/2B_lora.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/gemma/2B_lora_single_device.yaml b/recipes/configs/gemma/2B_lora_single_device.yaml
index ed7aa11360..0559dc218c 100644
--- a/recipes/configs/gemma/2B_lora_single_device.yaml
+++ b/recipes/configs/gemma/2B_lora_single_device.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/gemma/2B_qlora_single_device.yaml b/recipes/configs/gemma/2B_qlora_single_device.yaml
index ea288595ba..a3c7f3a5f9 100644
--- a/recipes/configs/gemma/2B_qlora_single_device.yaml
+++ b/recipes/configs/gemma/2B_qlora_single_device.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/gemma/7B_lora.yaml b/recipes/configs/gemma/7B_lora.yaml
index 97685e66e1..a67e9ea3e7 100644
--- a/recipes/configs/gemma/7B_lora.yaml
+++ b/recipes/configs/gemma/7B_lora.yaml
@@ -66,7 +66,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/gemma/7B_qlora_single_device.yaml b/recipes/configs/gemma/7B_qlora_single_device.yaml
index 985ab6cae8..471de7572a 100644
--- a/recipes/configs/gemma/7B_qlora_single_device.yaml
+++ b/recipes/configs/gemma/7B_qlora_single_device.yaml
@@ -65,7 +65,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/gemma2/27B_lora.yaml b/recipes/configs/gemma2/27B_lora.yaml
index a138441199..2ec94264c9 100644
--- a/recipes/configs/gemma2/27B_lora.yaml
+++ b/recipes/configs/gemma2/27B_lora.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/27B_qlora_single_device.yaml b/recipes/configs/gemma2/27B_qlora_single_device.yaml
index 14d9b75ba7..b56f42db9f 100644
--- a/recipes/configs/gemma2/27B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/27B_qlora_single_device.yaml
@@ -62,7 +62,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/2B_full.yaml b/recipes/configs/gemma2/2B_full.yaml
index e302dd759d..b87cf1ccf9 100644
--- a/recipes/configs/gemma2/2B_full.yaml
+++ b/recipes/configs/gemma2/2B_full.yaml
@@ -47,7 +47,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/gemma2/2B_lora.yaml b/recipes/configs/gemma2/2B_lora.yaml
index 9a439ee0a3..7569d2cf2f 100644
--- a/recipes/configs/gemma2/2B_lora.yaml
+++ b/recipes/configs/gemma2/2B_lora.yaml
@@ -65,7 +65,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/2B_lora_single_device.yaml b/recipes/configs/gemma2/2B_lora_single_device.yaml
index 1a2703fb47..d1fad95114 100644
--- a/recipes/configs/gemma2/2B_lora_single_device.yaml
+++ b/recipes/configs/gemma2/2B_lora_single_device.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 8
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 2
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/2B_qlora_single_device.yaml b/recipes/configs/gemma2/2B_qlora_single_device.yaml
index c2525460ff..880c2d4b81 100644
--- a/recipes/configs/gemma2/2B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/2B_qlora_single_device.yaml
@@ -64,7 +64,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/9B_lora.yaml b/recipes/configs/gemma2/9B_lora.yaml
index 960e4fa881..59c95aa619 100644
--- a/recipes/configs/gemma2/9B_lora.yaml
+++ b/recipes/configs/gemma2/9B_lora.yaml
@@ -63,7 +63,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/gemma2/9B_qlora_single_device.yaml b/recipes/configs/gemma2/9B_qlora_single_device.yaml
index 8991ba9ece..b0dfda2c67 100644
--- a/recipes/configs/gemma2/9B_qlora_single_device.yaml
+++ b/recipes/configs/gemma2/9B_qlora_single_device.yaml
@@ -62,7 +62,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 4
 compile: False  # pytorch compile, set to true for perf/memory improvement
diff --git a/recipes/configs/llama2/13B_full.yaml b/recipes/configs/llama2/13B_full.yaml
index d02ce13c0b..d577559305 100644
--- a/recipes/configs/llama2/13B_full.yaml
+++ b/recipes/configs/llama2/13B_full.yaml
@@ -50,7 +50,7 @@ shuffle: True
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/llama2/7B_full.yaml b/recipes/configs/llama2/7B_full.yaml
index 99e7fcc30b..3031538137 100644
--- a/recipes/configs/llama2/7B_full.yaml
+++ b/recipes/configs/llama2/7B_full.yaml
@@ -49,7 +49,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/llama2/7B_full_low_memory.yaml b/recipes/configs/llama2/7B_full_low_memory.yaml
index c5300c0a90..07514959db 100644
--- a/recipes/configs/llama2/7B_full_low_memory.yaml
+++ b/recipes/configs/llama2/7B_full_low_memory.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW
   lr: 1e-5
diff --git a/recipes/configs/llama2/7B_qat_full.yaml b/recipes/configs/llama2/7B_qat_full.yaml
index e404b0c4dc..1208627f1c 100644
--- a/recipes/configs/llama2/7B_qat_full.yaml
+++ b/recipes/configs/llama2/7B_qat_full.yaml
@@ -45,7 +45,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/llama3/70B_full.yaml b/recipes/configs/llama3/70B_full.yaml
index fde65da8c6..7cffc3fce9 100644
--- a/recipes/configs/llama3/70B_full.yaml
+++ b/recipes/configs/llama3/70B_full.yaml
@@ -76,7 +76,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3/8B_full.yaml b/recipes/configs/llama3/8B_full.yaml
index 4d7f7e7b8e..9a93d59061 100644
--- a/recipes/configs/llama3/8B_full.yaml
+++ b/recipes/configs/llama3/8B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3/8B_full_single_device.yaml b/recipes/configs/llama3/8B_full_single_device.yaml
index 26f635fac0..b3fee76295 100644
--- a/recipes/configs/llama3/8B_full_single_device.yaml
+++ b/recipes/configs/llama3/8B_full_single_device.yaml
@@ -50,7 +50,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 1e-5
diff --git a/recipes/configs/llama3/8B_qat_full.yaml b/recipes/configs/llama3/8B_qat_full.yaml
index 2b08cbb10f..274ee0eae0 100644
--- a/recipes/configs/llama3/8B_qat_full.yaml
+++ b/recipes/configs/llama3/8B_qat_full.yaml
@@ -43,7 +43,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 # QAT arguments
 quantizer:
diff --git a/recipes/configs/llama3_1/70B_full.yaml b/recipes/configs/llama3_1/70B_full.yaml
index 8e70706414..d92fcef1f6 100644
--- a/recipes/configs/llama3_1/70B_full.yaml
+++ b/recipes/configs/llama3_1/70B_full.yaml
@@ -75,7 +75,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3_1/8B_full.yaml b/recipes/configs/llama3_1/8B_full.yaml
index b85c70ed1c..32aff922cf 100644
--- a/recipes/configs/llama3_1/8B_full.yaml
+++ b/recipes/configs/llama3_1/8B_full.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3_1/8B_full_single_device.yaml b/recipes/configs/llama3_1/8B_full_single_device.yaml
index 7e06ca4a6d..66f397e1df 100644
--- a/recipes/configs/llama3_1/8B_full_single_device.yaml
+++ b/recipes/configs/llama3_1/8B_full_single_device.yaml
@@ -53,7 +53,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
diff --git a/recipes/configs/llama3_2/1B_full.yaml b/recipes/configs/llama3_2/1B_full.yaml
index 437c222d28..56fc968b0d 100644
--- a/recipes/configs/llama3_2/1B_full.yaml
+++ b/recipes/configs/llama3_2/1B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3_2/1B_full_single_device.yaml b/recipes/configs/llama3_2/1B_full_single_device.yaml
index 4f367f03a5..e2aa1c110b 100644
--- a/recipes/configs/llama3_2/1B_full_single_device.yaml
+++ b/recipes/configs/llama3_2/1B_full_single_device.yaml
@@ -50,7 +50,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
diff --git a/recipes/configs/llama3_2/3B_full.yaml b/recipes/configs/llama3_2/3B_full.yaml
index 54f810c33a..4128bb58e7 100644
--- a/recipes/configs/llama3_2/3B_full.yaml
+++ b/recipes/configs/llama3_2/3B_full.yaml
@@ -49,7 +49,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 
 optimizer:
   _component_: torch.optim.AdamW
diff --git a/recipes/configs/llama3_2/3B_full_single_device.yaml b/recipes/configs/llama3_2/3B_full_single_device.yaml
index cffa1fb83e..ebc49ae1fb 100644
--- a/recipes/configs/llama3_2/3B_full_single_device.yaml
+++ b/recipes/configs/llama3_2/3B_full_single_device.yaml
@@ -51,7 +51,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW8bit
   lr: 2e-5
diff --git a/recipes/configs/mistral/7B_full.yaml b/recipes/configs/mistral/7B_full.yaml
index 2452ef275b..e025cee824 100644
--- a/recipes/configs/mistral/7B_full.yaml
+++ b/recipes/configs/mistral/7B_full.yaml
@@ -52,7 +52,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/mistral/7B_full_low_memory.yaml b/recipes/configs/mistral/7B_full_low_memory.yaml
index 7ae9f916ab..dbd8a9bae5 100644
--- a/recipes/configs/mistral/7B_full_low_memory.yaml
+++ b/recipes/configs/mistral/7B_full_low_memory.yaml
@@ -54,7 +54,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: bitsandbytes.optim.PagedAdamW
   lr: 5e-6
diff --git a/recipes/configs/mistral/7B_lora.yaml b/recipes/configs/mistral/7B_lora.yaml
index 2724a0754d..30919c13e8 100644
--- a/recipes/configs/mistral/7B_lora.yaml
+++ b/recipes/configs/mistral/7B_lora.yaml
@@ -72,7 +72,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 1  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/mistral/7B_lora_single_device.yaml b/recipes/configs/mistral/7B_lora_single_device.yaml
index be143ce480..b6d2492bf1 100644
--- a/recipes/configs/mistral/7B_lora_single_device.yaml
+++ b/recipes/configs/mistral/7B_lora_single_device.yaml
@@ -69,7 +69,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/mistral/7B_qlora_single_device.yaml b/recipes/configs/mistral/7B_qlora_single_device.yaml
index b3c1337901..c0252fcb32 100644
--- a/recipes/configs/mistral/7B_qlora_single_device.yaml
+++ b/recipes/configs/mistral/7B_qlora_single_device.yaml
@@ -70,7 +70,7 @@ loss:
 
 # Fine-tuning arguments
 batch_size: 4
-epochs: 3
+epochs: 1
 max_steps_per_epoch: null
 gradient_accumulation_steps: 8  # Use to increase virtual batch size
 compile: False  # pytorch compile, set to true for better perf/memory
diff --git a/recipes/configs/qwen2/1.5B_full.yaml b/recipes/configs/qwen2/1.5B_full.yaml
index bae27e0a70..812ea45b10 100644
--- a/recipes/configs/qwen2/1.5B_full.yaml
+++ b/recipes/configs/qwen2/1.5B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True
diff --git a/recipes/configs/qwen2_5/1_5B_full.yaml b/recipes/configs/qwen2_5/1_5B_full.yaml
index be01ab8670..13999e478d 100644
--- a/recipes/configs/qwen2_5/1_5B_full.yaml
+++ b/recipes/configs/qwen2_5/1_5B_full.yaml
@@ -48,7 +48,7 @@ resume_from_checkpoint: False
 
 # Fine-tuning arguments
 batch_size: 2
-epochs: 3
+epochs: 1
 optimizer:
   _component_: torch.optim.AdamW
   fused: True