From 4ca0338995f826e440104f3949240f500f74fed4 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 11 Nov 2024 20:16:39 +0000 Subject: [PATCH] Migrate to epochs: 1 in all configs --- recipes/configs/dev/8B_full_experimental.yaml | 2 +- recipes/configs/gemma/2B_full.yaml | 2 +- recipes/configs/gemma/2B_lora.yaml | 2 +- recipes/configs/gemma/2B_lora_single_device.yaml | 2 +- recipes/configs/gemma/2B_qlora_single_device.yaml | 2 +- recipes/configs/gemma/7B_lora.yaml | 2 +- recipes/configs/gemma/7B_qlora_single_device.yaml | 2 +- recipes/configs/gemma2/27B_lora.yaml | 2 +- recipes/configs/gemma2/27B_qlora_single_device.yaml | 2 +- recipes/configs/gemma2/2B_full.yaml | 2 +- recipes/configs/gemma2/2B_lora.yaml | 2 +- recipes/configs/gemma2/2B_lora_single_device.yaml | 2 +- recipes/configs/gemma2/2B_qlora_single_device.yaml | 2 +- recipes/configs/gemma2/9B_lora.yaml | 2 +- recipes/configs/gemma2/9B_qlora_single_device.yaml | 2 +- recipes/configs/llama2/13B_full.yaml | 2 +- recipes/configs/llama2/7B_full.yaml | 2 +- recipes/configs/llama2/7B_full_low_memory.yaml | 2 +- recipes/configs/llama2/7B_qat_full.yaml | 2 +- recipes/configs/llama3/70B_full.yaml | 2 +- recipes/configs/llama3/8B_full.yaml | 2 +- recipes/configs/llama3/8B_full_single_device.yaml | 2 +- recipes/configs/llama3/8B_qat_full.yaml | 2 +- recipes/configs/llama3_1/70B_full.yaml | 2 +- recipes/configs/llama3_1/8B_full.yaml | 2 +- recipes/configs/llama3_1/8B_full_single_device.yaml | 2 +- recipes/configs/llama3_2/1B_full.yaml | 2 +- recipes/configs/llama3_2/1B_full_single_device.yaml | 2 +- recipes/configs/llama3_2/3B_full.yaml | 2 +- recipes/configs/llama3_2/3B_full_single_device.yaml | 2 +- recipes/configs/mistral/7B_full.yaml | 2 +- recipes/configs/mistral/7B_full_low_memory.yaml | 2 +- recipes/configs/mistral/7B_lora.yaml | 2 +- recipes/configs/mistral/7B_lora_single_device.yaml | 2 +- recipes/configs/mistral/7B_qlora_single_device.yaml | 2 +- recipes/configs/qwen2/1.5B_full.yaml | 2 +- recipes/configs/qwen2_5/1_5B_full.yaml | 2 +- 37 files changed, 37 insertions(+), 37 deletions(-) diff --git a/recipes/configs/dev/8B_full_experimental.yaml b/recipes/configs/dev/8B_full_experimental.yaml index 288c55e105..c3fb212093 100644 --- a/recipes/configs/dev/8B_full_experimental.yaml +++ b/recipes/configs/dev/8B_full_experimental.yaml @@ -48,7 +48,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/gemma/2B_full.yaml b/recipes/configs/gemma/2B_full.yaml index 0e34bb205c..5bd0f05a02 100644 --- a/recipes/configs/gemma/2B_full.yaml +++ b/recipes/configs/gemma/2B_full.yaml @@ -46,7 +46,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/gemma/2B_lora.yaml b/recipes/configs/gemma/2B_lora.yaml index 9895736c35..d947b358b0 100644 --- a/recipes/configs/gemma/2B_lora.yaml +++ b/recipes/configs/gemma/2B_lora.yaml @@ -64,7 +64,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/gemma/2B_lora_single_device.yaml b/recipes/configs/gemma/2B_lora_single_device.yaml index ed7aa11360..0559dc218c 100644 --- a/recipes/configs/gemma/2B_lora_single_device.yaml +++ b/recipes/configs/gemma/2B_lora_single_device.yaml @@ -63,7 +63,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 8 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/gemma/2B_qlora_single_device.yaml b/recipes/configs/gemma/2B_qlora_single_device.yaml index ea288595ba..a3c7f3a5f9 100644 --- a/recipes/configs/gemma/2B_qlora_single_device.yaml +++ b/recipes/configs/gemma/2B_qlora_single_device.yaml @@ -63,7 +63,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 8 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/gemma/7B_lora.yaml b/recipes/configs/gemma/7B_lora.yaml index 97685e66e1..a67e9ea3e7 100644 --- a/recipes/configs/gemma/7B_lora.yaml +++ b/recipes/configs/gemma/7B_lora.yaml @@ -66,7 +66,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/gemma/7B_qlora_single_device.yaml b/recipes/configs/gemma/7B_qlora_single_device.yaml index 985ab6cae8..471de7572a 100644 --- a/recipes/configs/gemma/7B_qlora_single_device.yaml +++ b/recipes/configs/gemma/7B_qlora_single_device.yaml @@ -65,7 +65,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 8 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/gemma2/27B_lora.yaml b/recipes/configs/gemma2/27B_lora.yaml index a138441199..2ec94264c9 100644 --- a/recipes/configs/gemma2/27B_lora.yaml +++ b/recipes/configs/gemma2/27B_lora.yaml @@ -63,7 +63,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/27B_qlora_single_device.yaml b/recipes/configs/gemma2/27B_qlora_single_device.yaml index 14d9b75ba7..b56f42db9f 100644 --- a/recipes/configs/gemma2/27B_qlora_single_device.yaml +++ b/recipes/configs/gemma2/27B_qlora_single_device.yaml @@ -62,7 +62,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 4 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/2B_full.yaml b/recipes/configs/gemma2/2B_full.yaml index e302dd759d..b87cf1ccf9 100644 --- a/recipes/configs/gemma2/2B_full.yaml +++ b/recipes/configs/gemma2/2B_full.yaml @@ -47,7 +47,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/gemma2/2B_lora.yaml b/recipes/configs/gemma2/2B_lora.yaml index 9a439ee0a3..7569d2cf2f 100644 --- a/recipes/configs/gemma2/2B_lora.yaml +++ b/recipes/configs/gemma2/2B_lora.yaml @@ -65,7 +65,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/2B_lora_single_device.yaml b/recipes/configs/gemma2/2B_lora_single_device.yaml index 1a2703fb47..d1fad95114 100644 --- a/recipes/configs/gemma2/2B_lora_single_device.yaml +++ b/recipes/configs/gemma2/2B_lora_single_device.yaml @@ -64,7 +64,7 @@ loss: # Fine-tuning arguments batch_size: 8 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 2 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/2B_qlora_single_device.yaml b/recipes/configs/gemma2/2B_qlora_single_device.yaml index c2525460ff..880c2d4b81 100644 --- a/recipes/configs/gemma2/2B_qlora_single_device.yaml +++ b/recipes/configs/gemma2/2B_qlora_single_device.yaml @@ -64,7 +64,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 4 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/9B_lora.yaml b/recipes/configs/gemma2/9B_lora.yaml index 960e4fa881..59c95aa619 100644 --- a/recipes/configs/gemma2/9B_lora.yaml +++ b/recipes/configs/gemma2/9B_lora.yaml @@ -63,7 +63,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/gemma2/9B_qlora_single_device.yaml b/recipes/configs/gemma2/9B_qlora_single_device.yaml index 8991ba9ece..b0dfda2c67 100644 --- a/recipes/configs/gemma2/9B_qlora_single_device.yaml +++ b/recipes/configs/gemma2/9B_qlora_single_device.yaml @@ -62,7 +62,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 4 compile: False # pytorch compile, set to true for perf/memory improvement diff --git a/recipes/configs/llama2/13B_full.yaml b/recipes/configs/llama2/13B_full.yaml index d02ce13c0b..d577559305 100644 --- a/recipes/configs/llama2/13B_full.yaml +++ b/recipes/configs/llama2/13B_full.yaml @@ -50,7 +50,7 @@ shuffle: True # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/llama2/7B_full.yaml b/recipes/configs/llama2/7B_full.yaml index 99e7fcc30b..3031538137 100644 --- a/recipes/configs/llama2/7B_full.yaml +++ b/recipes/configs/llama2/7B_full.yaml @@ -49,7 +49,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/llama2/7B_full_low_memory.yaml b/recipes/configs/llama2/7B_full_low_memory.yaml index c5300c0a90..07514959db 100644 --- a/recipes/configs/llama2/7B_full_low_memory.yaml +++ b/recipes/configs/llama2/7B_full_low_memory.yaml @@ -51,7 +51,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW lr: 1e-5 diff --git a/recipes/configs/llama2/7B_qat_full.yaml b/recipes/configs/llama2/7B_qat_full.yaml index e404b0c4dc..1208627f1c 100644 --- a/recipes/configs/llama2/7B_qat_full.yaml +++ b/recipes/configs/llama2/7B_qat_full.yaml @@ -45,7 +45,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/llama3/70B_full.yaml b/recipes/configs/llama3/70B_full.yaml index fde65da8c6..7cffc3fce9 100644 --- a/recipes/configs/llama3/70B_full.yaml +++ b/recipes/configs/llama3/70B_full.yaml @@ -76,7 +76,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3/8B_full.yaml b/recipes/configs/llama3/8B_full.yaml index 4d7f7e7b8e..9a93d59061 100644 --- a/recipes/configs/llama3/8B_full.yaml +++ b/recipes/configs/llama3/8B_full.yaml @@ -48,7 +48,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3/8B_full_single_device.yaml b/recipes/configs/llama3/8B_full_single_device.yaml index 26f635fac0..b3fee76295 100644 --- a/recipes/configs/llama3/8B_full_single_device.yaml +++ b/recipes/configs/llama3/8B_full_single_device.yaml @@ -50,7 +50,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW8bit lr: 1e-5 diff --git a/recipes/configs/llama3/8B_qat_full.yaml b/recipes/configs/llama3/8B_qat_full.yaml index 2b08cbb10f..274ee0eae0 100644 --- a/recipes/configs/llama3/8B_qat_full.yaml +++ b/recipes/configs/llama3/8B_qat_full.yaml @@ -43,7 +43,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 # QAT arguments quantizer: diff --git a/recipes/configs/llama3_1/70B_full.yaml b/recipes/configs/llama3_1/70B_full.yaml index 8e70706414..d92fcef1f6 100644 --- a/recipes/configs/llama3_1/70B_full.yaml +++ b/recipes/configs/llama3_1/70B_full.yaml @@ -75,7 +75,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3_1/8B_full.yaml b/recipes/configs/llama3_1/8B_full.yaml index b85c70ed1c..32aff922cf 100644 --- a/recipes/configs/llama3_1/8B_full.yaml +++ b/recipes/configs/llama3_1/8B_full.yaml @@ -51,7 +51,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3_1/8B_full_single_device.yaml b/recipes/configs/llama3_1/8B_full_single_device.yaml index 7e06ca4a6d..66f397e1df 100644 --- a/recipes/configs/llama3_1/8B_full_single_device.yaml +++ b/recipes/configs/llama3_1/8B_full_single_device.yaml @@ -53,7 +53,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW8bit lr: 2e-5 diff --git a/recipes/configs/llama3_2/1B_full.yaml b/recipes/configs/llama3_2/1B_full.yaml index 437c222d28..56fc968b0d 100644 --- a/recipes/configs/llama3_2/1B_full.yaml +++ b/recipes/configs/llama3_2/1B_full.yaml @@ -48,7 +48,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3_2/1B_full_single_device.yaml b/recipes/configs/llama3_2/1B_full_single_device.yaml index 4f367f03a5..e2aa1c110b 100644 --- a/recipes/configs/llama3_2/1B_full_single_device.yaml +++ b/recipes/configs/llama3_2/1B_full_single_device.yaml @@ -50,7 +50,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW8bit lr: 2e-5 diff --git a/recipes/configs/llama3_2/3B_full.yaml b/recipes/configs/llama3_2/3B_full.yaml index 54f810c33a..4128bb58e7 100644 --- a/recipes/configs/llama3_2/3B_full.yaml +++ b/recipes/configs/llama3_2/3B_full.yaml @@ -49,7 +49,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW diff --git a/recipes/configs/llama3_2/3B_full_single_device.yaml b/recipes/configs/llama3_2/3B_full_single_device.yaml index cffa1fb83e..ebc49ae1fb 100644 --- a/recipes/configs/llama3_2/3B_full_single_device.yaml +++ b/recipes/configs/llama3_2/3B_full_single_device.yaml @@ -51,7 +51,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW8bit lr: 2e-5 diff --git a/recipes/configs/mistral/7B_full.yaml b/recipes/configs/mistral/7B_full.yaml index 2452ef275b..e025cee824 100644 --- a/recipes/configs/mistral/7B_full.yaml +++ b/recipes/configs/mistral/7B_full.yaml @@ -52,7 +52,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/mistral/7B_full_low_memory.yaml b/recipes/configs/mistral/7B_full_low_memory.yaml index 7ae9f916ab..dbd8a9bae5 100644 --- a/recipes/configs/mistral/7B_full_low_memory.yaml +++ b/recipes/configs/mistral/7B_full_low_memory.yaml @@ -54,7 +54,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: bitsandbytes.optim.PagedAdamW lr: 5e-6 diff --git a/recipes/configs/mistral/7B_lora.yaml b/recipes/configs/mistral/7B_lora.yaml index 2724a0754d..30919c13e8 100644 --- a/recipes/configs/mistral/7B_lora.yaml +++ b/recipes/configs/mistral/7B_lora.yaml @@ -72,7 +72,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 1 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/mistral/7B_lora_single_device.yaml b/recipes/configs/mistral/7B_lora_single_device.yaml index be143ce480..b6d2492bf1 100644 --- a/recipes/configs/mistral/7B_lora_single_device.yaml +++ b/recipes/configs/mistral/7B_lora_single_device.yaml @@ -69,7 +69,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 8 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/mistral/7B_qlora_single_device.yaml b/recipes/configs/mistral/7B_qlora_single_device.yaml index b3c1337901..c0252fcb32 100644 --- a/recipes/configs/mistral/7B_qlora_single_device.yaml +++ b/recipes/configs/mistral/7B_qlora_single_device.yaml @@ -70,7 +70,7 @@ loss: # Fine-tuning arguments batch_size: 4 -epochs: 3 +epochs: 1 max_steps_per_epoch: null gradient_accumulation_steps: 8 # Use to increase virtual batch size compile: False # pytorch compile, set to true for better perf/memory diff --git a/recipes/configs/qwen2/1.5B_full.yaml b/recipes/configs/qwen2/1.5B_full.yaml index bae27e0a70..812ea45b10 100644 --- a/recipes/configs/qwen2/1.5B_full.yaml +++ b/recipes/configs/qwen2/1.5B_full.yaml @@ -48,7 +48,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True diff --git a/recipes/configs/qwen2_5/1_5B_full.yaml b/recipes/configs/qwen2_5/1_5B_full.yaml index be01ab8670..13999e478d 100644 --- a/recipes/configs/qwen2_5/1_5B_full.yaml +++ b/recipes/configs/qwen2_5/1_5B_full.yaml @@ -48,7 +48,7 @@ resume_from_checkpoint: False # Fine-tuning arguments batch_size: 2 -epochs: 3 +epochs: 1 optimizer: _component_: torch.optim.AdamW fused: True