From b061aba73ae8b791ff5a7ec4e79f052f57d6e810 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Tue, 10 Dec 2024 21:26:52 +0000
Subject: [PATCH 1/2] updating configs

---
 recipes/configs/llama2/70B_lora.yaml    | 20 ++-------
 recipes/configs/llama2/70B_qlora.yaml   | 20 ++-------
 recipes/configs/llama3/70B_full.yaml    | 35 ++--------------
 recipes/configs/llama3/70B_lora.yaml    | 35 ++--------------
 recipes/configs/llama3_1/70B_full.yaml  | 35 ++--------------
 recipes/configs/llama3_1/70B_lora.yaml  | 35 ++--------------
 recipes/configs/llama3_3/70B_full.yaml  | 35 ++--------------
 recipes/configs/llama3_3/70B_lora.yaml  | 35 ++--------------
 recipes/configs/llama3_3/70B_qlora.yaml | 35 ++--------------
 .../qwen2_5/14B_lora_single_device.yaml | 13 ++----
 recipes/configs/qwen2_5/32B_lora.yaml   | 22 ++--------
 recipes/configs/qwen2_5/72B_lora.yaml   | 42 ++-----------------
 12 files changed, 36 insertions(+), 326 deletions(-)

diff --git a/recipes/configs/llama2/70B_lora.yaml b/recipes/configs/llama2/70B_lora.yaml
index a9be1f6cb6..2dfa239de3 100644
--- a/recipes/configs/llama2/70B_lora.yaml
+++ b/recipes/configs/llama2/70B_lora.yaml
@@ -29,23 +29,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-2-70b-hf
-  checkpoint_files: [
-    pytorch_model-00001-of-00015.bin,
-    pytorch_model-00002-of-00015.bin,
-    pytorch_model-00003-of-00015.bin,
-    pytorch_model-00004-of-00015.bin,
-    pytorch_model-00005-of-00015.bin,
-    pytorch_model-00006-of-00015.bin,
-    pytorch_model-00007-of-00015.bin,
-    pytorch_model-00008-of-00015.bin,
-    pytorch_model-00009-of-00015.bin,
-    pytorch_model-00010-of-00015.bin,
-    pytorch_model-00011-of-00015.bin,
-    pytorch_model-00012-of-00015.bin,
-    pytorch_model-00013-of-00015.bin,
-    pytorch_model-00014-of-00015.bin,
-    pytorch_model-00015-of-00015.bin,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00015
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA2
diff --git a/recipes/configs/llama2/70B_qlora.yaml b/recipes/configs/llama2/70B_qlora.yaml
index 3e48bbcaa0..0835f8cde0 100644
--- a/recipes/configs/llama2/70B_qlora.yaml
+++ b/recipes/configs/llama2/70B_qlora.yaml
@@ -34,23 +34,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-2-70b-hf
-  checkpoint_files: [
-    pytorch_model-00001-of-00015.bin,
-    pytorch_model-00002-of-00015.bin,
-    pytorch_model-00003-of-00015.bin,
-    pytorch_model-00004-of-00015.bin,
-    pytorch_model-00005-of-00015.bin,
-    pytorch_model-00006-of-00015.bin,
-    pytorch_model-00007-of-00015.bin,
-    pytorch_model-00008-of-00015.bin,
-    pytorch_model-00009-of-00015.bin,
-    pytorch_model-00010-of-00015.bin,
-    pytorch_model-00011-of-00015.bin,
-    pytorch_model-00012-of-00015.bin,
-    pytorch_model-00013-of-00015.bin,
-    pytorch_model-00014-of-00015.bin,
-    pytorch_model-00015-of-00015.bin,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00015
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA2
diff --git a/recipes/configs/llama3/70B_full.yaml b/recipes/configs/llama3/70B_full.yaml
index df07de0165..f08019bdab 100644
--- a/recipes/configs/llama3/70B_full.yaml
+++ b/recipes/configs/llama3/70B_full.yaml
@@ -39,38 +39,9 @@ model:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-70B-Instruct
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3/70B_lora.yaml b/recipes/configs/llama3/70B_lora.yaml
index 2d0931cc07..23151a7193 100644
--- a/recipes/configs/llama3/70B_lora.yaml
+++ b/recipes/configs/llama3/70B_lora.yaml
@@ -29,38 +29,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3-70B-Instruct
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3_1/70B_full.yaml b/recipes/configs/llama3_1/70B_full.yaml
index d3d546cfdb..1cd06413a2 100644
--- a/recipes/configs/llama3_1/70B_full.yaml
+++ b/recipes/configs/llama3_1/70B_full.yaml
@@ -38,38 +38,9 @@ model:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3_1/70B_lora.yaml b/recipes/configs/llama3_1/70B_lora.yaml
index c27636d2fb..ed0a917025 100644
--- a/recipes/configs/llama3_1/70B_lora.yaml
+++ b/recipes/configs/llama3_1/70B_lora.yaml
@@ -28,38 +28,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3_3/70B_full.yaml b/recipes/configs/llama3_3/70B_full.yaml
index ae028ed207..fc9621631b 100644
--- a/recipes/configs/llama3_3/70B_full.yaml
+++ b/recipes/configs/llama3_3/70B_full.yaml
@@ -38,38 +38,9 @@ model:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3_3/70B_lora.yaml b/recipes/configs/llama3_3/70B_lora.yaml
index 84a861d428..5c09749abb 100644
--- a/recipes/configs/llama3_3/70B_lora.yaml
+++ b/recipes/configs/llama3_3/70B_lora.yaml
@@ -28,38 +28,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/llama3_3/70B_qlora.yaml b/recipes/configs/llama3_3/70B_qlora.yaml
index d4f4081b8c..ebc18e9b01 100644
--- a/recipes/configs/llama3_3/70B_qlora.yaml
+++ b/recipes/configs/llama3_3/70B_qlora.yaml
@@ -28,38 +28,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
-  checkpoint_files: [
-    model-00001-of-00030.safetensors,
-    model-00002-of-00030.safetensors,
-    model-00003-of-00030.safetensors,
-    model-00004-of-00030.safetensors,
-    model-00005-of-00030.safetensors,
-    model-00006-of-00030.safetensors,
-    model-00007-of-00030.safetensors,
-    model-00008-of-00030.safetensors,
-    model-00009-of-00030.safetensors,
-    model-00010-of-00030.safetensors,
-    model-00011-of-00030.safetensors,
-    model-00012-of-00030.safetensors,
-    model-00013-of-00030.safetensors,
-    model-00014-of-00030.safetensors,
-    model-00015-of-00030.safetensors,
-    model-00016-of-00030.safetensors,
-    model-00017-of-00030.safetensors,
-    model-00018-of-00030.safetensors,
-    model-00019-of-00030.safetensors,
-    model-00020-of-00030.safetensors,
-    model-00021-of-00030.safetensors,
-    model-00022-of-00030.safetensors,
-    model-00023-of-00030.safetensors,
-    model-00024-of-00030.safetensors,
-    model-00025-of-00030.safetensors,
-    model-00026-of-00030.safetensors,
-    model-00027-of-00030.safetensors,
-    model-00028-of-00030.safetensors,
-    model-00029-of-00030.safetensors,
-    model-00030-of-00030.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00030
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: LLAMA3
diff --git a/recipes/configs/qwen2_5/14B_lora_single_device.yaml b/recipes/configs/qwen2_5/14B_lora_single_device.yaml
index 93220bb466..35c85cdb6e 100644
--- a/recipes/configs/qwen2_5/14B_lora_single_device.yaml
+++ b/recipes/configs/qwen2_5/14B_lora_single_device.yaml
@@ -37,16 +37,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Qwen2_5-14B-Instruct
-  checkpoint_files: [
-    model-00001-of-00008.safetensors,
-    model-00002-of-00008.safetensors,
-    model-00003-of-00008.safetensors,
-    model-00004-of-00008.safetensors,
-    model-00005-of-00008.safetensors,
-    model-00006-of-00008.safetensors,
-    model-00007-of-00008.safetensors,
-    model-00008-of-00008.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00008
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: QWEN2
diff --git a/recipes/configs/qwen2_5/32B_lora.yaml b/recipes/configs/qwen2_5/32B_lora.yaml
index 6e5ab5174f..f8d2f6850e 100644
--- a/recipes/configs/qwen2_5/32B_lora.yaml
+++ b/recipes/configs/qwen2_5/32B_lora.yaml
@@ -35,25 +35,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Qwen2_5-32B-Instruct
-  checkpoint_files: [
-    model-00001-of-00017.safetensors,
-    model-00002-of-00017.safetensors,
-    model-00003-of-00017.safetensors,
-    model-00004-of-00017.safetensors,
-    model-00005-of-00017.safetensors,
-    model-00006-of-00017.safetensors,
-    model-00007-of-00017.safetensors,
-    model-00008-of-00017.safetensors,
-    model-00009-of-00017.safetensors,
-    model-00010-of-00017.safetensors,
-    model-00011-of-00017.safetensors,
-    model-00012-of-00017.safetensors,
-    model-00013-of-00017.safetensors,
-    model-00014-of-00017.safetensors,
-    model-00015-of-00017.safetensors,
-    model-00016-of-00017.safetensors,
-    model-00017-of-00017.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00017
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: QWEN2
diff --git a/recipes/configs/qwen2_5/72B_lora.yaml b/recipes/configs/qwen2_5/72B_lora.yaml
index 41ff800c5a..86b36340fc 100644
--- a/recipes/configs/qwen2_5/72B_lora.yaml
+++ b/recipes/configs/qwen2_5/72B_lora.yaml
@@ -35,45 +35,9 @@ tokenizer:
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Qwen2_5-72B-Instruct
-  checkpoint_files: [
-    model-00001-of-00037.safetensors,
-    model-00002-of-00037.safetensors,
-    model-00003-of-00037.safetensors,
-    model-00004-of-00037.safetensors,
-    model-00005-of-00037.safetensors,
-    model-00006-of-00037.safetensors,
-    model-00007-of-00037.safetensors,
-    model-00008-of-00037.safetensors,
-    model-00009-of-00037.safetensors,
-    model-00010-of-00037.safetensors,
-    model-00011-of-00037.safetensors,
-    model-00012-of-00037.safetensors,
-    model-00013-of-00037.safetensors,
-    model-00014-of-00037.safetensors,
-    model-00015-of-00037.safetensors,
-    model-00016-of-00037.safetensors,
-    model-00017-of-00037.safetensors,
-    model-00018-of-00037.safetensors,
-    model-00019-of-00037.safetensors,
-    model-00020-of-00037.safetensors,
-    model-00021-of-00037.safetensors,
-    model-00022-of-00037.safetensors,
-    model-00023-of-00037.safetensors,
-    model-00024-of-00037.safetensors,
-    model-00025-of-00037.safetensors,
-    model-00026-of-00037.safetensors,
-    model-00027-of-00037.safetensors,
-    model-00028-of-00037.safetensors,
-    model-00029-of-00037.safetensors,
-    model-00030-of-00037.safetensors,
-    model-00031-of-00037.safetensors,
-    model-00032-of-00037.safetensors,
-    model-00033-of-00037.safetensors,
-    model-00034-of-00037.safetensors,
-    model-00035-of-00037.safetensors,
-    model-00036-of-00037.safetensors,
-    model-00037-of-00037.safetensors,
-  ]
+  checkpoint_files:
+    filename_format: model-{}-of-{}.safetensors
+    max_filename: 00037
   recipe_checkpoint: null
   output_dir: ${output_dir}
   model_type: QWEN2

From 921ccde58004d9a923de2e9fcee6b7818a7df4a4 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Tue, 10 Dec 2024 21:34:58 +0000
Subject: [PATCH 2/2] woopsiedaisy

---
 recipes/configs/llama2/70B_lora.yaml  | 2 +-
 recipes/configs/llama2/70B_qlora.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/recipes/configs/llama2/70B_lora.yaml b/recipes/configs/llama2/70B_lora.yaml
index 2dfa239de3..717abacd18 100644
--- a/recipes/configs/llama2/70B_lora.yaml
+++ b/recipes/configs/llama2/70B_lora.yaml
@@ -30,7 +30,7 @@ checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-2-70b-hf
   checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
+    filename_format: pytorch_model-{}-of-{}.bin
     max_filename: 00015
   recipe_checkpoint: null
   output_dir: ${output_dir}
diff --git a/recipes/configs/llama2/70B_qlora.yaml b/recipes/configs/llama2/70B_qlora.yaml
index 0835f8cde0..5a380d3d0e 100644
--- a/recipes/configs/llama2/70B_qlora.yaml
+++ b/recipes/configs/llama2/70B_qlora.yaml
@@ -35,7 +35,7 @@ checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-2-70b-hf
   checkpoint_files:
-    filename_format: model-{}-of-{}.safetensors
+    filename_format: pytorch_model-{}-of-{}.bin
     max_filename: 00015
   recipe_checkpoint: null
   output_dir: ${output_dir}
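
Note for reviewers: the compact checkpoint_files form introduced above is shorthand for the explicit shard lists these patches delete. The Python sketch below only illustrates that expansion under stated assumptions; the function name is hypothetical and the padding rule is inferred from the lists shown in the diff, not taken from torchtune's actual checkpointing code.

# Illustrative sketch only: expands a compact checkpoint_files spec
# (filename_format + max_filename) into the explicit shard list that these
# configs previously spelled out by hand. The helper name is made up for this
# example; torchtune resolves the dict through its own checkpointing utilities.
def expand_checkpoint_files(filename_format: str, max_filename: str) -> list[str]:
    n_shards = int(max_filename)   # e.g. "00015" -> 15 shard files
    width = len(max_filename)      # keep the same zero padding as max_filename
    return [
        filename_format.format(str(i).zfill(width), max_filename)
        for i in range(1, n_shards + 1)
    ]

# Example, matching the corrected llama2/70B_lora.yaml entry:
#   expand_checkpoint_files("pytorch_model-{}-of-{}.bin", "00015")
#   -> ["pytorch_model-00001-of-00015.bin", ..., "pytorch_model-00015-of-00015.bin"]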