From 068ed6fa87c3a34c4481ce63c2becf085ad7a08c Mon Sep 17 00:00:00 2001 From: Felipe Mello Date: Thu, 7 Nov 2024 07:44:10 -0800 Subject: [PATCH] remove apply lora to output from config --- recipes/configs/llama3_2/1B_lora.yaml | 1 - recipes/configs/llama3_2/1B_lora_single_device.yaml | 1 - recipes/configs/llama3_2/1B_qlora_single_device.yaml | 1 - recipes/configs/llama3_2/3B_lora.yaml | 1 - recipes/configs/llama3_2/3B_lora_single_device.yaml | 1 - recipes/configs/llama3_2/3B_qlora_single_device.yaml | 1 - recipes/configs/qwen2_5/0_5B_lora.yaml | 1 - recipes/configs/qwen2_5/0_5B_lora_single_device.yaml | 1 - recipes/configs/qwen2_5/1_5B_lora.yaml | 1 - recipes/configs/qwen2_5/1_5B_lora_single_device.yaml | 1 - recipes/configs/qwen2_5/3B_lora.yaml | 1 - recipes/configs/qwen2_5/3B_lora_single_device.yaml | 1 - 12 files changed, 12 deletions(-) diff --git a/recipes/configs/llama3_2/1B_lora.yaml b/recipes/configs/llama3_2/1B_lora.yaml index dad03a7595..4903e482ba 100644 --- a/recipes/configs/llama3_2/1B_lora.yaml +++ b/recipes/configs/llama3_2/1B_lora.yaml @@ -28,7 +28,6 @@ model: _component_: torchtune.models.llama3_2.lora_llama3_2_1b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/llama3_2/1B_lora_single_device.yaml b/recipes/configs/llama3_2/1B_lora_single_device.yaml index 4e64d59895..911129987c 100644 --- a/recipes/configs/llama3_2/1B_lora_single_device.yaml +++ b/recipes/configs/llama3_2/1B_lora_single_device.yaml @@ -21,7 +21,6 @@ model: _component_: torchtune.models.llama3_2.lora_llama3_2_1b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/llama3_2/1B_qlora_single_device.yaml b/recipes/configs/llama3_2/1B_qlora_single_device.yaml index 41841aa50d..3573ae38fc 100644 --- a/recipes/configs/llama3_2/1B_qlora_single_device.yaml +++ b/recipes/configs/llama3_2/1B_qlora_single_device.yaml @@ -20,7 +20,6 @@ model: _component_: torchtune.models.llama3_2.qlora_llama3_2_1b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/llama3_2/3B_lora.yaml b/recipes/configs/llama3_2/3B_lora.yaml index 9959930f19..0e790b20cb 100644 --- a/recipes/configs/llama3_2/3B_lora.yaml +++ b/recipes/configs/llama3_2/3B_lora.yaml @@ -28,7 +28,6 @@ model: _component_: torchtune.models.llama3_2.lora_llama3_2_3b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/llama3_2/3B_lora_single_device.yaml b/recipes/configs/llama3_2/3B_lora_single_device.yaml index 5bb2fd48a0..29e021d150 100644 --- a/recipes/configs/llama3_2/3B_lora_single_device.yaml +++ b/recipes/configs/llama3_2/3B_lora_single_device.yaml @@ -21,7 +21,6 @@ model: _component_: torchtune.models.llama3_2.lora_llama3_2_3b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/llama3_2/3B_qlora_single_device.yaml b/recipes/configs/llama3_2/3B_qlora_single_device.yaml index a2fe3adaa0..7ffa146e51 100644 --- a/recipes/configs/llama3_2/3B_qlora_single_device.yaml +++ b/recipes/configs/llama3_2/3B_qlora_single_device.yaml @@ -20,7 +20,6 @@ model: _component_: torchtune.models.llama3_2.qlora_llama3_2_3b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 64 # higher increases accuracy and memory lora_alpha: 128 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/0_5B_lora.yaml b/recipes/configs/qwen2_5/0_5B_lora.yaml index 54f92a0071..63ec87897c 100644 --- a/recipes/configs/qwen2_5/0_5B_lora.yaml +++ b/recipes/configs/qwen2_5/0_5B_lora.yaml @@ -22,7 +22,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_0_5b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 32 # higher increases accuracy and memory lora_alpha: 64 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/0_5B_lora_single_device.yaml b/recipes/configs/qwen2_5/0_5B_lora_single_device.yaml index 1487fd1845..e11e34bcb7 100644 --- a/recipes/configs/qwen2_5/0_5B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/0_5B_lora_single_device.yaml @@ -21,7 +21,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_0_5b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 32 # higher increases accuracy and memory lora_alpha: 64 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/1_5B_lora.yaml b/recipes/configs/qwen2_5/1_5B_lora.yaml index b583e537fe..d47835d0b5 100644 --- a/recipes/configs/qwen2_5/1_5B_lora.yaml +++ b/recipes/configs/qwen2_5/1_5B_lora.yaml @@ -22,7 +22,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_1_5b_instruct lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 32 # higher increases accuracy and memory lora_alpha: 64 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/1_5B_lora_single_device.yaml b/recipes/configs/qwen2_5/1_5B_lora_single_device.yaml index 5622dce0c0..e9583ea62a 100644 --- a/recipes/configs/qwen2_5/1_5B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/1_5B_lora_single_device.yaml @@ -21,7 +21,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_1_5b_instruct lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 32 # higher increases accuracy and memory lora_alpha: 64 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/3B_lora.yaml b/recipes/configs/qwen2_5/3B_lora.yaml index f6ec4b8275..ffd3b6c494 100644 --- a/recipes/configs/qwen2_5/3B_lora.yaml +++ b/recipes/configs/qwen2_5/3B_lora.yaml @@ -22,7 +22,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_3b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 8 # higher increases accuracy and memory lora_alpha: 16 # usually alpha=2*rank lora_dropout: 0.0 diff --git a/recipes/configs/qwen2_5/3B_lora_single_device.yaml b/recipes/configs/qwen2_5/3B_lora_single_device.yaml index fbbb72a710..b6c5be1a0a 100644 --- a/recipes/configs/qwen2_5/3B_lora_single_device.yaml +++ b/recipes/configs/qwen2_5/3B_lora_single_device.yaml @@ -21,7 +21,6 @@ model: _component_: torchtune.models.qwen2_5.lora_qwen2_5_3b lora_attn_modules: ['q_proj', 'v_proj', 'output_proj'] apply_lora_to_mlp: True - apply_lora_to_output: False lora_rank: 8 # higher increases accuracy and memory lora_alpha: 16 # usually alpha=2*rank lora_dropout: 0.0