Mistral QLoRA and config spring cleaning (#670)
ebsmothers authored Apr 11, 2024
1 parent 8bb3aae commit 6e9ea22
Showing 23 changed files with 362 additions and 87 deletions.
3 changes: 0 additions & 3 deletions recipes/configs/gemma/2B_full.yaml
@@ -69,9 +69,6 @@ gradient_accumulation_steps: 1
# Training env
device: cuda

# Distributed
cpu_offload: False

# Memory management
enable_activation_checkpointing: True

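A note on the `cpu_offload` key deleted here (and in the other full-finetune configs below): in the distributed recipes this flag presumably mapped to FSDP-style parameter CPU offloading. As a rough illustration only, not torchtune's actual recipe code, enabling that behavior in plain PyTorch looks like the sketch below; the process-group setup and the stand-in model are assumptions made for the example.

import torch
import torch.distributed as dist
from torch.distributed.fsdp import CPUOffload
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

# Assumes a torchrun-style launch so the process-group env vars are set.
dist.init_process_group(backend="nccl")
torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

model = torch.nn.Linear(4096, 4096).cuda()  # stand-in for a full transformer

# offload_params=True keeps sharded parameters in CPU memory between uses,
# trading speed for a smaller GPU footprint; this is the behavior the removed
# `cpu_offload` key presumably toggled.
model = FSDP(model, cpu_offload=CPUOffload(offload_params=True))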
5 changes: 1 addition & 4 deletions recipes/configs/llama2/13B_full.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-13b-hf \
# tune download meta-llama/Llama-2-13b-hf \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2-13b-hf
#
@@ -68,9 +68,6 @@ gradient_accumulation_steps: 1
# Training env
device: cuda

# Distributed
cpu_offload: False

# Memory management
enable_activation_checkpointing: True

4 changes: 2 additions & 2 deletions recipes/configs/llama2/13B_lora.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-13b-hf \
# tune download meta-llama/Llama-2-13b-hf \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2-13b-hf
#
@@ -19,7 +19,7 @@
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# For single device lora finetuning please use 7B_lora_single_device.yaml
# For single device LoRA finetuning please use 7B_lora_single_device.yaml
# or 7B_qlora_single_device.yaml and update the model and checkpoints to
# the 13B model.

5 changes: 1 addition & 4 deletions recipes/configs/llama2/7B_full.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
@@ -63,9 +63,6 @@ gradient_accumulation_steps: 1
# Training env
device: cuda

# Distributed
cpu_offload: False

# Memory management
enable_activation_checkpointing: True

@@ -3,19 +3,19 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
# To launch on a single device, run the following command from root:
# tune run full_finetune_single_device \
# --config llama2/7B_full_single_device_low_memory \
# --config llama2/7B_full_low_memory \
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 1 full_finetune_single_device \
# --config llama2/7B_full_single_device_low_memory \
# tune run full_finetune_single_device \
# --config llama2/7B_full_low_memory \
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.
@@ -48,7 +48,7 @@ resume_from_checkpoint: False

# Fine-tuning arguments
batch_size: 2
epochs: 1
epochs: 3
optimizer:
_component_: bitsandbytes.optim.PagedAdamW
lr: 2e-5
@@ -57,7 +57,7 @@ loss:
_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1

compile: False

# Training environment
device: cuda
@@ -68,9 +68,6 @@ enable_activation_checkpointing: True
# Reduced precision
dtype: bf16

# Model compilation
compile: False

# Logging
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
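In the low-memory config above, the `compile` flag also moves up next to the other fine-tuning arguments instead of living in its own section at the bottom. Assuming the flag simply gates `torch.compile` on the model (a reading of the config, not something this diff shows), the corresponding recipe logic would be on the order of:

import torch

def maybe_compile(model: torch.nn.Module, compile_model: bool) -> torch.nn.Module:
    # When the config's `compile` flag is true, wrap the model with torch.compile;
    # the returned module is used by the training loop exactly like the original.
    return torch.compile(model) if compile_model else model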
4 changes: 2 additions & 2 deletions recipes/configs/llama2/7B_lora.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
@@ -19,7 +19,7 @@
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# For single device lora finetuning please use 7B_lora_single_device.yaml
# For single device LoRA finetuning please use 7B_lora_single_device.yaml
# or 7B_qlora_single_device.yaml


2 changes: 1 addition & 1 deletion recipes/configs/llama2/7B_lora_single_device.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
3 changes: 1 addition & 2 deletions recipes/configs/llama2/7B_qlora_single_device.yaml
@@ -3,7 +3,7 @@
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download meta-llama/Llama-2-7b \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
#
@@ -28,7 +28,6 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
quantize_base: True

checkpointer:
_component_: torchtune.utils.FullModelMetaCheckpointer
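The QLoRA config above drops its explicit `quantize_base: True` entry; that flag is what requests quantized (NF4) storage of the frozen base weights, the "Q" in QLoRA, while the adapter hyperparameters (`lora_rank: 8`, `lora_alpha: 16`) stay as before. Purely as an illustration of the structure involved, not torchtune's implementation, a minimal LoRA-style linear with a frozen base looks like this; the 4-bit storage of the frozen weight that QLoRA adds is only noted in a comment.

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Frozen base projection plus a trainable low-rank update."""

    def __init__(self, in_dim: int, out_dim: int, rank: int = 8, alpha: float = 16.0):
        super().__init__()
        self.base = nn.Linear(in_dim, out_dim, bias=False)
        self.base.weight.requires_grad_(False)  # frozen; QLoRA would also store this weight in 4-bit NF4
        self.lora_a = nn.Linear(in_dim, rank, bias=False)   # trainable down-projection
        self.lora_b = nn.Linear(rank, out_dim, bias=False)  # trainable up-projection
        nn.init.zeros_(self.lora_b.weight)       # adapter starts as a no-op
        self.scaling = alpha / rank

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(x))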
23 changes: 20 additions & 3 deletions recipes/configs/mistral/7B_full.yaml
@@ -1,9 +1,29 @@
# Config for multi-device full finetuning in full_finetune_distributed.py
# using a Mistral 7B model
#
# This config uses hyperparameters based on small set of experiments and information
# available on various forums. These are not meant to replicate the numbers
# from the paper
#
# This config assumes that you've run the following command before launching
# this run:
# tune download mistralai/Mistral-7B-v0.1 \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/Mistral-7B-v0.1
#
# Run this config on 4 GPUs using the following:
# tune run --nproc_per_node 4 full_finetune_distributed --config mistral/7B_full
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 4 full_finetune_distributed \
# --config mistral/7B_full \
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# Single device full finetuning requires more memory optimizations. It's
# best to use 7B_full_single_device.yaml for those cases

# Tokenizer
tokenizer:
@@ -48,9 +68,6 @@ gradient_accumulation_steps: 1
# Training env
device: cuda

# Distributed
cpu_offload: False

# Memory management
enable_activation_checkpointing: True

@@ -1,30 +1,33 @@
# Config for single device full finetuning in full_finetune_single_device.py
# using a Llama2 7B model
# using a Mistral 7B model
#
# This config uses hyperparameters based on small set of experiments and information
# available on various forums. These are not meant to replicate the numbers
# from the paper
#
# This config assumes that you've run the following command before launching
# this run:
# tune download --repo-id meta-llama/Llama-2-7b \
# tune download mistralai/Mistral-7B-v0.1 \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/llama2
# --output-dir /tmp/Mistral-7B-v0.1
#
# To launch on a single device, run the following command from root:
# tune run full_finetune_single_device \
# --config llama2/7B_full_single_device \
# --config mistral/7B_full_low_memory \
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 1 full_finetune_single_device \
# --config llama2/7B_full_single_device \
# tune run full_finetune_single_device \
# --config mistral/7B_full_low_memory \
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.


# Tokenizer
tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer
path: /tmp/llama2/tokenizer.model
_component_: torchtune.models.mistral.mistral_tokenizer
path: /tmp/Mistral-7B-v0.1/tokenizer.model

# Dataset
dataset:
@@ -35,32 +38,33 @@ shuffle: True

# Model Arguments
model:
_component_: torchtune.models.llama2.llama2_7b
_component_: torchtune.models.mistral.mistral_7b

checkpointer:
_component_: torchtune.utils.FullModelMetaCheckpointer
checkpoint_dir: /tmp/llama2
checkpoint_files: [consolidated.00.pth]
_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/Mistral-7B-v0.1
checkpoint_files: [
pytorch_model-00001-of-00002.bin,
pytorch_model-00002-of-00002.bin
]
recipe_checkpoint: null
output_dir: /tmp/llama2
model_type: LLAMA2
output_dir: /tmp/Mistral-7B-v0.1/
model_type: MISTRAL
resume_from_checkpoint: False

# Fine-tuning arguments
batch_size: 2
epochs: 3
optimizer:
_component_: torch.optim.SGD
lr: 2e-5
_component_: bitsandbytes.optim.PagedAdamW
lr: 5e-6
loss:
_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1
compile: False
optimizer_in_bwd: False
optimizer_in_bwd: True


# Training environment
# Training env
device: cuda

# Memory management
@@ -69,9 +73,12 @@ enable_activation_checkpointing: True
# Reduced precision
dtype: bf16

# Model compilation
compile: False

# Logging
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/alpaca-llama2-finetune
output_dir: /tmp/Mistral-7B-v0.1/
log_every_n_steps: null
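Beyond the Llama2-to-Mistral fixes, the low-memory config above changes how optimization itself runs: bitsandbytes' paged AdamW replaces SGD, the learning rate drops to 5e-6, and `optimizer_in_bwd` is turned on. Running the optimizer "in the backward pass" means stepping each parameter as soon as its gradient is ready and freeing that gradient immediately, so a full set of gradients is never resident at once. A generic PyTorch sketch of that pattern, not torchtune's recipe code and with a stand-in model, is:

import torch
import bitsandbytes as bnb

model = torch.nn.Sequential(
    torch.nn.Linear(1024, 1024), torch.nn.ReLU(), torch.nn.Linear(1024, 1024)
).cuda()

# One small optimizer per parameter. PagedAdamW keeps its state in paged memory
# that can spill to CPU under pressure, which is what the config buys by using it.
optimizers = {p: bnb.optim.PagedAdamW([p], lr=5e-6) for p in model.parameters()}

def step_now(param: torch.Tensor) -> None:
    # Called once this parameter's gradient is fully accumulated: apply the
    # update and drop the gradient immediately instead of waiting for a global step.
    optimizers[param].step()
    optimizers[param].zero_grad()

for p in model.parameters():
    p.register_post_accumulate_grad_hook(step_now)

x = torch.randn(8, 1024, device="cuda")
loss = model(x).pow(2).mean()
loss.backward()  # parameter updates happen inside backward; no separate optimizer.step()

Note that fusing the step into backward is incompatible with accumulating gradients across batches, which is consistent with the config keeping gradient_accumulation_steps at 1.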
20 changes: 20 additions & 0 deletions recipes/configs/mistral/7B_lora.yaml
@@ -1,9 +1,29 @@
# Config for multi-device LoRA finetuning in lora_finetune_distributed.py
# using a Mistral 7B model
#
# This config uses hyperparameters based on small set of experiments and information
# available on various forums. These are not meant to replicate the numbers
# from the paper
#
# This config assumes that you've run the following command before launching
# this run:
# tune download mistralai/Mistral-7B-v0.1 \
# --hf-token <HF_TOKEN> \
# --output-dir /tmp/Mistral-7B-v0.1
#
# Run this config on 4 GPUs using the following:
# tune run --nproc_per_node 4 lora_finetune_distributed --config mistral/7B_lora
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
# tune run --nnodes 1 --nproc_per_node 4 lora_finetune_distributed \
# --config mistral/7B_lora \
# checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works best when the model is being fine-tuned on 2+ GPUs.
# For single device LoRA finetuning please use 7B_lora_single_device.yaml
# or 7B_qlora_single_device.yaml for those cases


# Tokenizer