From be9e3c03667ed8079dc9d5c3348a2da342b4381a Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Wed, 4 Sep 2024 14:48:41 -0600 Subject: [PATCH 01/16] add enable_aim build args in all stages Signed-off-by: Anh Uong --- build/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build/Dockerfile b/build/Dockerfile index 116044f33..4bd9cab6a 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -19,6 +19,9 @@ ARG USER=tuning ARG USER_UID=1000 ARG PYTHON_VERSION=3.11 ARG WHEEL_VERSION="" +## Enable Aimstack if requested via ENABLE_AIM set to "true" +ARG ENABLE_AIM=false +ARG ENABLE_FMS_ACCELERATION=false ## Base Layer ################################################################## FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base @@ -105,10 +108,8 @@ FROM cuda-devel AS python-installations ARG WHEEL_VERSION ARG USER ARG USER_UID -ARG ENABLE_FMS_ACCELERATION=false - -## Enable Aimstack if requested via ENABLE_AIM set to "true" -ARG ENABLE_AIM=false +ARG ENABLE_FMS_ACCELERATION +ARG ENABLE_AIM RUN dnf install -y git && \ # perl-Net-SSLeay.x86_64 and server_key.pem are installed with git as dependencies @@ -154,6 +155,7 @@ RUN python -m pip uninstall wheel build -y && \ FROM release-base AS release ARG USER ARG PYTHON_VERSION +ARG ENABLE_AIM RUN mkdir -p /licenses COPY LICENSE /licenses/ From c40ae7f1615b95b2d0c5f02206d1a3799b0f615c Mon Sep 17 00:00:00 2001 From: Abhishek Maurya <124327945+Abhishek-TAMU@users.noreply.github.com> Date: Thu, 12 Sep 2024 21:15:01 -0400 Subject: [PATCH 02/16] fix: remove lm_head post processing (#333) * fix: Removal of lm head hack Signed-off-by: Abhishek * set fms_accelerate to true by default Signed-off-by: Anh Uong --------- Signed-off-by: Abhishek Signed-off-by: Anh Uong Co-authored-by: Anh Uong --- build/Dockerfile | 2 +- build/accelerate_launch.py | 92 -------------------------------------- 2 files changed, 1 insertion(+), 93 deletions(-) diff --git a/build/Dockerfile b/build/Dockerfile index 4bd9cab6a..ffae818da 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -21,7 +21,7 @@ ARG PYTHON_VERSION=3.11 ARG WHEEL_VERSION="" ## Enable Aimstack if requested via ENABLE_AIM set to "true" ARG ENABLE_AIM=false -ARG ENABLE_FMS_ACCELERATION=false +ARG ENABLE_FMS_ACCELERATION=true ## Base Layer ################################################################## FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index d7753728c..50d8eef0c 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -24,18 +24,13 @@ import sys import traceback from pathlib import Path -import json # Third Party from accelerate.commands.launch import launch_command -from transformers import AutoModelForCausalLM, AutoTokenizer -from peft import PeftModel -from torch import bfloat16 # Local from build.utils import ( process_accelerate_launch_args, - get_highest_checkpoint, ) from tuning.utils.config_utils import get_json_config from tuning.utils.error_logging import ( @@ -43,18 +38,10 @@ USER_ERROR_EXIT_CODE, INTERNAL_ERROR_EXIT_CODE, ) -from tuning.data import tokenizer_data_utils ERROR_LOG = "/dev/termination-log" -def get_base_model_from_adapter_config(adapter_config): - """Given path to adapter_config.json file, returns the base model name""" - with open(adapter_config, "r", encoding="utf-8") as config_file: - adapter_config = json.load(config_file) - return adapter_config.get("base_model_name_or_path") - - def main(): if not os.getenv("TERMINATION_LOG_FILE"): os.environ["TERMINATION_LOG_FILE"] = ERROR_LOG @@ -128,85 +115,6 @@ def main(): write_termination_log(f"Unhandled exception during training. {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) - # remove lm_head from granite with llama arch models - try: - checkpoint_dir = job_config.get("save_model_dir") - if not checkpoint_dir: - checkpoint_dir = os.path.join( - output_dir, get_highest_checkpoint(output_dir) - ) - - use_flash_attn = job_config.get("use_flash_attn", True) - adapter_config_path = os.path.join(checkpoint_dir, "adapter_config.json") - tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir) - - if os.path.exists(adapter_config_path): - base_model_path = get_base_model_from_adapter_config(adapter_config_path) - base_model = AutoModelForCausalLM.from_pretrained( - base_model_path, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - - # since the peft library (PEFTModelForCausalLM) does not handle cases - # where the model's layers are modified, in our case the embedding layer - # is modified, so we resize the backbone model's embedding layer with our own - # utility before passing it along to load the PEFT model. - tokenizer_data_utils.tokenizer_and_embedding_resize( - {}, tokenizer=tokenizer, model=base_model - ) - model = PeftModel.from_pretrained( - base_model, - checkpoint_dir, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - else: - model = AutoModelForCausalLM.from_pretrained( - checkpoint_dir, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - - model_arch = model.config.model_type - # check that it is a granite model with llama architecture with tied weights - # ie. lm_head is duplicate of embeddings - - # a fine tuned model will have params_dict.get("model.embed_tokens.weight") - # a prompt adapter has params_dict.get("base_model.model.embed_tokens.weight") - # a lora adapter has params_dict.get("base_model.model.model.embed_tokens.weight") - if model_arch == "llama" and hasattr(model, "lm_head"): - if ( - # lora tuned model has an addt model layer - ( - hasattr(model.model, "model") - and model.lm_head.weight.untyped_storage().data_ptr() - == model.model.model.embed_tokens.weight.untyped_storage().data_ptr() - ) - # prompt tuned model or fine tuned model - or ( - hasattr(model.model, "embed_tokens") - and model.lm_head.weight.untyped_storage().data_ptr() - == model.model.embed_tokens.weight.untyped_storage().data_ptr() - ) - ): - - logging.info("Removing lm_head from checkpoint") - del model.lm_head.weight - - if hasattr(model, "lm_head.weight"): - logging.warning("Failed to delete lm_head.weight from model") - - logging.info("Saving checkpoint to %s", output_dir) - model.save_pretrained(checkpoint_dir) - # save tokenizer with model - tokenizer.save_pretrained(checkpoint_dir) - - except Exception as e: # pylint: disable=broad-except - logging.error(traceback.format_exc()) - write_termination_log(f"Exception encountered removing lm_head from model: {e}") - sys.exit(INTERNAL_ERROR_EXIT_CODE) - # The .complete file will signal to users that we are finished copying # files over if os.path.exists(output_dir): From 673a79c33546d81593260992fffc9f5e6092bca4 Mon Sep 17 00:00:00 2001 From: Angel Luu Date: Fri, 30 Aug 2024 08:55:37 -0600 Subject: [PATCH 03/16] Add README Signed-off-by: Angel Luu --- README.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1859b1b7e..2a1f11deb 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ - [Tips on Parameters to Set](#tips-on-parameters-to-set) - [Tuning Techniques](#tuning-techniques) - [LoRA Tuning Example](#lora-tuning-example) + - [qLoRA Tuning Example](#lora-tuning-example) - [Prompt Tuning](#prompt-tuning) - [Fine Tuning](#fine-tuning) - [FMS Acceleration](#fms-acceleration) @@ -432,6 +433,73 @@ Example 3: _________________________ + +### qLoRA Tuning Example + +This method is similar to LoRA Tuning, but the base model is a quantized model. +Set `peft_method` to `"lora"`. You can pass any of LoraConfig, see section on [LoRA Example](#lora-tuning-example). +In addition, you can pass [LoRA quantization config](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/acceleration_configs/quantized_lora_config.py#L62). +```py +# to use auto_gptq 4bit lora base layers +auto_gptq: AutoGPTQLoraConfig = None + +# to use auto_gptq 4bit lora base layers +bnb_qlora: BNBQLoraConfig = None +``` + +```py +class AutoGPTQLoraConfig: + + # auto_gptq supports various kernels, to select the kernel to use. + kernel: str = "triton_v2" + + # allow auto_gptq to quantize a model before training commences. + # NOTE: currently this is not allowed. + from_quantized: bool = True + +``` + +Example command to run: + +```bash +python tuning/sft_trainer.py \ +--model_name_or_path $MODEL_PATH \ +--tokenizer_name_or_path $MODEL_PATH \ # This field is optional and if not specified, tokenizer from model_name_or_path will be used +--training_data_path $TRAIN_DATA_PATH \ +--output_dir $OUTPUT_PATH \ +--num_train_epochs 40 \ +--per_device_train_batch_size 4 \ +---learning_rate 1e-4 \ +--response_template "\n### Label:" \ +--dataset_text_field "output" \ +--peft_method "lora" \ +--r 8 \ +--lora_dropout 0.05 \ +--lora_alpha 16 \ +--target_modules c_attn c_proj +--auto_gptq triton_v2 + +Equally you can pass in a JSON configuration for running tuning. See [build doc](./build/README.md) for more details. The above can also be passed in as JSON: +```json +{ + "model_name_or_path": $MODEL_PATH, + "training_data_path": $TRAIN_DATA_PATH, + "output_dir": $OUTPUT_PATH, + "num_train_epochs": 40.0, + "per_device_train_batch_size": 4, + "learning_rate": 1e-4, + "response_template": "\n### Label:", + "dataset_text_field": "output", + "peft_method": "lora", + "r": 8, + "lora_dropout": 0.05, + "lora_alpha": 16, + "target_modules": ["c_attn", "c_proj"] + "auto_gptq": ["triton_v2"] +} +``` +_________________________ + ### Prompt Tuning: Specify `peft_method` to `'pt'` . You can additionally pass any arguments from [PromptTuningConfig](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/peft_config.py#L63). @@ -676,4 +744,4 @@ Further details on enabling and using the trackers mentioned above can be found [Prompt Tuning on Twitter Complaints](examples/prompt_tuning_twitter_complaints/README.md) -A good simple example can be found [here](examples/kfto-kueue-sft-trainer.yaml) which launches a Kubernetes-native `PyTorchJob` using the [Kubeflow Training Operator](https://github.com/kubeflow/training-operator/) with [Kueue](https://github.com/kubernetes-sigs/kueue) for the queue management of tuning jobs. +A good simple example can be found [here](examples/kfto-kueue-sft-trainer.yaml) which launches a Kubernetes-native `PyTorchJob` using the [Kubeflow Training Operator](https://github.com/kubeflow/training-operator/) with [Kueue](https://github.com/kubernetes-sigs/kueue) for the queue management of tuning jobs. \ No newline at end of file From 1a249408022528e97e6abce885f7a57af2138e5c Mon Sep 17 00:00:00 2001 From: Hari Date: Tue, 3 Sep 2024 23:21:45 +0530 Subject: [PATCH 04/16] fix: need to pass skip_prepare_dataset for pretokenized dataset due to breaking change in HF SFTTrainer (#326) * fix: need to pass skip_prepare_dataset for pretokenized dataset due to breaking change in HF SFTTrainer Signed-off-by: Harikrishnan Balagopal * fix: wrong dataset paths, was using non-tokenized data in pre-tokenized dataset tests Signed-off-by: Harikrishnan Balagopal --------- Signed-off-by: Harikrishnan Balagopal Signed-off-by: Angel Luu --- tests/test_sft_trainer.py | 5 ++--- tuning/sft_trainer.py | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index fd36785ce..251f6d6b9 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -35,7 +35,6 @@ EMPTY_DATA, MALFORMATTED_DATA, MODEL_NAME, - TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON, TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL, TWITTER_COMPLAINTS_DATA_JSON, TWITTER_COMPLAINTS_DATA_JSONL, @@ -850,8 +849,8 @@ def test_run_with_good_experimental_metadata(): @pytest.mark.parametrize( "dataset_path", [ - TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL, - TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON, + TWITTER_COMPLAINTS_TOKENIZED_JSONL, + TWITTER_COMPLAINTS_TOKENIZED_JSON, ], ) ### Tests for pretokenized data diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index b5e6cb62e..bc1937c32 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -66,6 +66,7 @@ from tuning.utils.preprocessing_utils import ( format_dataset, get_data_collator, + is_pretokenized_dataset, validate_data_args, ) @@ -318,6 +319,11 @@ def train( } training_args = SFTConfig(**transformer_kwargs) + dataset_kwargs = {} + if is_pretokenized_dataset( + data_args.training_data_path or data_args.validation_data_path + ): + dataset_kwargs["skip_prepare_dataset"] = True trainer = SFTTrainer( model=model, tokenizer=tokenizer, @@ -330,6 +336,7 @@ def train( max_seq_length=max_seq_length, callbacks=trainer_callbacks, peft_config=peft_config, + dataset_kwargs=dataset_kwargs, ) # We track additional metrics and experiment metadata after trainer object creation From c5f471afe5fb8e7b7b971c2567c6d2bf54fe9140 Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Wed, 4 Sep 2024 10:05:10 -0600 Subject: [PATCH 05/16] feat: install fms-acceleration to enable qlora (#284) * add fms-acceleration deps and pytorh layer with cuda Signed-off-by: Anh-Uong * add build args needed Signed-off-by: Anh-Uong * allow transformers v4.40 for fms-acceleration Signed-off-by: Anh-Uong * set wider transformers version Signed-off-by: Anh-Uong * remove nvidia stage Signed-off-by: Anh-Uong * add gcc and dev tools Signed-off-by: Anh-Uong * install c compiler and python deps Signed-off-by: Anh-Uong * remove transformers lower bound and dev deps Signed-off-by: Anh-Uong * install python-devel by version Signed-off-by: Anh Uong * update python installations Signed-off-by: Anh Uong --------- Signed-off-by: Anh-Uong Signed-off-by: Anh Uong Signed-off-by: Angel Luu --- build/Dockerfile | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/build/Dockerfile b/build/Dockerfile index 281f80dc6..116044f33 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -27,9 +27,10 @@ ARG PYTHON_VERSION ARG USER ARG USER_UID +# Note this works for 3.9, 3.11, 3.12 RUN dnf remove -y --disableplugin=subscription-manager \ subscription-manager \ - && dnf install -y python${PYTHON_VERSION} procps \ + && dnf install -y python${PYTHON_VERSION} procps g++ python${PYTHON_VERSION}-devel \ && ln -s /usr/bin/python${PYTHON_VERSION} /bin/python \ && python -m ensurepip --upgrade \ && python -m pip install --upgrade pip \ @@ -104,6 +105,7 @@ FROM cuda-devel AS python-installations ARG WHEEL_VERSION ARG USER ARG USER_UID +ARG ENABLE_FMS_ACCELERATION=false ## Enable Aimstack if requested via ENABLE_AIM set to "true" ARG ENABLE_AIM=false @@ -132,12 +134,19 @@ RUN if [[ -z "${WHEEL_VERSION}" ]]; \ RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \ python -m pip install --user wheel && \ python -m pip install --user "$(head bdist_name)" && \ - python -m pip install --user "$(head bdist_name)[flash-attn]" && \ - if [[ "${ENABLE_AIM}" == "true" ]]; then \ + python -m pip install --user "$(head bdist_name)[flash-attn]" + +RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \ + python -m pip install --user "$(head bdist_name)[fms-accel]"; \ + python -m fms_acceleration.cli install fms_acceleration_peft; \ + fi + +RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \ python -m pip install --user "$(head bdist_name)[aim]"; \ - fi && \ + fi + # Clean up the wheel module. It's only needed by flash-attn install - python -m pip uninstall wheel build -y && \ +RUN python -m pip uninstall wheel build -y && \ # Cleanup the bdist whl file rm $(head bdist_name) /tmp/bdist_name From 9f23d9e22e74f85c57201f22e213f7733e63710c Mon Sep 17 00:00:00 2001 From: Angel Luu Date: Wed, 4 Sep 2024 17:06:42 -0600 Subject: [PATCH 06/16] Update for target modules Signed-off-by: Angel Luu --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a1f11deb..c164b8dc9 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ - [Tips on Parameters to Set](#tips-on-parameters-to-set) - [Tuning Techniques](#tuning-techniques) - [LoRA Tuning Example](#lora-tuning-example) - - [qLoRA Tuning Example](#lora-tuning-example) + - [qLoRA Tuning Example](#qlora-tuning-example) - [Prompt Tuning](#prompt-tuning) - [Fine Tuning](#fine-tuning) - [FMS Acceleration](#fms-acceleration) @@ -478,6 +478,7 @@ python tuning/sft_trainer.py \ --lora_alpha 16 \ --target_modules c_attn c_proj --auto_gptq triton_v2 +``` Equally you can pass in a JSON configuration for running tuning. See [build doc](./build/README.md) for more details. The above can also be passed in as JSON: ```json @@ -498,6 +499,11 @@ Equally you can pass in a JSON configuration for running tuning. See [build doc] "auto_gptq": ["triton_v2"] } ``` + +Similarly to LoRA, the `target_modules` are the names of the modules to apply the adapter to. See the LoRA [section](#lora-tuning-example) on `target_modules` for more info. + +Note that with LoRA tuning technique, setting `all-linear` on `target_modules` returns linear modules. And with qLoRA tuning technique, `all-linear` returns all quant linear modules, excluding `lm_head`. + _________________________ ### Prompt Tuning: From 82005048887389212b1411178922bfb223ed3a18 Mon Sep 17 00:00:00 2001 From: Padmanabha V Seshadri Date: Fri, 6 Sep 2024 02:39:28 +0530 Subject: [PATCH 07/16] feat: Migrating the trainer controller to python logger (#309) * fix: Migrate tranformer logging to python logging Signed-off-by: Padmanabha V Seshadri * fix: Migrate tranformer logging to python logging Signed-off-by: Padmanabha V Seshadri * fix: Removed unwanted file Signed-off-by: Padmanabha V Seshadri * fix: Log levels obtained from reversing the dictionary Signed-off-by: Padmanabha V Seshadri * fix: Format issues Signed-off-by: Padmanabha V Seshadri * fix: Variable names made meaningful Signed-off-by: Padmanabha V Seshadri * fix: Removed unwanted log line Signed-off-by: Padmanabha V Seshadri * fix: Added name to getLogger Signed-off-by: Padmanabha V Seshadri * fix: Added default logging level to DEBUG Signed-off-by: Padmanabha V Seshadri * fix: Added default logging level to DEBUG Signed-off-by: Padmanabha V Seshadri * fix: Added default logging level to DEBUG Signed-off-by: Padmanabha V Seshadri * fix: Removed setLevel() calls from the packages Signed-off-by: Padmanabha V Seshadri * fix: Format issues resolved Signed-off-by: Padmanabha V Seshadri --------- Signed-off-by: Padmanabha V Seshadri Signed-off-by: Angel Luu --- tuning/trainercontroller/callback.py | 10 +++++----- .../controllermetrics/trainingstate.py | 3 --- tuning/trainercontroller/operations/hfcontrols.py | 4 +++- tuning/trainercontroller/operations/logcontrol.py | 12 ++++++------ tuning/trainercontroller/operations/operation.py | 6 ++---- tuning/trainercontroller/patience.py | 6 ++++-- 6 files changed, 20 insertions(+), 21 deletions(-) diff --git a/tuning/trainercontroller/callback.py b/tuning/trainercontroller/callback.py index fad1bbf70..a1b3397d7 100644 --- a/tuning/trainercontroller/callback.py +++ b/tuning/trainercontroller/callback.py @@ -18,6 +18,7 @@ # Standard from typing import Dict, List, Union import inspect +import logging import os import re @@ -29,7 +30,6 @@ TrainerState, TrainingArguments, ) -from transformers.utils import logging import yaml # Local @@ -45,7 +45,7 @@ from tuning.trainercontroller.patience import PatienceControl from tuning.utils.evaluator import MetricUnavailableError, RuleEvaluator -logger = logging.get_logger(__name__) +logger = logging.getLogger(__name__) # Configuration keys CONTROLLER_METRICS_KEY = "controller_metrics" @@ -66,7 +66,7 @@ DEFAULT_OPERATIONS = {"operations": [{"name": "hfcontrols", "class": "HFControls"}]} DEFAULT_METRICS = {} DEFAULT_CONFIG = {} -DEFAULT_TRIGGER_LOG_LEVEL = "debug" +DEFAULT_TRIGGER_LOG_LEVEL = "DEBUG" # pylint: disable=too-many-instance-attributes class TrainerControllerCallback(TrainerCallback): @@ -305,7 +305,7 @@ def on_init_end( kwargs["state"] = state kwargs["control"] = control - log_levels = logging.get_log_levels_dict() + log_levels = dict((value, key) for key, value in logging._levelToName.items()) # Check if there any metrics listed in the configuration if ( CONTROLLER_METRICS_KEY not in self.trainer_controller_config @@ -407,7 +407,7 @@ def on_init_end( control.config = controller[CONTROLLER_CONFIG_KEY] config_log_level_str = control.config.get( CONTROLLER_CONFIG_TRIGGER_LOG_LEVEL, config_log_level_str - ) + ).upper() if config_log_level_str not in log_levels: logger.warning( "Incorrect trigger log-level [%s] specified in the config." diff --git a/tuning/trainercontroller/controllermetrics/trainingstate.py b/tuning/trainercontroller/controllermetrics/trainingstate.py index 8dc276339..06da4035a 100644 --- a/tuning/trainercontroller/controllermetrics/trainingstate.py +++ b/tuning/trainercontroller/controllermetrics/trainingstate.py @@ -21,13 +21,10 @@ # Third Party from transformers import TrainerState -from transformers.utils import logging # Local from tuning.trainercontroller.controllermetrics.metricshandler import MetricHandler -logger = logging.get_logger(__name__) - class TrainingState(MetricHandler): """Implements the controller metric which exposes the trainer state""" diff --git a/tuning/trainercontroller/operations/hfcontrols.py b/tuning/trainercontroller/operations/hfcontrols.py index 0548b4c12..90988c16a 100644 --- a/tuning/trainercontroller/operations/hfcontrols.py +++ b/tuning/trainercontroller/operations/hfcontrols.py @@ -10,6 +10,8 @@ # Local from .operation import Operation +logger = logging.getLogger(__name__) + class HFControls(Operation): """Implements the control actions for the HuggingFace controls in @@ -37,7 +39,7 @@ def control_action(self, control: TrainerControl, **kwargs): control: TrainerControl. Data class for controls. kwargs: List of arguments (key, value)-pairs """ - logging.debug("Arguments passed to control_action: %s", repr(kwargs)) + logger.debug("Arguments passed to control_action: %s", repr(kwargs)) frame_info = inspect.currentframe().f_back arg_values = inspect.getargvalues(frame_info) setattr(control, arg_values.locals["action"], True) diff --git a/tuning/trainercontroller/operations/logcontrol.py b/tuning/trainercontroller/operations/logcontrol.py index 385de3b4d..eabb420c9 100644 --- a/tuning/trainercontroller/operations/logcontrol.py +++ b/tuning/trainercontroller/operations/logcontrol.py @@ -1,12 +1,13 @@ +# Standard +import logging + # Third Party from transformers import TrainingArguments -from transformers.utils import logging # Local from .operation import Operation -logger = logging.get_logger(__name__) -logger.setLevel(level=logging.DEBUG) +logger = logging.getLogger(__name__) class LogControl(Operation): @@ -20,12 +21,11 @@ def __init__(self, log_format: str, log_level: str, **kwargs): Args: kwargs: List of arguments (key, value)-pairs """ - log_levels = logging.get_log_levels_dict() - if log_level not in log_levels: + self.log_level = getattr(logging, log_level.upper(), None) + if not isinstance(self.log_level, int): raise ValueError( "Specified log_level [%s] is invalid for LogControl" % (log_level) ) - self.log_level = log_levels[log_level] self.log_format = log_format super().__init__(**kwargs) diff --git a/tuning/trainercontroller/operations/operation.py b/tuning/trainercontroller/operations/operation.py index 70805a015..f6b4884fc 100644 --- a/tuning/trainercontroller/operations/operation.py +++ b/tuning/trainercontroller/operations/operation.py @@ -1,12 +1,10 @@ # Standard import abc import inspect +import logging import re -# Third Party -from transformers.utils import logging - -logger = logging.get_logger(__name__) +logger = logging.getLogger(__name__) class Operation(metaclass=abc.ABCMeta): diff --git a/tuning/trainercontroller/patience.py b/tuning/trainercontroller/patience.py index ecdb0699a..bda91363c 100644 --- a/tuning/trainercontroller/patience.py +++ b/tuning/trainercontroller/patience.py @@ -31,6 +31,8 @@ # will be exceeded afer the fifth event. MODE_NO_RESET_ON_FAILURE = "no_reset_on_failure" +logger = logging.getLogger(__name__) + class PatienceControl: """Implements the patience control for every rule""" @@ -49,7 +51,7 @@ def should_tolerate( elif self._mode == MODE_RESET_ON_FAILURE: self._patience_counter = 0 if self._patience_counter <= self._patience_threshold: - logging.debug( + logger.debug( "Control {} triggered on event {}: " "Enforcing patience [patience_counter = {:.2f}, " "patience_threshold = {:.2f}]".format( @@ -60,7 +62,7 @@ def should_tolerate( ) ) return True - logging.debug( + logger.debug( "Control {} triggered on event {}: " "Exceeded patience [patience_counter = {:.2f}, " "patience_threshold = {:.2f}]".format( From 761cde41d4d5331cbfab9f1e64b310b19b5c0c98 Mon Sep 17 00:00:00 2001 From: Hari Date: Tue, 10 Sep 2024 20:54:34 +0530 Subject: [PATCH 08/16] fix: remove fire for handling CLI args (#324) Signed-off-by: Mehant Kammakomati Signed-off-by: Harikrishnan Balagopal Signed-off-by: Anh Uong Co-authored-by: Mehant Kammakomati Signed-off-by: Angel Luu --- pyproject.toml | 1 - tuning/sft_trainer.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e31192470..2675f49b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ dependencies = [ "trl>=0.9.3,<1.0", "peft>=0.8.0,<0.13", "datasets>=2.15.0,<3.0", -"fire>=0.5.0,<1.0", "simpleeval>=0.9.13,<1.0", ] diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index bc1937c32..2ab8f7de0 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -37,7 +37,6 @@ ) from transformers.utils import is_accelerate_available from trl import SFTConfig, SFTTrainer -import fire import transformers # Local @@ -515,7 +514,7 @@ def parse_arguments(parser, json_config=None): ) -def main(**kwargs): # pylint: disable=unused-argument +def main(): parser = get_parser() logger = logging.getLogger() job_config = get_json_config() @@ -636,4 +635,4 @@ def main(**kwargs): # pylint: disable=unused-argument if __name__ == "__main__": - fire.Fire(main) + main() From 25abbca7e9f7c2fd837bbeeef85515e1a7ae7d25 Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Tue, 10 Sep 2024 15:07:07 -0600 Subject: [PATCH 09/16] dep: cap transformers version (#335) - FSDP bug in accelerate v0.34 Signed-off-by: Anh Uong Signed-off-by: Angel Luu --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2675f49b4..332135020 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers=[ ] dependencies = [ "numpy>=1.26.4,<2.0", -"accelerate>=0.20.3,<0.40", +"accelerate>=0.20.3,<0.34", "transformers>4.41,<5.0", "torch>=2.2.0,<3.0", "sentencepiece>=0.1.99,<0.3", From e1543348bb42646665cbef3824f4dc1b5fd39938 Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 10 Sep 2024 17:19:22 -0400 Subject: [PATCH 10/16] deps: Add protobuf to enable compatibility with certain models (#336) Signed-off-by: Will Johnson Signed-off-by: Angel Luu --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 332135020..aae1a9dd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "tqdm>=4.66.2,<5.0", "trl>=0.9.3,<1.0", "peft>=0.8.0,<0.13", +"protobuf>=5.28.0,<6.0.0", "datasets>=2.15.0,<3.0", "simpleeval>=0.9.13,<1.0", ] From 0c6c47659775bf6f00872cba512368ad774f94d6 Mon Sep 17 00:00:00 2001 From: Angel Luu Date: Wed, 11 Sep 2024 10:23:44 -0600 Subject: [PATCH 11/16] Add more details on qLORA Signed-off-by: Angel Luu --- README.md | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index c164b8dc9..26c1da347 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ - [Tips on Parameters to Set](#tips-on-parameters-to-set) - [Tuning Techniques](#tuning-techniques) - [LoRA Tuning Example](#lora-tuning-example) - - [qLoRA Tuning Example](#qlora-tuning-example) + - [GPTQ-LoRA with AutoGPTQ Tuning Example](#gptq-lora-with-autogptq-tuning-example) - [Prompt Tuning](#prompt-tuning) - [Fine Tuning](#fine-tuning) - [FMS Acceleration](#fms-acceleration) @@ -434,30 +434,16 @@ Example 3: _________________________ -### qLoRA Tuning Example +### GPTQ-LoRA with AutoGPTQ Tuning Example -This method is similar to LoRA Tuning, but the base model is a quantized model. -Set `peft_method` to `"lora"`. You can pass any of LoraConfig, see section on [LoRA Example](#lora-tuning-example). -In addition, you can pass [LoRA quantization config](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/acceleration_configs/quantized_lora_config.py#L62). -```py -# to use auto_gptq 4bit lora base layers -auto_gptq: AutoGPTQLoraConfig = None - -# to use auto_gptq 4bit lora base layers -bnb_qlora: BNBQLoraConfig = None -``` - -```py -class AutoGPTQLoraConfig: +This method is similar to LoRA Tuning, but the base model is a quantized model. We currently only support GPTQ-LoRA model that has been quantized with 4-bit AutoGPTQ technique. Bits-and-Bytes (BNB) quantized LoRA is not yet enabled. +The qLoRA tuning technique is enabled via the [fms-acceleration](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/README.md#fms-acceleration) package. +You can see details on a sample configuration of Accelerated GPTQ-LoRA [here](https://github.com/foundation-model-stack/fms-acceleration/blob/main/sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml). - # auto_gptq supports various kernels, to select the kernel to use. - kernel: str = "triton_v2" - # allow auto_gptq to quantize a model before training commences. - # NOTE: currently this is not allowed. - from_quantized: bool = True +To use GPTQ-LoRA technique, you can set the `quantized_lora_config` defined [here](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/acceleration_configs/quantized_lora_config.py). See the Notes section of FMS Acceleration doc [below](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/README.md#fms-acceleration) for usage. The only kernel we are supporting currently is `triton_v2`. -``` +In addition, LoRA tuning technique is required to be used, set `peft_method` to `"lora"` and pass any arguments from [LoraConfig](https://github.com/foundation-model-stack/fms-hf-tuning/blob/main/tuning/config/peft_config.py#L21). Example command to run: @@ -469,18 +455,21 @@ python tuning/sft_trainer.py \ --output_dir $OUTPUT_PATH \ --num_train_epochs 40 \ --per_device_train_batch_size 4 \ ----learning_rate 1e-4 \ +--learning_rate 1e-4 \ --response_template "\n### Label:" \ --dataset_text_field "output" \ --peft_method "lora" \ --r 8 \ --lora_dropout 0.05 \ --lora_alpha 16 \ ---target_modules c_attn c_proj ---auto_gptq triton_v2 +--target_modules c_attn c_proj \ +--auto_gptq triton_v2 \ # setting quantized_lora_config +--torch_dtype float16 \ # need this for triton_v2 +--fp16 \ # need this for triton_v2 ``` Equally you can pass in a JSON configuration for running tuning. See [build doc](./build/README.md) for more details. The above can also be passed in as JSON: + ```json { "model_name_or_path": $MODEL_PATH, @@ -495,8 +484,10 @@ Equally you can pass in a JSON configuration for running tuning. See [build doc] "r": 8, "lora_dropout": 0.05, "lora_alpha": 16, - "target_modules": ["c_attn", "c_proj"] - "auto_gptq": ["triton_v2"] + "target_modules": ["c_attn", "c_proj"], + "auto_gptq": ["triton_v2"], // setting quantized_lora_config + "torch_dtype": "float16", // need this for triton_v2 + "fp16": true // need this for triton_v2 } ``` From 427202f65434f1447a6832f801f4ead7fffa26da Mon Sep 17 00:00:00 2001 From: Anh Uong Date: Wed, 4 Sep 2024 14:48:41 -0600 Subject: [PATCH 12/16] add enable_aim build args in all stages Signed-off-by: Anh Uong Signed-off-by: Angel Luu --- build/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build/Dockerfile b/build/Dockerfile index 116044f33..4bd9cab6a 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -19,6 +19,9 @@ ARG USER=tuning ARG USER_UID=1000 ARG PYTHON_VERSION=3.11 ARG WHEEL_VERSION="" +## Enable Aimstack if requested via ENABLE_AIM set to "true" +ARG ENABLE_AIM=false +ARG ENABLE_FMS_ACCELERATION=false ## Base Layer ################################################################## FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base @@ -105,10 +108,8 @@ FROM cuda-devel AS python-installations ARG WHEEL_VERSION ARG USER ARG USER_UID -ARG ENABLE_FMS_ACCELERATION=false - -## Enable Aimstack if requested via ENABLE_AIM set to "true" -ARG ENABLE_AIM=false +ARG ENABLE_FMS_ACCELERATION +ARG ENABLE_AIM RUN dnf install -y git && \ # perl-Net-SSLeay.x86_64 and server_key.pem are installed with git as dependencies @@ -154,6 +155,7 @@ RUN python -m pip uninstall wheel build -y && \ FROM release-base AS release ARG USER ARG PYTHON_VERSION +ARG ENABLE_AIM RUN mkdir -p /licenses COPY LICENSE /licenses/ From b15a07b5e67be7499c51716ff5132b63193d209f Mon Sep 17 00:00:00 2001 From: Abhishek Maurya <124327945+Abhishek-TAMU@users.noreply.github.com> Date: Thu, 12 Sep 2024 21:15:01 -0400 Subject: [PATCH 13/16] fix: remove lm_head post processing (#333) * fix: Removal of lm head hack Signed-off-by: Abhishek * set fms_accelerate to true by default Signed-off-by: Anh Uong --------- Signed-off-by: Abhishek Signed-off-by: Anh Uong Co-authored-by: Anh Uong Signed-off-by: Angel Luu --- build/Dockerfile | 2 +- build/accelerate_launch.py | 92 -------------------------------------- 2 files changed, 1 insertion(+), 93 deletions(-) diff --git a/build/Dockerfile b/build/Dockerfile index 4bd9cab6a..ffae818da 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -21,7 +21,7 @@ ARG PYTHON_VERSION=3.11 ARG WHEEL_VERSION="" ## Enable Aimstack if requested via ENABLE_AIM set to "true" ARG ENABLE_AIM=false -ARG ENABLE_FMS_ACCELERATION=false +ARG ENABLE_FMS_ACCELERATION=true ## Base Layer ################################################################## FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base diff --git a/build/accelerate_launch.py b/build/accelerate_launch.py index d7753728c..50d8eef0c 100644 --- a/build/accelerate_launch.py +++ b/build/accelerate_launch.py @@ -24,18 +24,13 @@ import sys import traceback from pathlib import Path -import json # Third Party from accelerate.commands.launch import launch_command -from transformers import AutoModelForCausalLM, AutoTokenizer -from peft import PeftModel -from torch import bfloat16 # Local from build.utils import ( process_accelerate_launch_args, - get_highest_checkpoint, ) from tuning.utils.config_utils import get_json_config from tuning.utils.error_logging import ( @@ -43,18 +38,10 @@ USER_ERROR_EXIT_CODE, INTERNAL_ERROR_EXIT_CODE, ) -from tuning.data import tokenizer_data_utils ERROR_LOG = "/dev/termination-log" -def get_base_model_from_adapter_config(adapter_config): - """Given path to adapter_config.json file, returns the base model name""" - with open(adapter_config, "r", encoding="utf-8") as config_file: - adapter_config = json.load(config_file) - return adapter_config.get("base_model_name_or_path") - - def main(): if not os.getenv("TERMINATION_LOG_FILE"): os.environ["TERMINATION_LOG_FILE"] = ERROR_LOG @@ -128,85 +115,6 @@ def main(): write_termination_log(f"Unhandled exception during training. {e}") sys.exit(INTERNAL_ERROR_EXIT_CODE) - # remove lm_head from granite with llama arch models - try: - checkpoint_dir = job_config.get("save_model_dir") - if not checkpoint_dir: - checkpoint_dir = os.path.join( - output_dir, get_highest_checkpoint(output_dir) - ) - - use_flash_attn = job_config.get("use_flash_attn", True) - adapter_config_path = os.path.join(checkpoint_dir, "adapter_config.json") - tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir) - - if os.path.exists(adapter_config_path): - base_model_path = get_base_model_from_adapter_config(adapter_config_path) - base_model = AutoModelForCausalLM.from_pretrained( - base_model_path, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - - # since the peft library (PEFTModelForCausalLM) does not handle cases - # where the model's layers are modified, in our case the embedding layer - # is modified, so we resize the backbone model's embedding layer with our own - # utility before passing it along to load the PEFT model. - tokenizer_data_utils.tokenizer_and_embedding_resize( - {}, tokenizer=tokenizer, model=base_model - ) - model = PeftModel.from_pretrained( - base_model, - checkpoint_dir, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - else: - model = AutoModelForCausalLM.from_pretrained( - checkpoint_dir, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=bfloat16 if use_flash_attn else None, - ) - - model_arch = model.config.model_type - # check that it is a granite model with llama architecture with tied weights - # ie. lm_head is duplicate of embeddings - - # a fine tuned model will have params_dict.get("model.embed_tokens.weight") - # a prompt adapter has params_dict.get("base_model.model.embed_tokens.weight") - # a lora adapter has params_dict.get("base_model.model.model.embed_tokens.weight") - if model_arch == "llama" and hasattr(model, "lm_head"): - if ( - # lora tuned model has an addt model layer - ( - hasattr(model.model, "model") - and model.lm_head.weight.untyped_storage().data_ptr() - == model.model.model.embed_tokens.weight.untyped_storage().data_ptr() - ) - # prompt tuned model or fine tuned model - or ( - hasattr(model.model, "embed_tokens") - and model.lm_head.weight.untyped_storage().data_ptr() - == model.model.embed_tokens.weight.untyped_storage().data_ptr() - ) - ): - - logging.info("Removing lm_head from checkpoint") - del model.lm_head.weight - - if hasattr(model, "lm_head.weight"): - logging.warning("Failed to delete lm_head.weight from model") - - logging.info("Saving checkpoint to %s", output_dir) - model.save_pretrained(checkpoint_dir) - # save tokenizer with model - tokenizer.save_pretrained(checkpoint_dir) - - except Exception as e: # pylint: disable=broad-except - logging.error(traceback.format_exc()) - write_termination_log(f"Exception encountered removing lm_head from model: {e}") - sys.exit(INTERNAL_ERROR_EXIT_CODE) - # The .complete file will signal to users that we are finished copying # files over if os.path.exists(output_dir): From 5dd5494a4e0fae9bd568bb868925b80dac6653ae Mon Sep 17 00:00:00 2001 From: Angel Luu Date: Mon, 16 Sep 2024 09:45:40 -0600 Subject: [PATCH 14/16] feat: Add deps to evaluate qLora tuned model (#312) * Add support to load qLora tuned model in run_inference.py script Signed-off-by: Angel Luu * Remove comment Signed-off-by: Angel Luu * Disable gptq by default Signed-off-by: Angel Luu * Remove the gptq-dev install in Dockerfile Signed-off-by: Angel Luu * Rename gptq-dev package from gptq Signed-off-by: Angel Luu * Add comments in run_inference.py Signed-off-by: Angel Luu * Update device to cuda Signed-off-by: Angel Luu * Add in the case that there's no adapter found Signed-off-by: Angel Luu * Use torch.float16 for quantized Signed-off-by: Angel Luu --------- Signed-off-by: Angel Luu --- pyproject.toml | 1 + scripts/run_inference.py | 73 +++++++++++++++++++++++++++++++--------- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aae1a9dd7..fcb049821 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dev = ["wheel>=0.42.0,<1.0", "packaging>=23.2,<25", "ninja>=1.11.1.1,<2.0", "sci flash-attn = ["flash-attn>=2.5.3,<3.0"] aim = ["aim>=3.19.0,<4.0"] fms-accel = ["fms-acceleration>=0.1"] +gptq-dev = ["auto_gptq>0.4.2", "optimum>=1.15.0"] [tool.setuptools.packages.find] diff --git a/scripts/run_inference.py b/scripts/run_inference.py index d64bf926b..7e4465cac 100644 --- a/scripts/run_inference.py +++ b/scripts/run_inference.py @@ -30,7 +30,7 @@ # Third Party from peft import PeftModel from tqdm import tqdm -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig import torch # Local @@ -176,6 +176,8 @@ def load( else {} ) tokenizer = AutoTokenizer.from_pretrained(checkpoint_path) + device = "cuda" if torch.cuda.is_available() else None + print(f"Inferred device: {device}") # Apply the configs to the adapter config of this model; if no overrides # are provided, then the context manager doesn't have any effect. try: @@ -183,13 +185,36 @@ def load( try: if base_model_name_or_path is None: raise ValueError("base_model_name_or_path has to be passed") - base_model = AutoModelForCausalLM.from_pretrained( - base_model_name_or_path, - attn_implementation="flash_attention_2" - if use_flash_attn - else None, - torch_dtype=torch.bfloat16 if use_flash_attn else None, - ) + + if ( + has_quantized_config(base_model_name_or_path) + and device == "cuda" + ): + # Using GPTQConfig from HF, avail params are here + # https://huggingface.co/docs/transformers/en/main_classes/quantization#transformers.GPTQConfig + # We only support 4-bit AutoGPTQ, so setting bits to 4 + # setting exllama kernel to version 2 as it's a faster kernel + gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2}) + + # Since we are using exllama kernel, we need torch.float16 as torch_dtype + base_model = AutoModelForCausalLM.from_pretrained( + base_model_name_or_path, + attn_implementation="flash_attention_2" + if use_flash_attn + else None, + device_map=device, + torch_dtype=torch.float16, + quantization_config=gptq_config, + ) + else: + base_model = AutoModelForCausalLM.from_pretrained( + base_model_name_or_path, + attn_implementation="flash_attention_2" + if use_flash_attn + else None, + torch_dtype=torch.bfloat16 if use_flash_attn else None, + ) + # since the peft library (PEFTModelForCausalLM) does not handle cases # where the model's layers are modified, in our case the embedding layer # is modified, so we resize the backbone model's embedding layer with our own @@ -211,14 +236,28 @@ def load( except FileNotFoundError: print("No adapter config found! Loading as a merged model...") # Unable to find the adapter config; fall back to loading as a merged model - model = AutoModelForCausalLM.from_pretrained( - checkpoint_path, - attn_implementation="flash_attention_2" if use_flash_attn else None, - torch_dtype=torch.bfloat16 if use_flash_attn else None, - ) + if has_quantized_config(checkpoint_path) and device == "cuda": + # Using GPTQConfig from HF, avail params are here + # https://huggingface.co/docs/transformers/en/main_classes/quantization#transformers.GPTQConfig + # We only support 4-bit AutoGPTQ, so setting bits to 4 + # setting exllama kernel to version 2 as it's a faster kernel + gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2}) + + # Since we are using exllama kernel, we need torch.float16 as torch_dtype + model = AutoModelForCausalLM.from_pretrained( + checkpoint_path, + attn_implementation="flash_attention_2" if use_flash_attn else None, + device_map=device, + torch_dtype=torch.float16, + quantization_config=gptq_config, + ) + else: + model = AutoModelForCausalLM.from_pretrained( + checkpoint_path, + attn_implementation="flash_attention_2" if use_flash_attn else None, + torch_dtype=torch.bfloat16 if use_flash_attn else None, + ) - device = "cuda" if torch.cuda.is_available() else None - print(f"Inferred device: {device}") model.to(device) return cls(model, tokenizer, device) @@ -327,5 +366,9 @@ def main(): print(f"Exported results to: {args.out_file}") +def has_quantized_config(model_path: str): + return os.path.exists(os.path.join(model_path, "quantize_config.json")) + + if __name__ == "__main__": main() From cd6ba00623c2b1598f2082b775161911970c5eba Mon Sep 17 00:00:00 2001 From: Abhishek Maurya <124327945+Abhishek-TAMU@users.noreply.github.com> Date: Mon, 16 Sep 2024 12:27:01 -0400 Subject: [PATCH 15/16] feat: Add support for smoothly resuming training from a saved checkpoint (#300) * Add feature of resume training Signed-off-by: Abhishek * Remove lastcheckpoints conditions Signed-off-by: Abhishek * PR Changes Signed-off-by: Abhishek * feat:resume tuning based on value from user's flag Signed-off-by: Abhishek * test:added unit tests for resume tuning feature Signed-off-by: Abhishek * test: PR changes of resume from checkpoint feature Signed-off-by: Abhishek * fix: Modified test fn descripts, added readme Signed-off-by: Abhishek --------- Signed-off-by: Abhishek Co-authored-by: Anh Uong --- .pylintrc | 2 +- README.md | 5 + tests/test_sft_trainer.py | 208 ++++++++++++++++++++++++++++++++++++++ tuning/sft_trainer.py | 22 +++- 4 files changed, 234 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index e94869511..d6f8a5d6c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -333,7 +333,7 @@ indent-string=' ' max-line-length=100 # Maximum number of lines in a module. -max-module-lines=1100 +max-module-lines=1200 # Allow the body of a class to be on the same line as the declaration if body # contains single statement. diff --git a/README.md b/README.md index 26c1da347..7fd8fd5d7 100644 --- a/README.md +++ b/README.md @@ -278,6 +278,11 @@ You can set `output_dir` to a local directory and set `save_model_dir` to COS to In order to achieve the fastest train time, set `save_strategy="no"`, as saving no checkpoints except for the final model will remove intermediate write operations all together. +#### Resuming tuning from checkpoints +If the output directory already contains checkpoints, tuning will automatically resume from the latest checkpoint in the directory specified by the `output_dir` flag. To start tuning from scratch and ignore existing checkpoints, set the `resume_from_checkpoint` flag to False. + +You can also use the resume_from_checkpoint flag to resume tuning from a specific checkpoint by providing the full path to the desired checkpoint as a string. This flag is passed as an argument to the [trainer.train()](https://github.com/huggingface/transformers/blob/db70426854fe7850f2c5834d633aff637f14772e/src/transformers/trainer.py#L1901) function of the SFTTrainer. + ## Tuning Techniques: ### LoRA Tuning Example diff --git a/tests/test_sft_trainer.py b/tests/test_sft_trainer.py index 251f6d6b9..2d55b7de4 100644 --- a/tests/test_sft_trainer.py +++ b/tests/test_sft_trainer.py @@ -80,6 +80,214 @@ PEFT_LORA_ARGS = peft_config.LoraConfig(r=8, lora_alpha=32, lora_dropout=0.05) +def test_resume_training_from_checkpoint(): + """ + Test tuning resumes from the latest checkpoint, creating new checkpoints and the + checkpoints created before resuming tuning is not affected. + """ + with tempfile.TemporaryDirectory() as tempdir: + train_args = copy.deepcopy(TRAIN_ARGS) + train_args.output_dir = tempdir + + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get trainer state of latest checkpoint + init_trainer_state, _ = _get_latest_checkpoint_trainer_state(tempdir) + assert init_trainer_state is not None + + # Resume training with higher epoch and same output dir + train_args.num_train_epochs += 5 + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get trainer state of latest checkpoint + final_trainer_state, _ = _get_latest_checkpoint_trainer_state(tempdir) + assert final_trainer_state is not None + + assert final_trainer_state["epoch"] == init_trainer_state["epoch"] + 5 + assert final_trainer_state["global_step"] > init_trainer_state["global_step"] + + # Check if loss of 1st epoch after first tuning is same after + # resuming tuning and not overwritten + assert len(init_trainer_state["log_history"]) > 0 + + init_log_history = init_trainer_state["log_history"][0] + assert init_log_history["epoch"] == 1 + + final_log_history = final_trainer_state["log_history"][0] + assert final_log_history["epoch"] == 1 + + assert init_log_history["loss"] == final_log_history["loss"] + + +def test_resume_training_from_checkpoint_with_flag_true(): + """ + Test tuning resumes from the latest checkpoint when flag is true, + creating new checkpoints and the checkpoints created before resuming + tuning is not affected. + """ + with tempfile.TemporaryDirectory() as tempdir: + train_args = copy.deepcopy(TRAIN_ARGS) + train_args.output_dir = tempdir + train_args.resume_from_checkpoint = "True" + + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get trainer state of latest checkpoint + init_trainer_state, _ = _get_latest_checkpoint_trainer_state(tempdir) + assert init_trainer_state is not None + + # Get Training logs + init_training_logs = _get_training_logs_by_epoch(tempdir) + + # Resume training with higher epoch and same output dir + train_args.num_train_epochs += 5 + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get trainer state of latest checkpoint + final_trainer_state, _ = _get_latest_checkpoint_trainer_state(tempdir) + assert final_trainer_state is not None + + assert final_trainer_state["epoch"] == init_trainer_state["epoch"] + 5 + assert final_trainer_state["global_step"] > init_trainer_state["global_step"] + + final_training_logs = _get_training_logs_by_epoch(tempdir) + + assert ( + init_training_logs[0]["data"]["timestamp"] + == final_training_logs[0]["data"]["timestamp"] + ) + + +def test_resume_training_from_checkpoint_with_flag_false(): + """ + Test when setting resume_from_checkpoint=False that tuning will start from scratch. + """ + with tempfile.TemporaryDirectory() as tempdir: + train_args = copy.deepcopy(TRAIN_ARGS) + train_args.output_dir = tempdir + train_args.resume_from_checkpoint = "False" + + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get trainer state of latest checkpoint + init_trainer_state, _ = _get_latest_checkpoint_trainer_state(tempdir) + assert init_trainer_state is not None + + # Get Training log entry for epoch 1 + init_training_logs = _get_training_logs_by_epoch(tempdir, epoch=1) + assert len(init_training_logs) == 1 + + # Training again with higher epoch and same output dir + train_args.num_train_epochs += 5 + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, None) + _validate_training(tempdir) + + # Get Training log entry for epoch 1 + final_training_logs = _get_training_logs_by_epoch(tempdir, epoch=1) + assert len(final_training_logs) == 2 + + +def test_resume_training_from_checkpoint_with_flag_checkpoint_path_lora(): + """ + Test resume checkpoint from a specified checkpoint path for LoRA tuning. + """ + with tempfile.TemporaryDirectory() as tempdir: + train_args = copy.deepcopy(TRAIN_ARGS) + lora_config = copy.deepcopy(PEFT_LORA_ARGS) + train_args.output_dir = tempdir + + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, lora_config) + _validate_training(tempdir) + + # Get trainer state and checkpoint_path of second last checkpoint + init_trainer_state, checkpoint_path = _get_latest_checkpoint_trainer_state( + tempdir, checkpoint_index=-2 + ) + assert init_trainer_state is not None + + # Resume training with higher epoch and same output dir + train_args.num_train_epochs += 5 + train_args.resume_from_checkpoint = checkpoint_path + sft_trainer.train(MODEL_ARGS, DATA_ARGS, train_args, lora_config) + _validate_training(tempdir) + + # Get total_flos from trainer state of checkpoint_path and check if its same + final_trainer_state = None + trainer_state_file = os.path.join(checkpoint_path, "trainer_state.json") + with open(trainer_state_file, "r", encoding="utf-8") as f: + final_trainer_state = json.load(f) + + assert final_trainer_state["total_flos"] == init_trainer_state["total_flos"] + + +def _get_latest_checkpoint_trainer_state(dir_path: str, checkpoint_index: int = -1): + """ + Get the trainer state from the latest or specified checkpoint directory. + The trainer state is returned along with the path to the checkpoint. + + Args: + dir_path (str): The directory path where checkpoint folders are located. + checkpoint_index (int, optional): The index of the checkpoint to retrieve, + based on the checkpoint number. The default + is -1, which returns the latest checkpoint. + + Returns: + trainer_state: The trainer state loaded from `trainer_state.json` in the + checkpoint directory. + last_checkpoint: The path to the checkpoint directory. + """ + trainer_state = None + last_checkpoint = None + checkpoints = [ + os.path.join(dir_path, d) + for d in os.listdir(dir_path) + if d.startswith("checkpoint") + ] + if checkpoints: + last_checkpoint = sorted(checkpoints, key=lambda x: int(x.split("-")[-1]))[ + checkpoint_index + ] + trainer_state_file = os.path.join(last_checkpoint, "trainer_state.json") + with open(trainer_state_file, "r", encoding="utf-8") as f: + trainer_state = json.load(f) + return trainer_state, last_checkpoint + + +def _get_training_logs_by_epoch(dir_path: str, epoch: int = None): + """ + Load and optionally filter training_logs.jsonl file. + If an epoch number is specified, the function filters the logs + and returns only the entries corresponding to the specified epoch. + + Args: + dir_path (str): The directory path where the `training_logs.jsonl` file is located. + epoch (int, optional): The epoch number to filter logs by. If not specified, + all logs are returned. + + Returns: + list: A list containing the training logs. If `epoch` is specified, + only logs from the specified epoch are returned; otherwise, all logs are returned. + """ + data_list = [] + with open(f"{dir_path}/training_logs.jsonl", "r", encoding="utf-8") as file: + for line in file: + json_data = json.loads(line) + data_list.append(json_data) + + if epoch: + mod_data_list = [] + for value in data_list: + if value["data"]["epoch"] == epoch: + mod_data_list.append(value) + return mod_data_list + return data_list + + def test_run_train_requires_output_dir(): """Check fails when output dir not provided.""" updated_output_dir_train_args = copy.deepcopy(TRAIN_ARGS) diff --git a/tuning/sft_trainer.py b/tuning/sft_trainer.py index 2ab8f7de0..da8fa5172 100644 --- a/tuning/sft_trainer.py +++ b/tuning/sft_trainer.py @@ -35,6 +35,7 @@ LlamaTokenizerFast, TrainerCallback, ) +from transformers.trainer_utils import get_last_checkpoint from transformers.utils import is_accelerate_available from trl import SFTConfig, SFTTrainer import transformers @@ -215,7 +216,7 @@ def train( ), ) - # add special tokens only when a custom tokenizer is not passed + # Add special tokens only when a custom tokenizer is not passed if not model_args.tokenizer_name_or_path: # TODO: understand if we need to hardcode these here or just use defaults in model if isinstance(tokenizer, (LlamaTokenizer, LlamaTokenizerFast)): @@ -366,7 +367,24 @@ def train( for x in framework.get_callbacks_and_ready_for_train(model, accelerator): trainer.add_callback(x) - trainer.train() + resume_from_checkpoint = None + # Check if resume flag is not passed (None), or if flag is true and + # output_dir has checkpoints then get last checkpoint from output_dir + if ( + training_args.resume_from_checkpoint is None + or training_args.resume_from_checkpoint.lower() == "true" + ): + resume_from_checkpoint = get_last_checkpoint(training_args.output_dir) + else: + # `training_args.resume_from_checkpoint` gives string values + # Check if flag is false OR flag has checkpoint value for resuming tuning + resume_from_checkpoint = ( + training_args.resume_from_checkpoint + if training_args.resume_from_checkpoint.lower() != "false" + else False + ) + + trainer.train(resume_from_checkpoint) return trainer From 229e230b1ed4dfaea7a88d7002e4c4f098b5c109 Mon Sep 17 00:00:00 2001 From: Hari Date: Mon, 16 Sep 2024 21:59:33 +0530 Subject: [PATCH 16/16] ci: add a github workflow to label pull requests based on their title (#298) Signed-off-by: Harikrishnan Balagopal Signed-off-by: Anh Uong --- .github/workflows/labelpr.yaml | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/labelpr.yaml diff --git a/.github/workflows/labelpr.yaml b/.github/workflows/labelpr.yaml new file mode 100644 index 000000000..c14131cbf --- /dev/null +++ b/.github/workflows/labelpr.yaml @@ -0,0 +1,35 @@ +name: Label PRs + +on: + pull_request_target: + types: [opened, edited, synchronize, reopened] + +jobs: + label_pr: + runs-on: ubuntu-latest + steps: + - uses: actions/github-script@v3 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const pr_welcome_msg = `Thanks for making a pull request! 😃\nOne of the maintainers will review and advise on the next steps.`; + // https://github.com/commitizen/conventional-commit-types + const valid_pr_types = ['feat', 'fix', 'docs', 'style', 'refactor', 'perf', 'test', 'build', 'ci', 'chore', 'revert']; + + if(context.payload.pull_request.comments === 0) { + await github.issues.createComment({ ...context.repo, issue_number: context.payload.number, body: pr_welcome_msg}); + } + + const title = context.payload.pull_request.title; + const results = /^(\w+)(\(\w+\))?!?:/.exec(title); + if (results === null) return core.setFailed(`The title does not follow conventional commits spec: https://www.conventionalcommits.org/en/v1.0.0/#summary Title: ${title}`); + + const pr_type = results[1]; + core.info(`pr_type: ${pr_type}`); + + if (!valid_pr_types.includes(pr_type)) return core.setFailed(`Unknown pull request type: ${pr_type}`); + + const labels = context.payload.pull_request.labels; + const new_labels = labels.filter(label => !valid_pr_types.includes(label.name)); // keep all labels that are not in valid_pr_types + new_labels.push({name: pr_type}); + await github.issues.update({ ...context.repo, issue_number: context.payload.number, labels: new_labels }); \ No newline at end of file