From 60ee14b39834eebe09f4618311cade56ba88bef5 Mon Sep 17 00:00:00 2001 From: Yizhen Date: Mon, 30 Sep 2024 16:57:05 +0800 Subject: [PATCH 1/7] [usability] deps streamlining --- contrib/long-context/sft_summarizer.py | 26 +++++---- examples/chatbot_gradio.py | 14 +++-- examples/vis_chatbot_gradio.py | 22 +++---- requirements.txt | 14 +---- service/app.py | 21 ++++--- setup.py | 8 +++ src/lmflow/models/hf_decoder_model.py | 68 ++++++---------------- src/lmflow/pipeline/dpo_aligner.py | 7 ++- src/lmflow/pipeline/utils/dpov2_trainer.py | 7 ++- src/lmflow/pipeline/vllm_inferencer.py | 20 +++++-- src/lmflow/utils/common.py | 42 +++++++++---- src/lmflow/utils/versioning.py | 59 +++++++++++++++++++ 12 files changed, 195 insertions(+), 113 deletions(-) create mode 100644 src/lmflow/utils/versioning.py diff --git a/contrib/long-context/sft_summarizer.py b/contrib/long-context/sft_summarizer.py index 4fc8031ec..34190dc29 100644 --- a/contrib/long-context/sft_summarizer.py +++ b/contrib/long-context/sft_summarizer.py @@ -8,21 +8,27 @@ from colorama import Fore,init from typing import Optional, List -from trl.commands.cli_utils import TrlParser import torch from datasets import load_dataset from dataclasses import dataclass, field from tqdm.rich import tqdm from transformers import AutoTokenizer, TrainingArguments, TrainerCallback -from trl import ( - ModelConfig, - SFTTrainer, - DataCollatorForCompletionOnlyLM, - SFTConfig, - get_peft_config, - get_quantization_config, - get_kbit_device_map, -) + +from lmflow.utils.versioning import is_trl_available + +if is_trl_available(): + from trl import ( + ModelConfig, + SFTTrainer, + DataCollatorForCompletionOnlyLM, + SFTConfig, + get_peft_config, + get_quantization_config, + get_kbit_device_map, + ) + from trl.commands.cli_utils import TrlParser +else: + raise ImportError("Please install trl package to use sft_summarizer.py") @dataclass class UserArguments: diff --git a/examples/chatbot_gradio.py b/examples/chatbot_gradio.py index c39a2b9bd..d522497a1 100644 --- a/examples/chatbot_gradio.py +++ b/examples/chatbot_gradio.py @@ -3,22 +3,28 @@ # Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved. """A simple shell chatbot implemented with lmflow APIs. """ +from dataclasses import dataclass, field import logging import json import os import sys sys.path.remove(os.path.abspath(os.path.dirname(sys.argv[0]))) -import torch +from typing import Optional import warnings -import gradio as gr -from dataclasses import dataclass, field + +import torch from transformers import HfArgumentParser -from typing import Optional from lmflow.datasets.dataset import Dataset from lmflow.pipeline.auto_pipeline import AutoPipeline from lmflow.models.auto_model import AutoModel from lmflow.args import ModelArguments, DatasetArguments, AutoArguments +from lmflow.utils.versioning import is_gradio_available + +if is_gradio_available(): + import gradio as gr +else: + raise ImportError("Gradio is not available. Please install it via `pip install gradio`.") MAX_BOXES = 20 diff --git a/examples/vis_chatbot_gradio.py b/examples/vis_chatbot_gradio.py index 8e0a7a5c7..e86a29c6a 100644 --- a/examples/vis_chatbot_gradio.py +++ b/examples/vis_chatbot_gradio.py @@ -3,28 +3,30 @@ # Copyright 2023 Statistics and Machine Learning Research Group at HKUST. All rights reserved. """A simple Multimodal chatbot implemented with lmflow APIs. 
""" -import logging +from dataclasses import dataclass, field import json +import logging import time - -from PIL import Image -from lmflow.pipeline.inferencer import Inferencer +import warnings +from typing import Optional import numpy as np -import os -import sys +from PIL import Image import torch -import warnings -import gradio as gr -from dataclasses import dataclass, field from transformers import HfArgumentParser -from typing import Optional from lmflow.datasets.dataset import Dataset from lmflow.pipeline.auto_pipeline import AutoPipeline from lmflow.models.auto_model import AutoModel from lmflow.args import (VisModelArguments, DatasetArguments, \ InferencerArguments, AutoArguments) +from lmflow.utils.versioning import is_gradio_available + +if is_gradio_available(): + import gradio as gr +else: + raise ImportError("Gradio is not available. Please install it via `pip install gradio`.") + MAX_BOXES = 20 diff --git a/requirements.txt b/requirements.txt index 0dbe372cf..66e885d6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,23 +4,13 @@ datasets==2.14.6 tokenizers>=0.13.3 peft>=0.10.0 torch>=2.0.1 -wandb==0.14.0 +wandb deepspeed>=0.14.4 -trl==0.8.0 sentencepiece transformers>=4.31.0 -flask -flask_cors -icetk cpm_kernels==1.0.11 evaluate==0.4.0 -scikit-learn==1.2.2 -lm-eval==0.3.0 -dill<0.3.5 bitsandbytes>=0.40.0 pydantic -gradio accelerate>=0.27.2 -einops>=0.6.1 -vllm>=0.4.3 -ray>=2.22.0 \ No newline at end of file +einops>=0.6.1 \ No newline at end of file diff --git a/service/app.py b/service/app.py index cfdabd667..7580ac39e 100644 --- a/service/app.py +++ b/service/app.py @@ -1,19 +1,22 @@ +from dataclasses import dataclass, field import json -import torch import os +from typing import Optional -from flask import Flask, request, stream_with_context -from flask import render_template -from flask_cors import CORS from accelerate import Accelerator -from dataclasses import dataclass, field +import torch from transformers import HfArgumentParser -from typing import Optional -from lmflow.datasets.dataset import Dataset -from lmflow.pipeline.auto_pipeline import AutoPipeline +from lmflow.args import ModelArguments from lmflow.models.auto_model import AutoModel -from lmflow.args import ModelArguments, DatasetArguments, AutoArguments +from lmflow.utils.versioning import is_flask_available + +if is_flask_available(): + from flask import Flask, request, stream_with_context + from flask import render_template + from flask_cors import CORS +else: + raise ImportError("Flask is not available. 
Please install flask and flask_cors.") WINDOW_LENGTH = 512 diff --git a/setup.py b/setup.py index 6d4109139..0d39b562a 100644 --- a/setup.py +++ b/setup.py @@ -41,3 +41,11 @@ ], requires_python=">=3.9", ) + +# optionals +# lm-eval==0.3.0 +# vllm>=0.4.3 +# ray>=2.22.0 +# flask +# flask_cors +# trl==0.8.0 \ No newline at end of file diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index 21199005c..6fecdeb7b 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -22,31 +22,13 @@ import logging import os, shutil from typing import List, Union, Optional, Dict -from pathlib import Path -import ray -import ray.data import torch -import transformers -import bitsandbytes -import deepspeed -from transformers.deepspeed import HfDeepSpeedConfig -from transformers import BitsAndBytesConfig from transformers import ( - CONFIG_MAPPING, AutoConfig, - AutoTokenizer, AutoModelForCausalLM, ) -from peft import ( - LoraConfig, - PeftModel, - TaskType, - get_peft_config, - get_peft_model, - prepare_model_for_kbit_training -) -from vllm import SamplingParams +from peft import PeftModel from lmflow.datasets.dataset import Dataset from lmflow.models.hf_model_mixin import HFModelMixin @@ -63,39 +45,23 @@ tokenize_function, conversation_tokenize_function ) +from lmflow.utils.versioning import is_ray_available, is_vllm_available, is_flash_attn_available logger = logging.getLogger(__name__) -MODELS_SUPPORT_FLASH_ATTENTION = [ - "LlamaForCausalLM", - "GPTNeoForCausalLM", - "GPT2ForCausalLM", - "BloomForCausalLM" -] - -GPU_SUPPORT_FLASH_ATTENTION = { - "A100": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"], - "A40": ["GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"], - "A6000": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"] -} - -try: +if is_flash_attn_available(): import flash_attn - if int(flash_attn.__version__.split(".")[0]) == 2: - GPU_SUPPORT_FLASH_ATTENTION = { - "A100": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"], - "A40": ["LlamaForCausalLM","GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"], - "A6000": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"] - } -except Exception as e: - if e.__class__ == ModuleNotFoundError: - logger.warning( - "flash_attn is not installed. Install flash_attn for better performance." - ) - else: - logger.warning(f'An error occurred when importing flash_attn, flash attention is disabled: {e}') +else: + logger.warning("Consider install flash_attn for better performance.") + +if is_vllm_available(): + from vllm import SamplingParams + +if is_ray_available(): + import ray + import ray.data class HFDecoderModel(DecoderModel, HFModelMixin, Tunable): @@ -380,6 +346,8 @@ def inference( ) if use_vllm: + if not is_vllm_available(): + raise ImportError("vllm is not installed. Please install vllm to use VLLM inference.") res = self.__vllm_inference(inputs, **kwargs) else: res = self.__inference(inputs, **kwargs) @@ -493,7 +461,7 @@ def prepare_inputs_for_inference( enable_distributed_inference: bool = False, use_vllm: bool = False, **kwargs, - ) -> Union[List[str], ray.data.Dataset, Dict[str, torch.Tensor]]: + ) -> Union[List[str], "ray.data.Dataset", Dict[str, torch.Tensor]]: """ Prepare inputs for inference. @@ -514,6 +482,8 @@ def prepare_inputs_for_inference( The prepared inputs for inference. 
""" if use_vllm: + if not is_ray_available() and enable_distributed_inference: + raise ImportError("ray is not installed. Please install ray to use distributed vllm inference.") inference_inputs = self.__prepare_inputs_for_vllm_inference( dataset=dataset, apply_chat_template=apply_chat_template, @@ -534,7 +504,7 @@ def __prepare_inputs_for_vllm_inference( dataset: Dataset, apply_chat_template: bool = True, enable_distributed_inference: bool = False, - ) -> Union[List[str], ray.data.Dataset]: + ) -> Union[List[str], "ray.data.Dataset"]: if dataset.get_type() == 'text_only': if apply_chat_template: dataset = dataset.map( @@ -606,7 +576,7 @@ def preprocess_conversation(sample): inference_inputs = [sentence for sentence in inference_inputs if len(sentence) > 0] - if enable_distributed_inference: + if enable_distributed_inference: inference_inputs = ray.data.from_items(inference_inputs) # -> Dict[str, np.ndarray], {"item": array(['...', '...', '...'])} return inference_inputs diff --git a/src/lmflow/pipeline/dpo_aligner.py b/src/lmflow/pipeline/dpo_aligner.py index 42eee808a..cdc0868a0 100644 --- a/src/lmflow/pipeline/dpo_aligner.py +++ b/src/lmflow/pipeline/dpo_aligner.py @@ -8,12 +8,17 @@ from pathlib import Path from typing import Dict, Optional -from trl import DPOTrainer from datasets import Dataset, load_dataset from peft import LoraConfig from transformers import TrainingArguments from lmflow.pipeline.base_aligner import BaseAligner +from lmflow.utils.versioning import is_trl_available + +if is_trl_available(): + from trl import DPOTrainer +else: + raise ImportError("Please install trl package to use dpo_aligner.py") def get_paired_dataset( diff --git a/src/lmflow/pipeline/utils/dpov2_trainer.py b/src/lmflow/pipeline/utils/dpov2_trainer.py index 735daf635..0cb50711c 100644 --- a/src/lmflow/pipeline/utils/dpov2_trainer.py +++ b/src/lmflow/pipeline/utils/dpov2_trainer.py @@ -14,9 +14,14 @@ ) from transformers.trainer_callback import TrainerCallback from transformers.trainer_utils import EvalLoopOutput -from trl import DPOTrainer from lmflow.pipeline.utils.dpov2_dataprocessor import PreferenceDataCollatorWithPadding +from lmflow.utils.versioning import is_trl_available + +if is_trl_available(): + from trl import DPOTrainer +else: + raise ImportError("Please install trl package to use dpo_aligner.py") logger = logging.getLogger(__name__) diff --git a/src/lmflow/pipeline/vllm_inferencer.py b/src/lmflow/pipeline/vllm_inferencer.py index 93f295921..9a0f0189a 100644 --- a/src/lmflow/pipeline/vllm_inferencer.py +++ b/src/lmflow/pipeline/vllm_inferencer.py @@ -13,11 +13,7 @@ from typing import List, Union, Optional, Dict, Any import numpy as np -import ray -import ray.data -from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy from transformers import AutoTokenizer -from vllm import SamplingParams, LLM from lmflow.datasets import Dataset from lmflow.pipeline.base_pipeline import BasePipeline @@ -30,11 +26,25 @@ from lmflow.utils.common import make_shell_args_from_dataclass from lmflow.utils.constants import RETURN_CODE_ERROR_BUFFER, MEMORY_SAFE_VLLM_INFERENCE_ENV_VAR_TO_REMOVE from lmflow.utils.data_utils import VLLMInferenceResultWithInput +from lmflow.utils.versioning import is_vllm_available, is_ray_available logger = logging.getLogger(__name__) +if is_vllm_available(): + from vllm import SamplingParams, LLM +else: + raise ImportError("VLLM is not available, please install vllm.") + +if is_ray_available(): + import ray + import ray.data + from 
ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy +else: + logger.warning("Ray is not available, distributed vllm inference will not be supported.") + + class InferencerWithOffloading(BasePipeline): def __init__( self, @@ -343,7 +353,7 @@ def inference(self) -> List[VLLMInferenceResultWithInput]: # > at interpreter shutdown, possibly due to daemon threads logger.warning( "^^^^^^^^^^ Please ignore the above error, as it comes from the subprocess. " - "This may due a kill signal with unfinished stdout/stderr writing in the subprocess. " + "This may due to a kill signal with unfinished stdout/stderr writing in the subprocess. " ) else: if cli_res.returncode != 0: diff --git a/src/lmflow/utils/common.py b/src/lmflow/utils/common.py index b4d783214..0d1a5dd7e 100644 --- a/src/lmflow/utils/common.py +++ b/src/lmflow/utils/common.py @@ -6,6 +6,7 @@ from pathlib import Path from typing import Optional, List, Union, Dict +from lmflow.utils.versioning import get_python_version logger = logging.getLogger(__name__) @@ -100,19 +101,36 @@ def create_copied_dataclass( new_default = new_default or {} new_fields = [] for field in original_fields: - new_field = ( - f"{field_prefix}{field.name}", - field.type, - Field( - default=new_default.get(f"{field_prefix}{field.name}", field.default), - default_factory=field.default_factory, - init=field.init, - repr=field.repr, - hash=field.hash, - compare=field.compare, - metadata=field.metadata, + if get_python_version().minor >= 10: + new_field = ( + f"{field_prefix}{field.name}", + field.type, + Field( + default=new_default.get(f"{field_prefix}{field.name}", field.default), + default_factory=field.default_factory, + init=field.init, + repr=field.repr, + hash=field.hash, + compare=field.compare, + metadata=field.metadata, + kw_only=False, # add in py3.10: https://docs.python.org/3/library/dataclasses.html + ) ) - ) + else: + new_field = ( + f"{field_prefix}{field.name}", + field.type, + Field( + default=new_default.get(f"{field_prefix}{field.name}", field.default), + default_factory=field.default_factory, + init=field.init, + repr=field.repr, + hash=field.hash, + compare=field.compare, + metadata=field.metadata, + ) + ) + new_fields.append(new_field) copied_dataclass = make_dataclass(f"{class_prefix}{original_dataclass.__name__}", new_fields) return copied_dataclass diff --git a/src/lmflow/utils/versioning.py b/src/lmflow/utils/versioning.py new file mode 100644 index 000000000..adc71d116 --- /dev/null +++ b/src/lmflow/utils/versioning.py @@ -0,0 +1,59 @@ +import importlib +import sys +import logging +from typing import Tuple, List, Union + + +logger = logging.getLogger(__name__) + + +def get_python_version(): + return sys.version_info + + +def _is_package_available(package_name: str, skippable: bool = False): + try: + importlib.import_module(package_name) + return True + except Exception as e: + if e.__class__ == ModuleNotFoundError: + return False + else: + if skippable: + logger.warning(f'An error occurred when importing {package_name}:\n{e}\n{package_name} is disabled.') + return False + else: + raise e + + +def _is_packages_available(packages: Union[List[str], List[Tuple[str, bool]]]): + if isinstance(packages[0], str): + return all([_is_package_available(package) for package in packages]) + elif isinstance(packages[0], tuple): + return all([_is_package_available(package, skippable) for package, skippable in packages]) + else: + raise ValueError(f"Invalid type of packages: {type(packages[0])}") + + +def is_gradio_available(): + return 
_is_package_available("gradio") + + +def is_ray_available(): + return _is_package_available("ray") + + +def is_vllm_available(): + return _is_package_available("vllm") + + +def is_flash_attn_available(): + return _is_package_available("flash_attn", skippable=True) + + +def is_flask_available(): + return _is_packages_available(["flask", "flask_cors"]) + + +def is_trl_available(): + return _is_package_available("trl") \ No newline at end of file From 5e586ce2abb4e70898b1e3f7637ed2d7414d36dd Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 09:49:52 +0800 Subject: [PATCH 2/7] [usability] add flash attn detect --- install.sh | 7 +------ src/lmflow/args.py | 7 +++++++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/install.sh b/install.sh index 11fa62b77..213c0157d 100644 --- a/install.sh +++ b/install.sh @@ -1,8 +1,3 @@ #!/bin/bash -pip install -e . - -gpu_state="$(nvidia-smi --query-gpu=name --format=csv,noheader)" -if [[ "${gpu_state}" == *"A100"* || "${gpu_state}" == *"A40"* || "${gpu_state}" == *"A6000"* ]]; then - pip install flash-attn\>=2.0.2 -fi +pip install -e . \ No newline at end of file diff --git a/src/lmflow/args.py b/src/lmflow/args.py index 48cf913d4..af279bd54 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -22,6 +22,8 @@ ) from transformers.utils.versions import require_version +from lmflow.utils.versioning import is_flash_attn_available + MODEL_CONFIG_CLASSES = list(MODEL_FOR_CAUSAL_LM_MAPPING.keys()) MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) @@ -357,6 +359,11 @@ def __post_init__(self): if not self.use_lora: logger.warning("use_qlora is set to True, but use_lora is not set to True. Setting use_lora to True.") self.use_lora = True + + if self.use_flash_attention: + if not is_flash_attn_available(): + self.use_flash_attention = False + logger.warning("Flash attention is not available in the current environment. 
Disabling flash attention.") @dataclass From 9942954155452110a04b084d8e7aa76ecf590b47 Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 09:50:51 +0800 Subject: [PATCH 3/7] [dev] init toml --- pyproject.toml | 18 ++++++++++++++++++ src/lmflow/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..723e5e1e2 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[build-system] +requires = ["setuptools >= 64"] +build-backend = "setuptools.build_meta" + +[tool.ruff] +target-version = "py39" +indent-width = 4 + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-first-party = ["lmflow"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +docstring-code-format = true +skip-magic-trailing-comma = false +line-ending = "auto" \ No newline at end of file diff --git a/src/lmflow/version.py b/src/lmflow/version.py index 6526deb42..00ec2dcdb 100644 --- a/src/lmflow/version.py +++ b/src/lmflow/version.py @@ -1 +1 @@ -__version__ = "0.0.7" +__version__ = "0.0.9" From b13fe84cd3fae1b1651e19f76d36d79846a162b2 Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 10:41:10 +0800 Subject: [PATCH 4/7] [usability] update setup --- install.sh | 3 --- setup.py | 29 +++++++++++++++++------------ src/lmflow/datasets/__init__.py | 6 +++++- src/lmflow/datasets/dataset.py | 8 +++++++- src/lmflow/utils/versioning.py | 7 ++++++- 5 files changed, 35 insertions(+), 18 deletions(-) delete mode 100644 install.sh diff --git a/install.sh b/install.sh deleted file mode 100644 index 213c0157d..000000000 --- a/install.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -pip install -e . \ No newline at end of file diff --git a/setup.py b/setup.py index 0d39b562a..37ee0e965 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,35 @@ import os from setuptools import find_packages from setuptools import setup -import subprocess folder = os.path.dirname(__file__) version_path = os.path.join(folder, "src", "lmflow", "version.py") __version__ = None with open(version_path) as f: - exec(f.read(), globals()) + exec(f.read(), globals()) req_path = os.path.join(folder, "requirements.txt") install_requires = [] if os.path.exists(req_path): - with open(req_path) as fp: - install_requires = [line.strip() for line in fp] + with open(req_path) as fp: + install_requires = [line.strip() for line in fp] + +extra_require = { + "multimodal": ["Pillow"], + "vllm": ["vllm>=0.4.3"], + "ray": ["ray>=2.22.0"], + "gradio": ["gradio"], + "flask": ["flask", "flask_cors"], + "flash_attn": ["flash-attn>=2.0.2"], + "trl": ["trl==0.8.0"] +} readme_path = os.path.join(folder, "README.md") readme_contents = "" if os.path.exists(readme_path): - with open(readme_path, encoding='utf-8') as fp: - readme_contents = fp.read().strip() + with open(readme_path, encoding="utf-8") as fp: + readme_contents = fp.read().strip() setup( name="lmflow", @@ -33,6 +42,7 @@ packages=find_packages("src"), package_data={}, install_requires=install_requires, + extras_require=extra_require, classifiers=[ "Intended Audience :: Science/Research/Engineering", "Topic :: Scientific/Engineering :: Artificial Intelligence", @@ -43,9 +53,4 @@ ) # optionals -# lm-eval==0.3.0 -# vllm>=0.4.3 -# ray>=2.22.0 -# flask -# flask_cors -# trl==0.8.0 \ No newline at end of file +# lm-eval==0.3.0 \ No newline at end of file diff --git a/src/lmflow/datasets/__init__.py b/src/lmflow/datasets/__init__.py index 3d8f14cc9..aa63cb48c 100644 --- 
a/src/lmflow/datasets/__init__.py +++ b/src/lmflow/datasets/__init__.py @@ -4,5 +4,9 @@ The `Dataset` class includes methods for loading datasets from a dictionary and a Hugging Face dataset, mapping datasets, and retrieving the backend dataset and arguments. """ +from lmflow.utils.versioning import is_multimodal_available + + from lmflow.datasets.dataset import Dataset -from lmflow.datasets.multi_modal_dataset import CustomMultiModalDataset +if is_multimodal_available(): + from lmflow.datasets.multi_modal_dataset import CustomMultiModalDataset diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index f452f4204..e5b546e3c 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -30,8 +30,10 @@ FLOAT_ONLY_DATASET_DESCRIPTION, INSTANCE_FIELDS_MAP, ) +from lmflow.utils.versioning import is_multimodal_available -from .multi_modal_dataset import CustomMultiModalDataset +if is_multimodal_available(): + from .multi_modal_dataset import CustomMultiModalDataset logger = logging.getLogger(__name__) @@ -127,6 +129,10 @@ def __init__(self, data_args: DatasetArguments=None, backend: str="huggingface", pass elif backend == "custom_multi_modal": # FIXME refactor the backend name + if not is_multimodal_available(): + raise ValueError( + 'Multimodal not available. Please install via `pip install -e ".[multimodal]"`' + ) raw_dataset = CustomMultiModalDataset(self.dataset_path, data_args) self.backend_dataset = raw_dataset else: diff --git a/src/lmflow/utils/versioning.py b/src/lmflow/utils/versioning.py index adc71d116..c6d18e7dd 100644 --- a/src/lmflow/utils/versioning.py +++ b/src/lmflow/utils/versioning.py @@ -12,6 +12,7 @@ def get_python_version(): def _is_package_available(package_name: str, skippable: bool = False): + assert isinstance(package_name, str), f"Invalid type of package_name: {type(package_name)}" try: importlib.import_module(package_name) return True @@ -56,4 +57,8 @@ def is_flask_available(): def is_trl_available(): - return _is_package_available("trl") \ No newline at end of file + return _is_package_available("trl") + + +def is_multimodal_available(): + return _is_packages_available(["PIL"]) \ No newline at end of file From a79e7330e7c6b2e3a71affffffb3ff2c0ae4d23a Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 11:40:44 +0800 Subject: [PATCH 5/7] [usability] versioning update --- src/lmflow/models/hf_decoder_model.py | 2 +- src/lmflow/models/hf_encoder_decoder_model.py | 15 ++++++++------ src/lmflow/models/hf_model_mixin.py | 10 ++++++++-- src/lmflow/models/hf_text_regression_model.py | 20 ++++++++++++++----- src/lmflow/models/vision2seq_model.py | 7 ++++++- src/lmflow/utils/versioning.py | 9 +++++++++ 6 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index 6fecdeb7b..86aa6b443 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -413,7 +413,7 @@ def __inference(self, inputs, *args, **kwargs): def __vllm_inference( self, inputs: Union[str, List[str]], - sampling_params: Optional[SamplingParams] = None, + sampling_params: Optional['SamplingParams'] = None, **kwargs, ) -> List[VLLMInferenceResultWithInput]: """Perform VLLM inference process of the model. 
diff --git a/src/lmflow/models/hf_encoder_decoder_model.py b/src/lmflow/models/hf_encoder_decoder_model.py index 5f4db0b8e..48a9830de 100644 --- a/src/lmflow/models/hf_encoder_decoder_model.py +++ b/src/lmflow/models/hf_encoder_decoder_model.py @@ -24,6 +24,7 @@ from typing import List, Union import deepspeed +import torch from peft import ( LoraConfig, PeftModel, @@ -31,12 +32,6 @@ get_peft_config, get_peft_model, ) - -import torch -from transformers.deepspeed import HfDeepSpeedConfig, HfTrainerDeepSpeedConfig - -from transformers.testing_utils import CaptureLogger - from transformers import ( CONFIG_MAPPING, AutoConfig, @@ -47,12 +42,20 @@ AutoProcessor, LlamaConfig ) +from transformers.testing_utils import CaptureLogger from lmflow.datasets.dataset import Dataset from lmflow.models.encoder_decoder_model import EncoderDecoderModel from lmflow.models.interfaces.tunable import Tunable from lmflow.models.vision2seq_model import CustomAutoVision2SeqModel from lmflow.utils.multimodal import update_custom_config, load_llava_pretrain_model +from lmflow.utils.versioning import get_package_version + +if get_package_version("transformers") >= "4.46.0": + from transformers.integrations.deepspeed import HfDeepSpeedConfig, HfTrainerDeepSpeedConfig +else: + from transformers.deepspeed import HfDeepSpeedConfig, HfTrainerDeepSpeedConfig + logger = logging.getLogger(__name__) diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index d2b20c5fb..def6c3fbf 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -27,14 +27,17 @@ prepare_model_for_kbit_training ) from peft.utils.constants import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING -from vllm import LLM, SamplingParams -from vllm.distributed.parallel_state import destroy_model_parallel from lmflow.models.base_model import BaseModel from lmflow.utils.constants import ( LMFLOW_LORA_TARGET_MODULES_MAPPING ) from lmflow.args import ModelArguments +from lmflow.utils.versioning import is_vllm_available + +if is_vllm_available(): + from vllm import LLM, SamplingParams + from vllm.distributed.parallel_state import destroy_model_parallel logger = logging.getLogger(__name__) @@ -429,6 +432,9 @@ def __prepare_model_for_vllm_inference( vllm_gpu_memory_utilization: float, vllm_tensor_parallel_size: int, ): + if not is_vllm_available(): + raise ImportError('VLLM is not available. 
Please install via `pip install -e ".[vllm]"`.') + self.backend_model_for_inference = LLM( model=model_args.model_name_or_path, tokenizer=model_args.model_name_or_path, diff --git a/src/lmflow/models/hf_text_regression_model.py b/src/lmflow/models/hf_text_regression_model.py index f2e5f9c1e..46904cdbd 100644 --- a/src/lmflow/models/hf_text_regression_model.py +++ b/src/lmflow/models/hf_text_regression_model.py @@ -8,8 +8,6 @@ from pathlib import Path from typing import List, Union, Dict, Optional -import ray -import ray.data import torch from peft import ( LoraConfig, @@ -20,7 +18,6 @@ prepare_model_for_kbit_training ) from transformers.modeling_outputs import SequenceClassifierOutputWithPast -from vllm import SamplingParams from lmflow.args import ModelArguments from lmflow.datasets.dataset import Dataset, KEY_SCORE @@ -42,6 +39,14 @@ CONVERSATION_DATASET_DESCRIPTION, ) from lmflow.utils.data_utils import RewardModelInferenceResultWithInput +from lmflow.utils.versioning import is_ray_available, is_vllm_available + +if is_ray_available(): + import ray + import ray.data + +if is_vllm_available(): + from vllm import SamplingParams logger = logging.getLogger(__name__) @@ -358,7 +363,7 @@ def __inference( def __vllm_inference( self, inputs: Union[str, List[str]], - sampling_params: Optional[SamplingParams] = None, + sampling_params: Optional['SamplingParams'] = None, **kwargs, ) -> Union[List[List[str]], List[List[List[int]]]]: """Perform VLLM inference process of the model. @@ -384,7 +389,7 @@ def prepare_inputs_for_inference( enable_distributed_inference: bool = False, use_vllm: bool = False, **kwargs, - ) -> Union[Dataset, ray.data.Dataset]: + ) -> Union[Dataset, 'ray.data.Dataset']: if use_vllm: raise NotImplementedError( "VLLM inference is not supported for text regression model." @@ -393,6 +398,11 @@ def prepare_inputs_for_inference( inference_inputs = self.tokenize(dataset) if enable_distributed_inference: + if not is_ray_available(): + raise ValueError( + 'Ray is not available. Please install ray via `pip install -e ".[ray]"`.' 
+ ) + inference_inputs.sanity_check(drop_invalid=True) inference_inputs = inference_inputs.get_backend_dataset() inference_inputs = ray.data.from_items(inference_inputs) diff --git a/src/lmflow/models/vision2seq_model.py b/src/lmflow/models/vision2seq_model.py index 246788b51..3db93416a 100644 --- a/src/lmflow/models/vision2seq_model.py +++ b/src/lmflow/models/vision2seq_model.py @@ -22,10 +22,15 @@ PreTrainedModel, ) from transformers.modeling_outputs import CausalLMOutputWithPast -from transformers.deepspeed import is_deepspeed_zero3_enabled from lmflow.models.base_model import BaseModel from lmflow.models.vision_encoder import build_vision_tower +from lmflow.utils.versioning import get_package_version + +if get_package_version("transformers") >= "4.46.0": + from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled +else: + from transformers.deepspeed import is_deepspeed_zero3_enabled class CustomAutoVision2SeqModel(Blip2ForConditionalGeneration, BaseModel): diff --git a/src/lmflow/utils/versioning.py b/src/lmflow/utils/versioning.py index c6d18e7dd..6c6dff962 100644 --- a/src/lmflow/utils/versioning.py +++ b/src/lmflow/utils/versioning.py @@ -2,6 +2,7 @@ import sys import logging from typing import Tuple, List, Union +from importlib.metadata import version, PackageNotFoundError logger = logging.getLogger(__name__) @@ -34,6 +35,14 @@ def _is_packages_available(packages: Union[List[str], List[Tuple[str, bool]]]): return all([_is_package_available(package, skippable) for package, skippable in packages]) else: raise ValueError(f"Invalid type of packages: {type(packages[0])}") + + +def get_package_version(package_name: str): + try: + pkg_version = version(package_name) + return pkg_version + except PackageNotFoundError as e: + raise e def is_gradio_available(): From 8f85dfb9f9c92d7033eb20330bffc5109e52f435 Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 11:41:45 +0800 Subject: [PATCH 6/7] [usability] versioning update --- src/lmflow/models/hf_encoder_decoder_model.py | 4 +- src/lmflow/models/vision2seq_model.py | 4 +- src/lmflow/pipeline/auto_pipeline.py | 54 ++++++++++++------- src/lmflow/pipeline/rm_inferencer.py | 16 ++++-- src/lmflow/utils/versioning.py | 15 ++++-- 5 files changed, 60 insertions(+), 33 deletions(-) diff --git a/src/lmflow/models/hf_encoder_decoder_model.py b/src/lmflow/models/hf_encoder_decoder_model.py index 48a9830de..6eb6ff2f7 100644 --- a/src/lmflow/models/hf_encoder_decoder_model.py +++ b/src/lmflow/models/hf_encoder_decoder_model.py @@ -49,9 +49,9 @@ from lmflow.models.interfaces.tunable import Tunable from lmflow.models.vision2seq_model import CustomAutoVision2SeqModel from lmflow.utils.multimodal import update_custom_config, load_llava_pretrain_model -from lmflow.utils.versioning import get_package_version +from lmflow.utils.versioning import is_package_version_at_least -if get_package_version("transformers") >= "4.46.0": +if is_package_version_at_least("transformers", "4.46.0"): from transformers.integrations.deepspeed import HfDeepSpeedConfig, HfTrainerDeepSpeedConfig else: from transformers.deepspeed import HfDeepSpeedConfig, HfTrainerDeepSpeedConfig diff --git a/src/lmflow/models/vision2seq_model.py b/src/lmflow/models/vision2seq_model.py index 3db93416a..dc4d70a6a 100644 --- a/src/lmflow/models/vision2seq_model.py +++ b/src/lmflow/models/vision2seq_model.py @@ -25,9 +25,9 @@ from lmflow.models.base_model import BaseModel from lmflow.models.vision_encoder import build_vision_tower -from lmflow.utils.versioning import 
get_package_version +from lmflow.utils.versioning import is_package_version_at_least -if get_package_version("transformers") >= "4.46.0": +if is_package_version_at_least('transformers', '4.46.0'): from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled else: from transformers.deepspeed import is_deepspeed_zero3_enabled diff --git a/src/lmflow/pipeline/auto_pipeline.py b/src/lmflow/pipeline/auto_pipeline.py index 923118c72..98b08ef39 100644 --- a/src/lmflow/pipeline/auto_pipeline.py +++ b/src/lmflow/pipeline/auto_pipeline.py @@ -2,42 +2,52 @@ # coding=utf-8 """Return a pipeline automatically based on its name. """ -import pkg_resources - -def is_package_version_at_least(package_name, min_version): - try: - package_version = pkg_resources.get_distribution(package_name).version - if (pkg_resources.parse_version(package_version) - < pkg_resources.parse_version(min_version)): - return False - except pkg_resources.DistributionNotFound: - return False - return True +from lmflow.utils.versioning import ( + is_package_version_at_least, + is_vllm_available, + is_trl_available, + is_ray_available +) from lmflow.pipeline.evaluator import Evaluator from lmflow.pipeline.finetuner import Finetuner from lmflow.pipeline.inferencer import Inferencer -from lmflow.pipeline.vllm_inferencer import VLLMInferencer -from lmflow.pipeline.dpo_aligner import DPOAligner -from lmflow.pipeline.dpov2_aligner import DPOv2Aligner from lmflow.pipeline.rm_tuner import RewardModelTuner from lmflow.pipeline.rm_inferencer import RewardModelInferencer -from lmflow.pipeline.iterative_dpo_aligner import IterativeDPOAligner PIPELINE_MAPPING = { "evaluator": Evaluator, "finetuner": Finetuner, "inferencer": Inferencer, - "vllm_inferencer": VLLMInferencer, "rm_inferencer": RewardModelInferencer, - "dpo_aligner": DPOAligner, - "dpov2_aligner": DPOv2Aligner, "rm_tuner": RewardModelTuner, - "iterative_dpo_aligner": IterativeDPOAligner, } +PIPELINE_NEEDS_EXTRAS = [] if not is_package_version_at_least('transformers', '4.35.0'): from lmflow.pipeline.raft_aligner import RaftAligner PIPELINE_MAPPING['raft_aligner'] = RaftAligner +else: + PIPELINE_NEEDS_EXTRAS.append('raft_aligner') + +if is_vllm_available(): + from lmflow.pipeline.vllm_inferencer import VLLMInferencer + PIPELINE_MAPPING['vllm_inferencer'] = VLLMInferencer +else: + PIPELINE_NEEDS_EXTRAS.append('vllm_inferencer') + +if is_trl_available(): + from lmflow.pipeline.dpo_aligner import DPOAligner + from lmflow.pipeline.dpov2_aligner import DPOv2Aligner + PIPELINE_MAPPING['dpo_aligner'] = DPOAligner + PIPELINE_MAPPING['dpov2_aligner'] = DPOv2Aligner +else: + PIPELINE_NEEDS_EXTRAS.extend(['dpo_aligner', 'dpov2_aligner']) + +if is_vllm_available() and is_trl_available() and is_ray_available(): + from lmflow.pipeline.iterative_dpo_aligner import IterativeDPOAligner + PIPELINE_MAPPING['iterative_dpo_aligner'] = IterativeDPOAligner +else: + PIPELINE_NEEDS_EXTRAS.append('iterative_dpo_aligner') class AutoPipeline: @@ -54,6 +64,12 @@ def get_pipeline(self, **kwargs ): if pipeline_name not in PIPELINE_MAPPING: + if pipeline_name in PIPELINE_NEEDS_EXTRAS: + raise NotImplementedError( + f'Please install the necessary dependencies ' + f'to use pipeline "{pipeline_name}"' + ) + raise NotImplementedError( f'Pipeline "{pipeline_name}" is not supported' ) diff --git a/src/lmflow/pipeline/rm_inferencer.py b/src/lmflow/pipeline/rm_inferencer.py index 2b90a485c..890e4ee20 100644 --- a/src/lmflow/pipeline/rm_inferencer.py +++ b/src/lmflow/pipeline/rm_inferencer.py @@ -15,9 +15,6 @@ 
from typing import Dict, List, Union, Tuple, Any from accelerate import Accelerator -import ray -import ray.data -from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy import torch from tqdm import tqdm from transformers import AutoConfig @@ -39,6 +36,12 @@ RewardModelInferenceResultWithInput, ) from lmflow.datasets.dataset import KEY_SCORE +from lmflow.utils.versioning import is_ray_available + +if is_ray_available(): + import ray + import ray.data + from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy os.environ["TOKENIZERS_PARALLELISM"] = "false" # To avoid warnings about parallelism in tokenizers @@ -142,11 +145,14 @@ def inference( def _inference( self, model: HFTextRegressionModel, - model_input: Union[Dataset, ray.data.Dataset], + model_input: Union[Dataset, 'ray.data.Dataset'], enable_distributed_inference: bool = False, **kwargs, ): if enable_distributed_inference: + if not is_ray_available(): + raise ImportError('Ray is not installed. Please install via `pip install -e ".[ray]"`.') + inference_res = self.__distributed_inference( model=model, model_input=model_input, @@ -212,7 +218,7 @@ def __inference( def __distributed_inference( self, model: HFTextRegressionModel, - model_input: ray.data.Dataset, + model_input: 'ray.data.Dataset', num_instances: int, batch_size: int, ) -> List[RewardModelInferenceResultWithInput]: diff --git a/src/lmflow/utils/versioning.py b/src/lmflow/utils/versioning.py index 6c6dff962..bce50b6b8 100644 --- a/src/lmflow/utils/versioning.py +++ b/src/lmflow/utils/versioning.py @@ -4,6 +4,8 @@ from typing import Tuple, List, Union from importlib.metadata import version, PackageNotFoundError +import pkg_resources + logger = logging.getLogger(__name__) @@ -37,12 +39,15 @@ def _is_packages_available(packages: Union[List[str], List[Tuple[str, bool]]]): raise ValueError(f"Invalid type of packages: {type(packages[0])}") -def get_package_version(package_name: str): +def is_package_version_at_least(package_name, min_version): try: - pkg_version = version(package_name) - return pkg_version - except PackageNotFoundError as e: - raise e + package_version = pkg_resources.get_distribution(package_name).version + if (pkg_resources.parse_version(package_version) + < pkg_resources.parse_version(min_version)): + return False + except pkg_resources.DistributionNotFound: + return False + return True def is_gradio_available(): From c683af3c04c5852c73e5fac8208f7ff7100d6a72 Mon Sep 17 00:00:00 2001 From: yizhenjia Date: Tue, 5 Nov 2024 11:42:21 +0800 Subject: [PATCH 7/7] [usability] `use_auth_token` deprecation update --- src/lmflow/args.py | 14 +++++--------- src/lmflow/datasets/dataset.py | 1 - src/lmflow/models/hf_decoder_model.py | 4 ++-- src/lmflow/models/hf_model_mixin.py | 4 ++-- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/lmflow/args.py b/src/lmflow/args.py index af279bd54..65572eb43 100644 --- a/src/lmflow/args.py +++ b/src/lmflow/args.py @@ -90,9 +90,8 @@ class ModelArguments: a string representing the specific model version to use (can be a branch name, tag name, or commit id). - use_auth_token : bool - a boolean indicating whether to use the token generated when running - huggingface-cli login (necessary to use this script with private models). + token : Optional[str] + Necessary when accessing a private model/dataset. torch_dtype : str a string representing the dtype to load the model under. 
If auto is @@ -180,13 +179,10 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: Optional[str] = field( + default=None, metadata={ - "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." - ) + "help": ("Necessary to specify when accessing a private model/dataset.") }, ) trust_remote_code: bool = field( diff --git a/src/lmflow/datasets/dataset.py b/src/lmflow/datasets/dataset.py index e5b546e3c..0defef922 100644 --- a/src/lmflow/datasets/dataset.py +++ b/src/lmflow/datasets/dataset.py @@ -120,7 +120,6 @@ def __init__(self, data_args: DatasetArguments=None, backend: str="huggingface", data_files=data_files, field=KEY_INSTANCES, split="train", - use_auth_token=None, ) self.backend_dataset = raw_dataset self._check_data_format() diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py index 86aa6b443..c3cb172a2 100644 --- a/src/lmflow/models/hf_decoder_model.py +++ b/src/lmflow/models/hf_decoder_model.py @@ -618,7 +618,7 @@ def get_peft_without_qlora(self): config_kwargs = { "cache_dir": self.model_args.cache_dir, "revision": self.model_args.model_revision, - "use_auth_token": True if self.model_args.use_auth_token else None, + "token": self.model_args.token, } config = AutoConfig.from_pretrained(self.model_args.model_name_or_path, **config_kwargs) device_map = "auto" @@ -632,7 +632,7 @@ def get_peft_without_qlora(self): config=config, cache_dir=self.model_args.cache_dir, revision=self.model_args.model_revision, - use_auth_token=True if self.model_args.use_auth_token else None, + token=self.model_args.token, torch_dtype=torch_dtype, device_map=device_map, trust_remote_code = self.model_args.trust_remote_code, diff --git a/src/lmflow/models/hf_model_mixin.py b/src/lmflow/models/hf_model_mixin.py index def6c3fbf..0d2c26671 100644 --- a/src/lmflow/models/hf_model_mixin.py +++ b/src/lmflow/models/hf_model_mixin.py @@ -131,7 +131,7 @@ def __prepare_tokenizer( "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, "trust_remote_code": model_args.trust_remote_code, } if model_args.padding_side != 'auto': @@ -203,7 +203,7 @@ def __prepare_model_config( "attn_implementation": "flash_attention_2" if model_args.use_flash_attention else None, "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, "trust_remote_code": model_args.trust_remote_code, "from_tf": bool(".ckpt" in model_args.model_name_or_path), }
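
For reference, a minimal usage sketch (not part of the patch itself) of how the optional-dependency helpers added in src/lmflow/utils/versioning.py by this series are intended to gate optional imports; the summarize_optional_features() helper below is illustrative only and is not an LMFlow API, and it assumes lmflow with these patches applied is importable.

# Sketch: gate optional imports with the helpers from lmflow.utils.versioning,
# mirroring how the patched modules (vllm_inferencer, dpo_aligner, etc.) use them.
import logging

from lmflow.utils.versioning import (
    is_package_version_at_least,
    is_ray_available,
    is_trl_available,
    is_vllm_available,
)

logger = logging.getLogger(__name__)

# Optional import guarded the same way the patched modules do.
if is_vllm_available():
    from vllm import SamplingParams
else:
    SamplingParams = None
    logger.warning('vllm is not installed; install via `pip install -e ".[vllm]"` to enable vllm inference.')


def summarize_optional_features() -> dict:
    """Report which optional LMFlow features are usable in this environment (illustrative helper)."""
    return {
        "vllm_inferencer": is_vllm_available(),
        "distributed_inference": is_ray_available(),
        "dpo_aligner": is_trl_available(),
        "new_deepspeed_integrations": is_package_version_at_least("transformers", "4.46.0"),
    }


if __name__ == "__main__":
    print(summarize_optional_features())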