diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 00000000..2ec2bbca --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,69 @@ +# Copyright The FMS HF Tuning Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Format + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + matrix: + plugin_name: + - "framework" + # - "accelerated-peft" # enable later + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + - name: Run linter + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e lint + - name: Run formatter + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e fmt + - name: Run pytest + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e py + + sample-config: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + - name: Run Config Verification + run: tox -e verify-configs diff --git a/plugins/accelerated-peft/configs/bnb.yaml b/plugins/accelerated-peft/configs/bnb.yaml index a29eef5e..ec5c3cfa 100644 --- a/plugins/accelerated-peft/configs/bnb.yaml +++ b/plugins/accelerated-peft/configs/bnb.yaml @@ -14,3 +14,7 @@ peft: # bitsandbytes: bitsandbytes: quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. 
+ no_peft_model: False \ No newline at end of file diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py index 49072208..e3b2dc6d 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py @@ -18,7 +18,7 @@ # Third Party from peft import LoraConfig from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ -from transformers.utils.import_utils import _is_package_available +from typing import List, Callable import torch @@ -54,3 +54,32 @@ def create_new_module_peft( # if module cannot be found, return None which results in a raise in the call-stack return new_module + +# consider to move this somewhere more general +def patch_forward_to_view_attributes_before_call( + old_forward: Callable, + attribute_names: List[str], torch_dtype, +): + # patch old_forward to view attribtues to torch_dype + # before call + + def _forward(self, *args, **kwargs): + # perform a view on all these attributes + for attr_name in attribute_names: + + # the view should be a passthrough + # if attr.dtype == torch_dtype + attr = getattr(self, attr_name) + + # perform view + attr = attr.view(torch_dtype) + + try: + setattr(self, attr_name, attr) + except TypeError: + # this means already have attr_name as a parameter, then + # just assign this way + self.__dict__[attr_name] = attr + + return old_forward(*args, **kwargs) + return _forward diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py index 2fd2f1e9..fa6082ab 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py @@ -25,8 +25,10 @@ from fms_acceleration import AccelerationPlugin from peft import LoraConfig, prepare_model_for_kbit_training from peft.tuners.lora.model import LoraModel +import torch.distributed from transformers import AutoModelForCausalLM, TrainingArguments import torch +import os class AutoGPTQAccelerationPlugin(AccelerationPlugin): @@ -50,6 +52,8 @@ def model_loader(self, model_name: str, **kwargs): # guarded imports # Third Party from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig + from auto_gptq.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear, QuantLinearFunction + from .autogptq_utils import patch_forward_to_view_attributes_before_call # Currently we allow only a quantized checkpoint to be loaded, we do not # implement the quantization process here. @@ -121,6 +125,43 @@ def model_loader(self, model_name: str, **kwargs): device_map=device_map, ) + # https://github.com/foundation-model-stack/fms-acceleration/pull/15 + # if FSDP distributed need to convert the AutoGPTQ model's + # parameters (in tensors) to parameters. 
Also need to + # store the int32 tensors in a float type + + try: + world_size = torch.distributed.get_world_size() + except ValueError: + world_size = 1 # pg not init + + if ( + world_size > 1 + and os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" + ): + # these parameters are to be patched for triton v2 + # consider making a map if patching more kernels + PATCH_FOR_FSDP_TRITON_V2 = ['qweight', 'qzeros'] + + # patch all the QuantLinear base layers + for mod in model.modules(): + if isinstance(mod, QuantLinear): + + # convert all patched attributes to Parameters of torch_dtype + # so FSDP can shard them + for attr_name in PATCH_FOR_FSDP_TRITON_V2: + attr = getattr(mod, attr_name) + attr = torch.nn.Parameter(attr.view(torch_dtype), requires_grad=False) + setattr(mod, attr_name, attr) + + # this patches the forward to convert them back to original + # type (i.e. int32) before the function call into the kernels + _forward = patch_forward_to_view_attributes_before_call( + mod.forward, attribute_names=PATCH_FOR_FSDP_TRITON_V2, + torch_dtype=torch.int32, # patch it back to + ) + mod.forward = MethodType(_forward, mod) + # replace AutoModelForCausalLM.from_config = _old_from_config diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py index fa11fe3a..dfd5fbc8 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py @@ -96,6 +96,9 @@ def __init__(self, configurations: Dict[str, Dict]): self._quant_type = self._check_config_and_maybe_check_values( key="peft.quantization.bitsandbytes.quant_type", values=["fp4", "nf4"] ) + self._no_peft_model = self._check_config_and_maybe_check_values( + key="peft.quantization.bitsandbytes.no_peft_model", values=[True, False] + ) def model_loader(self, model_name: str, **kwargs): @@ -121,6 +124,16 @@ def model_loader(self, model_name: str, **kwargs): "If running in FSDP, this is probably because accelerate is not used. " "This will most probably result in error." 
) + elif ( + world_size == 1 + and self._no_peft_model == True + ): + warnings.warn( + """Running on single device and setting plugin config `no_peft_model` as `True` + PEFT preparation will be managed by SFTTrainer and will cause a slowdown in training speed + due to extraneous dtype casting when SFTTrainer prepares the model using + https://github.com/huggingface/trl/blob/e90e8d91d2265e484f229c45a5eb8982f94a2936/trl/trainer/sft_trainer.py#L210""" + ) bnb_config = BitsAndBytesConfig( load_in_4bit=True, @@ -147,7 +160,8 @@ def requires_custom_loading(self): @property def requires_agumentation(self): - return True + # will skip the augmentation if _no_peft_model == True + return not self._no_peft_model def augmentation( self, diff --git a/plugins/accelerated-peft/tests/test_peft_plugins.py b/plugins/accelerated-peft/tests/test_peft_plugins.py index 894e1ca6..42404ddc 100644 --- a/plugins/accelerated-peft/tests/test_peft_plugins.py +++ b/plugins/accelerated-peft/tests/test_peft_plugins.py @@ -122,6 +122,20 @@ def test_configure_bnb_plugin(): assert framework.requires_agumentation assert len(framework.get_callbacks_and_ready_for_train()) == 0 + # test no_peft_model is true skips plugin.augmentation + for key, correct_value in [ + ("peft.quantization.bitsandbytes.no_peft_model", True), + ("peft.quantization.bitsandbytes.no_peft_model", False), + ]: + with instantiate_framework( + update_configuration_contents( + read_configuration(CONFIG_PATH_BNB), key, correct_value + ), + require_packages_check=False, + ): + # check flags and callbacks + assert (not correct_value)==framework.requires_agumentation + # attempt to activate plugin with configuration pointing to wrong path # - raise with message that no plugins can be configured with pytest.raises(ValueError) as e: diff --git a/plugins/accelerated-peft/tox.ini b/plugins/accelerated-peft/tox.ini index 6460cdbc..b79d0691 100644 --- a/plugins/accelerated-peft/tox.ini +++ b/plugins/accelerated-peft/tox.ini @@ -18,6 +18,13 @@ commands = [testenv:lint] description = run linters +deps = + pylint>=2.16.2,<=3.1.0 +commands = pylint src tests +allowlist_externals = pylint + +[testenv:fmt] +description = format skip_install = true deps = black>=22.12 @@ -26,6 +33,7 @@ commands = black {posargs:.} isort {posargs:.} + # [testenv:build] # description = build wheel # deps = diff --git a/plugins/framework/.pylintrc b/plugins/framework/.pylintrc new file mode 100644 index 00000000..45da4212 --- /dev/null +++ b/plugins/framework/.pylintrc @@ -0,0 +1,649 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. 
Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS,protobufs + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. 
+attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. 
+# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1100 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. 
+allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + # Added messages + use-symbolic-message-instead, + invalid-name, + missing-class-docstring, + missing-module-docstring, + missing-function-docstring, + consider-using-f-string, + inconsistent-return-statements, + no-member, + too-many-arguments, + too-many-locals, + too-many-branches, + too-many-statements, + cyclic-import, + too-few-public-methods, + protected-access, + fixme, + logging-format-interpolation, + logging-too-many-args, + attribute-defined-outside-init, + abstract-method, + pointless-statement, + wrong-import-order, + duplicate-code, + unbalanced-tuple-unpacking, + unused-argument + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 
'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=yes + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. 
+check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/plugins/framework/pyproject.toml b/plugins/framework/pyproject.toml index 534a8eeb..8200f0bf 100644 --- a/plugins/framework/pyproject.toml +++ b/plugins/framework/pyproject.toml @@ -22,7 +22,7 @@ classifiers=[ "Programming Language :: Python :: 3.11", ] dependencies = [ - "torch<2.3", + "torch>2.2,<2.3", "transformers<4.40", "peft", "accelerate" diff --git a/plugins/framework/src/fms_acceleration/__init__.py b/plugins/framework/src/fms_acceleration/__init__.py index c396c568..e39cd055 100644 --- a/plugins/framework/src/fms_acceleration/__init__.py +++ b/plugins/framework/src/fms_acceleration/__init__.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Standard +# use importlib to load the packages, if they are installed +import importlib + # Local -from .framework import AccelerationFramework from .constants import PLUGIN_PREFIX, PLUGINS +from .framework import AccelerationFramework from .framework_plugin import ( AccelerationPlugin, AccelerationPluginConfigError, get_relevant_configuration_sections, ) -# Standard -# use importlib to load the packages, if they are installed -import importlib - for postfix in PLUGINS: plugin_name = f"{PLUGIN_PREFIX}{postfix}" if importlib.util.find_spec(plugin_name): diff --git a/plugins/framework/src/fms_acceleration/cli.py b/plugins/framework/src/fms_acceleration/cli.py index a29fecef..04ad263a 100644 --- a/plugins/framework/src/fms_acceleration/cli.py +++ b/plugins/framework/src/fms_acceleration/cli.py @@ -13,22 +13,24 @@ # limitations under the License. -import argparse +# Standard +from typing import List, Union import os -import sys import subprocess -from typing import List, Union -import yaml - -from .constants import PLUGIN_PREFIX, PLUGINS +import sys +# Third Party from pip._internal.cli.main import main as pipmain - from transformers.utils.import_utils import _is_package_available +import yaml + +# Local +from .constants import PLUGIN_PREFIX, PLUGINS GITHUB_URL = "github.com/foundation-model-stack/fms-acceleration.git" -REPO_CACHE_DIR = '.fms/repository' +REPO_CACHE_DIR = ".fms/repository" + # TODO: make a version that fetches the def install_plugin( @@ -36,26 +38,29 @@ def install_plugin( ): "function to install plugin. Inputs should contain a pkg_name." - pkg_name = [x for x in args if not x.startswith('-')] - assert len(pkg_name) == 1,\ - "Please specify exactly one plugin to install" + pkg_name = [x for x in args if not x.startswith("-")] + assert len(pkg_name) == 1, "Please specify exactly one plugin to install" pkg_name = pkg_name[0] # take the flags - args = [x for x in args if x.startswith('-')] + args = [x for x in args if x.startswith("-")] if os.path.exists(pkg_name): - pipmain(['install', *args, pkg_name]) - return + pipmain(["install", *args, pkg_name]) + return if pkg_name.startswith(PLUGIN_PREFIX): pkg_name = pkg_name.replace(PLUGIN_PREFIX, "") # otherwise should be an internet install - pipmain([ - 'install', *args, - f'git+https://{GITHUB_URL}#subdirectory=plugins/accelerated-{pkg_name}' - ]) + pipmain( + [ + "install", + *args, + f"git+https://{GITHUB_URL}#subdirectory=plugins/accelerated-{pkg_name}", + ] + ) + def list_plugins(): print( @@ -75,13 +80,14 @@ def list_plugins(): print(f"{i+1}. 
{full_name} [{name}] {postfix}") + def get_benchmark_artifacts(dest_dir: str): if not os.path.exists(dest_dir): os.makedirs(dest_dir) - - if not os.path.exists(os.path.join(dest_dir, '.git')): - command = f"""cd {dest_dir} && git init && git remote add -f origin https://{GITHUB_URL} && \ + if not os.path.exists(os.path.join(dest_dir, ".git")): + command = f"""cd {dest_dir} && git init && \ + git remote add -f origin https://{GITHUB_URL} && \ git config --global init.defaultBranch main && \ git config core.sparsecheckout true && \ echo scripts/benchmarks >> .git/info/sparse-checkout && \ @@ -91,63 +97,73 @@ def get_benchmark_artifacts(dest_dir: str): command = f"cd {dest_dir} && git fetch origin && " command += "git pull origin main " - out = subprocess.run(command, shell=True, capture_output=True) + out = subprocess.run(command, shell=True, capture_output=True, check=False) if out.returncode != 0: - raise RuntimeError(f"could not get benchmark artifacts with error code {out.returncode}") - return out + raise RuntimeError( + f"could not get benchmark artifacts with error code {out.returncode}" + ) + return out + def list_sample_configs( - configs_dir: str, - contents_file: str = 'sample-configurations/CONTENTS.yaml', + configs_dir: str, + contents_file: str = "sample-configurations/CONTENTS.yaml", get_artifacts: bool = True, ): if get_artifacts: get_benchmark_artifacts(REPO_CACHE_DIR) - with open(os.path.join(configs_dir, contents_file)) as f: - for i, entry in enumerate(yaml.safe_load(f)['framework_configs']): - shortname = entry['shortname'] - plugins = entry['plugins'] - filename = entry['filename'] - print (f"{i+1}. {shortname} ({filename}) - plugins: {plugins}") + with open(os.path.join(configs_dir, contents_file), encoding="utf-8") as f: + for i, entry in enumerate(yaml.safe_load(f)["framework_configs"]): + shortname = entry["shortname"] + plugins = entry["plugins"] + filename = entry["filename"] + print(f"{i+1}. {shortname} ({filename}) - plugins: {plugins}") + def list_arguments( - scenario_dir: str, + scenario_dir: str, config_shortnames: Union[str, List[str]], - scenario_file: str = 'scripts/benchmarks/scenarios.yaml', - ignored_fields = ['model_name_or_path'], + scenario_file: str = "scripts/benchmarks/scenarios.yaml", + ignored_fields: List = None, get_artifacts: bool = True, ): + if ignored_fields is None: + ignored_fields = ["model_name_or_path"] + if get_artifacts: get_benchmark_artifacts(REPO_CACHE_DIR) if isinstance(config_shortnames, str): config_shortnames = [config_shortnames] - with open(os.path.join(scenario_dir, scenario_file)) as f: - scenarios = yaml.safe_load(f)['scenarios'] + with open(os.path.join(scenario_dir, scenario_file), encoding="utf-8") as f: + scenarios = yaml.safe_load(f)["scenarios"] found = 0 - print (f"Searching for configuration shortnames: {config_shortnames}") + print(f"Searching for configuration shortnames: {config_shortnames}") for scn in scenarios: - if 'framework_config' not in scn: + if "framework_config" not in scn: continue - hit_sn = [x for x in config_shortnames if x in scn['framework_config']] + hit_sn = [x for x in config_shortnames if x in scn["framework_config"]] if len(hit_sn) > 0: found += 1 - name = scn['name'] - arguments = scn['arguments'] + name = scn["name"] + arguments = scn["arguments"] hit_sn = ", ".join(hit_sn) - print (f"{found}. scenario: {name}\n configs: {hit_sn}\n arguments:") + print(f"{found}. 
scenario: {name}\n configs: {hit_sn}\n arguments:") lines = [] for key, val in arguments.items(): if key not in ignored_fields: lines.append(f" --{key} {val}") - - print (" \\\n".join(lines)) - print ("\n") + + print(" \\\n".join(lines)) + print("\n") if not found: - print(f"ERROR: Could not list arguments for configuration shortname '{config_shortnames}'") + print( + f"ERROR: Could not list arguments for configuration shortname '{config_shortnames}'" + ) + def cli(): # not using argparse since its so simple @@ -157,31 +173,30 @@ def cli(): ) argv = sys.argv if len(argv) == 1: - print (message) + print(message) return - else: + if len(argv) > 1: command = argv[1] if len(argv) > 2: variadic = sys.argv[2:] else: variadic = [] - if command == 'install': + if command == "install": assert len(variadic) >= 1, "Please provide the acceleration plugin name" install_plugin(*variadic) - elif command == 'plugins': + elif command == "plugins": assert len(variadic) == 0, "list does not require arguments" list_plugins() - elif command == 'configs': + elif command == "configs": assert len(variadic) == 0, "list-config does not require arguments" list_sample_configs(REPO_CACHE_DIR) - elif command == 'arguments': + elif command == "arguments": assert len(variadic) >= 1, "Please provide the config shortname" list_arguments(REPO_CACHE_DIR, *variadic) else: - raise NotImplementedError( - f"Unknown fms_acceleration.cli command '{command}'" - ) + raise NotImplementedError(f"Unknown fms_acceleration.cli command '{command}'") + -if __name__ == '__main__': - cli() \ No newline at end of file +if __name__ == "__main__": + cli() diff --git a/plugins/framework/src/fms_acceleration/framework.py b/plugins/framework/src/fms_acceleration/framework.py index 529b6bd5..6d545ac7 100644 --- a/plugins/framework/src/fms_acceleration/framework.py +++ b/plugins/framework/src/fms_acceleration/framework.py @@ -13,7 +13,7 @@ # limitations under the License. 
# Standard -from typing import Callable, Dict, List, Optional, Set, Tuple +from typing import Callable, List, Optional, Set, Tuple # Third Party from accelerate import Accelerator @@ -23,19 +23,20 @@ import torch import yaml -# want to use the transformers logger, but a bit of pain -logger = logging.get_logger(__name__) # pylint: disable=invalid-name -logger.setLevel(logging._get_default_logging_level()) -logger.addHandler(logging._default_handler) - -# First Party +# Local +from .constants import KEY_PLUGINS from .framework_plugin import ( PLUGIN_REGISTRATIONS, AccelerationPlugin, PluginRegistration, get_relevant_configuration_sections, ) -from .constants import KEY_PLUGINS + +# want to use the transformers logger, but a bit of pain +logger = logging.get_logger(__name__) # pylint: disable=invalid-name +logger.setLevel(logging._get_default_logging_level()) +logger.addHandler(logging._default_handler) + def check_plugin_packages(plugin: AccelerationPlugin): if plugin.require_packages is None: @@ -47,13 +48,14 @@ def check_plugin_packages(plugin: AccelerationPlugin): missing_packages.append(package_name) return len(missing_packages) == 0, missing_packages + def log_initialization_message( active_class_names: Set[str], registered_plugins: List[PluginRegistration], # list of regs - logger: Callable = None, + logging_func: Callable = None, ): - if logger is None: - logger = print + if logging_func is None: + logging_func = print def _registration_display(reg: PluginRegistration): return ( @@ -62,36 +64,33 @@ def _registration_display(reg: PluginRegistration): f"Version: {reg.package_version}." ) - logger("***** FMS AccelerationFramework *****") + logging_func("***** FMS AccelerationFramework *****") for reg in registered_plugins: if reg.plugin.__name__ in active_class_names: - logger(_registration_display(reg)) + logging_func(_registration_display(reg)) class AccelerationFramework: - - active_plugins: List[Tuple[str, AccelerationPlugin]] = list() - plugins_require_custom_loading: List = list() + active_plugins: List[Tuple[str, AccelerationPlugin]] = [] + plugins_require_custom_loading: List = [] def __init__( self, configuration_file: Optional[str], require_packages_check: bool = True ): - - with open(configuration_file, "r") as f: + with open(configuration_file, "r", encoding="utf-8") as f: contents = yaml.safe_load(f) if KEY_PLUGINS not in contents or contents[KEY_PLUGINS] is None: raise ValueError(f"Configuration file must contain a '{KEY_PLUGINS}' body") # pepare the plugin configurations - plugin_configs = {k: v for k, v in contents[KEY_PLUGINS].items()} + plugin_configs = dict(contents[KEY_PLUGINS].items()) # relevant sections are returned following plugin precedence, i.e., # they follow the registration order. for selected_configs, cls in get_relevant_configuration_sections( plugin_configs ): - # then the model is to be installed # get the plugin plugin_name = str(cls.__name__) @@ -108,7 +107,7 @@ def __init__( # check if already activated, if so, will not reactivate again # maintain uniqueness of activated plugins - if any([x == plugin_name for x, _ in self.active_plugins]): + if any(x == plugin_name for x, _ in self.active_plugins): continue # activate plugin @@ -123,15 +122,16 @@ def __init__( "framework configuration file." ) - assert ( - len(self.plugins_require_custom_loading) <= 1 - ), f"Can load at most 1 plugin with custom model loading, but tried to '{self.plugins_require_custom_loading}'." 
+ assert len(self.plugins_require_custom_loading) <= 1, ( + "Can load at most 1 plugin with custom model loading, " + f"but tried to '{self.plugins_require_custom_loading}'." + ) def model_loader(self, model_name: str, **kwargs): - if len(self.plugins_require_custom_loading) == 0: raise NotImplementedError( - f"Attempted model loading, but none of activated plugins '{list(self.active_plugins)}' " + "Attempted model loading, but none " + f"of activated plugins '{list(self.active_plugins)}' " "require custom loading." ) @@ -152,10 +152,9 @@ def augmentation( # NOTE: this assumes that augmentation order does not matter for plugin_name, plugin in self.active_plugins: - # check the model arcs at augmentation if plugin.restricted_model_archs and not any( - [x in model_archs for x in plugin.restricted_model_archs] + x in model_archs for x in plugin.restricted_model_archs ): raise ValueError( f"Model architectures in '{model_archs}' are supported for '{plugin_name}'." @@ -174,16 +173,16 @@ def requires_custom_loading(self): @property def requires_agumentation(self): - return any([x.requires_agumentation for _, x in self.active_plugins]) + return any(x.requires_agumentation for _, x in self.active_plugins) def get_callbacks_and_ready_for_train( self, model: torch.nn.Module = None, accelerator: Accelerator = None ): # show the initialized message log_initialization_message( - set([x for x, _ in self.active_plugins]), + {x for x, _ in self.active_plugins}, PLUGIN_REGISTRATIONS, - logger=logger.info, + logging_func=logger.info, ) cbks = [] diff --git a/plugins/framework/src/fms_acceleration/framework_plugin.py b/plugins/framework/src/fms_acceleration/framework_plugin.py index 1d17a863..fc6da973 100644 --- a/plugins/framework/src/fms_acceleration/framework_plugin.py +++ b/plugins/framework/src/fms_acceleration/framework_plugin.py @@ -36,7 +36,7 @@ class PluginRegistration: package_version: str = None -PLUGIN_REGISTRATIONS: List[PluginRegistration] = list() +PLUGIN_REGISTRATIONS: List[PluginRegistration] = [] def _trace_key_path(configuration: Dict, key: str): @@ -85,7 +85,6 @@ def get_relevant_configuration_sections(configuration: Dict) -> Dict: class AccelerationPlugin: - # will be triggered if the configuration_paths are found in the # acceleration framework configuration file (under KEY_PLUGINS) @staticmethod @@ -94,13 +93,18 @@ def register_plugin( configuration_and_paths: List[str], **kwargs, ): - global PLUGIN_REGISTRATIONS + + # pylint: disable=trailing-whitespace + # removed because of src/fms_acceleration/framework_plugin.py:96:8: + # W0602: Using global for 'PLUGIN_REGISTRATIONS' but no assignment + # is done (global-variable-not-assigned) + # global PLUGIN_REGISTRATIONS # get the package metadata pkg_name = sys.modules[plugin.__module__].__package__ try: package_version = importlib.metadata.version(pkg_name) - except importlib.metadata.PackageNotFoundError: + except importlib.metadata.PackageNotFoundError: package_version = None PLUGIN_REGISTRATIONS.append( @@ -116,7 +120,6 @@ def register_plugin( require_packages: Optional[Set] = None def __init__(self, configurations: Dict[str, Dict]): - # will pass in a list of dictionaries keyed by "configuration_keys" # to be used for initialization self.configurations = configurations @@ -153,13 +156,15 @@ def _check_config_and_maybe_check_values(self, key: str, values: List[Any] = Non # if the tree is a dict if len(t.keys()) > 1: raise AccelerationPluginConfigError( - f"{self.__class__.__name__}: '{key}' found but amongst multiple '{t.keys()}' exist. 
Ambiguous check in expected set '{values}'." + f"{self.__class__.__name__}: '{key}' found but amongst multiple " + "'{t.keys()}' exist. Ambiguous check in expected set '{values}'." ) t = list(t.keys())[0] # otherwise take the first value if t not in values: raise AccelerationPluginConfigError( - f"{self.__class__.__name__}: Value at '{key}' was '{t}'. Not found in expected set '{values}'." + f"{self.__class__.__name__}: Value at '{key}' was '{t}'. " + "Not found in expected set '{values}'." ) else: # if nothing to check against, we still want to ensure its a valid diff --git a/plugins/framework/src/fms_acceleration/utils/test_utils.py b/plugins/framework/src/fms_acceleration/utils/test_utils.py index aa796707..3cc4004f 100644 --- a/plugins/framework/src/fms_acceleration/utils/test_utils.py +++ b/plugins/framework/src/fms_acceleration/utils/test_utils.py @@ -51,7 +51,7 @@ def update_configuration_contents( def read_configuration(path: str) -> Dict: "helper function to read yaml config into json" - with open(path) as f: + with open(path, encoding="utf-8") as f: return yaml.safe_load(f) @@ -69,13 +69,16 @@ def build_framework_and_maybe_instantiate( plugins_to_be_registered: List[ Tuple[List[str], Type[AccelerationPlugin]] # and_paths, plugin_class ], - configuration_contents: Dict = {}, + configuration_contents: Dict = None, instantiate: bool = True, reset_registrations: bool = True, require_packages_check: bool = True, ): "helper function to register plugins and instantiate an acceleration framework for testing" + if configuration_contents is None: + configuration_contents = {} + # empty out if reset_registrations: old_registrations = [] @@ -93,7 +96,9 @@ def build_framework_and_maybe_instantiate( ) if instantiate: - yield configure_framework_from_json(configuration_contents, require_packages_check) + yield configure_framework_from_json( + configuration_contents, require_packages_check + ) else: yield @@ -104,9 +109,11 @@ def build_framework_and_maybe_instantiate( AccelerationFramework.active_plugins = old_active_plugins AccelerationFramework.plugins_require_custom_loading = old_custom_loading_plugins -# alias because default instantiate=True + +# alias because default instantiate=True build_framework_and_instantiate = build_framework_and_maybe_instantiate + def instantiate_framework( configuration_contents: Dict, require_packages_check: bool = True, @@ -122,8 +129,12 @@ def instantiate_framework( ) -def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] = []): +def create_noop_model_with_archs( + class_name: str = "ModelNoop", archs: List[str] = None +): "helper function to create a dummy model with mocked architectures" + if archs is None: + archs = [] config = type("Config", (object,), {"architectures": archs}) return type(class_name, (torch.nn.Module,), {"config": config}) @@ -131,8 +142,8 @@ def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] def create_plugin_cls( class_name: str = "PluginNoop", - restricted_models: Set = {}, - require_pkgs: Set = {}, + restricted_models: Set = None, + require_pkgs: Set = None, requires_custom_loading: bool = False, requires_agumentation: bool = False, agumentation: Callable = None, @@ -140,6 +151,11 @@ def create_plugin_cls( ): "helper function to create plugin class" + if restricted_models is None: + restricted_models = set() + if require_pkgs is None: + require_pkgs = set() + attributes = { "restricted_model_archs": restricted_models, "require_packages": require_pkgs, diff --git 
a/plugins/framework/tests/test_framework.py b/plugins/framework/tests/test_framework.py index eff1600a..b59ff62f 100644 --- a/plugins/framework/tests/test_framework.py +++ b/plugins/framework/tests/test_framework.py @@ -15,103 +15,19 @@ # SPDX-License-Identifier: Apache-2.0 # https://spdx.dev/learn/handling-license-info/ -# Standard -from contextlib import contextmanager -from tempfile import NamedTemporaryFile -from typing import Callable, Dict, List, Set, Tuple, Type - # Third Party -import pytest +import pytest # pylint: disable=(import-error import torch -import yaml # First Party -from fms_acceleration.framework import KEY_PLUGINS, AccelerationFramework -from fms_acceleration.framework_plugin import PLUGIN_REGISTRATIONS, AccelerationPlugin - -# ----------------------------- HELPER ------------------------------------- - - -@contextmanager -def build_framework_and_instantiate( - plugins_to_be_registered: List[ - Tuple[List[str], Type[AccelerationPlugin]] # and_paths, plugin_class - ], - configuration_contents: Dict, -): - "helper function to instantiate an acceleration framework for testing" - - # empty out - old_registrations = [] - old_registrations.extend(PLUGIN_REGISTRATIONS) - PLUGIN_REGISTRATIONS.clear() - old_active_plugins = AccelerationFramework.active_plugins - old_custom_loading_plugins = AccelerationFramework.plugins_require_custom_loading - AccelerationFramework.active_plugins = [] - AccelerationFramework.plugins_require_custom_loading = [] - - for path, plugin in plugins_to_be_registered: - AccelerationPlugin.register_plugin( - plugin, - configuration_and_paths=path, - ) - - with NamedTemporaryFile("w") as f: - yaml.dump({KEY_PLUGINS: configuration_contents}, f) - yield AccelerationFramework(f.name) - - # put back - PLUGIN_REGISTRATIONS.clear() - PLUGIN_REGISTRATIONS.extend(old_registrations) - AccelerationFramework.active_plugins = old_active_plugins - AccelerationFramework.plugins_require_custom_loading = old_custom_loading_plugins - - -def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] = []): - "helper function to create a dummy model with mocked architectures" - - config = type("Config", (object,), {"architectures": archs}) - return type(class_name, (torch.nn.Module,), {"config": config}) - - -def create_plugin_cls( - class_name: str = "PluginNoop", - restricted_models: Set = {}, - require_pkgs: Set = {}, - requires_custom_loading: bool = False, - requires_agumentation: bool = False, - agumentation: Callable = None, - model_loader: Callable = None, -): - "helper function to create plugin class" - - attributes = { - "restricted_model_archs": restricted_models, - "require_packages": require_pkgs, - "requires_custom_loading": requires_custom_loading, - "requires_agumentation": requires_agumentation, - } - - if agumentation is not None: - attributes["augmentation"] = agumentation - - if model_loader is not None: - attributes["model_loader"] = model_loader - - return type(class_name, (AccelerationPlugin,), attributes) - - -def dummy_augmentation(self, model, train_args, modifiable_args): - "dummy augmentation implementation" - return model, modifiable_args - - -def dummy_custom_loader(self, model_name, **kwargs): - "dummy custom loader returning dummy model" - return create_noop_model_with_archs(archs=["DummyModel"]) - - -# ----------------------------- TESTS ------------------------------------- +from fms_acceleration.framework_plugin import PLUGIN_REGISTRATIONS +from fms_acceleration.utils.test_utils import ( + 
build_framework_and_instantiate, + create_noop_model_with_archs, + create_plugin_cls, + dummy_augmentation, + dummy_custom_loader, +) def test_config_with_empty_body_raises(): @@ -208,7 +124,6 @@ def test_single_plugin(): plugins_to_be_registered=[(["dummy"], incomplete_plugin)], configuration_contents={"dummy": {"key1": 1}}, ) as framework: - # check 1. assert len(PLUGIN_REGISTRATIONS) == 1 assert len(framework.active_plugins) == 1 @@ -300,7 +215,6 @@ def test_two_plugins(): ], configuration_contents={"dummy": {"key1": 1}, "dummy2": {"key1": 1}}, ) as framework: - # check 1. assert len(PLUGIN_REGISTRATIONS) == 2 @@ -357,7 +271,11 @@ def test_plugin_registration_order(): "test that plugin registration order determines their activation order" # build a set of hooks that register the activation order - def hook_builder(act_order=[]): + def hook_builder(act_order=None): + + if act_order is None: + act_order = [] + def _hook( self, model, @@ -391,7 +309,6 @@ def _hook( plugins_to_be_registered=[([k], v) for k, v in plugins_to_be_installed], configuration_contents={k: {"key1": 1} for k, _ in plugins_to_be_installed}, ) as framework: - # trigger augmentation of active plugins and check order of activation framework.augmentation(model, None, None) for c, (n, _) in zip(plugin_activation_order, plugins_to_be_installed): diff --git a/plugins/framework/tox.ini b/plugins/framework/tox.ini index a5db281a..52513f9a 100644 --- a/plugins/framework/tox.ini +++ b/plugins/framework/tox.ini @@ -8,6 +8,13 @@ commands = pytest {posargs:tests} [testenv:lint] description = run linters +deps = + pylint>=2.16.2,<=3.1.0 +commands = pylint src tests +allowlist_externals = pylint + +[testenv:fmt] +description = format skip_install = true deps = black>=22.12 diff --git a/sample-configurations/CONTENTS.yaml b/sample-configurations/CONTENTS.yaml index 33c24253..8d45bedf 100644 --- a/sample-configurations/CONTENTS.yaml +++ b/sample-configurations/CONTENTS.yaml @@ -14,4 +14,9 @@ framework_configs: - shortname: accelerated-peft-bnb plugins: - accelerated-peft - filename: accelerated-peft-bnb-nf4-sample-configuration.yaml \ No newline at end of file + filename: accelerated-peft-bnb-nf4-sample-configuration.yaml + + - shortname: baseline-peft-bnb + plugins: + - accelerated-peft + filename: baseline-peft-bnb-nf4-sample-configuration.yaml \ No newline at end of file diff --git a/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml b/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml index e920931c..19fb71fb 100644 --- a/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml +++ b/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml @@ -18,3 +18,7 @@ plugins: # bitsandbytes: bitsandbytes: quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. + no_peft_model: false diff --git a/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml b/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml new file mode 100644 index 00000000..244de5e7 --- /dev/null +++ b/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml @@ -0,0 +1,24 @@ +# FMS Acceleration Plugin Configuration. +# +# Each stanza incorporates various configurations for +# different fine-tuning / training tasks. 
+plugins: + # PEFT-related acceleration + peft: + + # quantization-related acceleration + # e.g., kernels for quantized base weights + quantization: + + # For loading BitsAndBytes quantized layers + # to serve as 4bit base-weights for LoRA PEFT-tuning. + # NOTE: currently AutoGPTQ is not properly integrated into huggingface / + # bitsandbytes, thus recommended quant_type to be either "nf4" + # or "fp4". + # bitsandbytes: + bitsandbytes: + quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. + no_peft_model: true diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md index fdc6d7bc..115719b7 100644 --- a/scripts/benchmarks/README.md +++ b/scripts/benchmarks/README.md @@ -51,6 +51,11 @@ A `scenario` has the following key components: The best way is via `tox` which manages the dependencies, including installing the correct version [fms-hf-tuning](https://github.com/foundation-model-stack/fms-hf-tuning). +- install the `setup_requirements.txt` to get `tox`: + ``` + pip install -r setup_requirements.txt + ``` + - run a *small* representative set of benches: ``` tox -e run-benches @@ -59,7 +64,9 @@ The best way is via `tox` which manages the dependencies, including installing t ``` tox -e run-benches -- "1 2" ``` -ationFramework` to demonstrate the various plugins. + +Note: +- the `tox` command above accepts the environment variables `DRY_RUN`, `NO_DATA_PROCESSING` and `NO_OVERWRITE`. See `scripts/run_benchmarks.sh` ## Running Benchmarks @@ -82,3 +89,98 @@ Alternatively run [`benchmark.py`](./benchmark.py) directly. To see the help do: ``` python benchmark.py --help ``` + +Note: +- in `run_benchmarks.sh` we will clear the `RESULT_DIR` if it exists, to avoid contamination with old results. To protect against accidental overwrites, run with `NO_OVERWRITE=true`. + +## Logging GPU Memory + +There are 2 ways to benchmark memory in `run_benchmarks.sh`: +- Setting the environment variable `MEMORY_LOGGING=nvidia` will use Nvidia `nvidia-smi`'s API +- Setting the environment variable `MEMORY_LOGGING=huggingface` (default) will use HuggingFace `HFTrainer`'s API + +Both approaches will print out the memory values to the benchmark report. + - For Nvidia, the result column will be `nvidia_mem_reserved` + - For Torch/HF, the result columns will be `peak_torch_mem_alloc_in_bytes` and `torch_mem_alloc_in_bytes` + +### Nvidia-SMI `nvidia-smi` +`nvidia-smi` is a command line utility (CLI) based on the NVIDIA Management Library (NVML). A separate process call is used to start, log and finally terminate the CLI for every experiment. + +The keyword `memory.used` is passed to the `--query-gpu` argument to log the memory usage at some interval. The list of keywords that can be logged can be found by running `nvidia-smi --help-query-gpu`. + +Since it runs in a separate process, it is less likely to affect the training. However, it is a coarser approach than HF as NVML's definition of used memory takes the sum of (memory allocated + memory reserved). Refer to their [documentation](https://docs.nvidia.com/deploy/nvml-api/structnvmlMemory__t.html#structnvmlMemory__t:~:text=Sum%20of%20Reserved%20and%20Allocated%20device%20memory%20(in%20bytes).%20Note%20that%20the%20driver/GPU%20always%20sets%20aside%20a%20small%20amount%20of%20memory%20for%20bookkeeping). + +After every experiment, + - the logged values are calibrated to remove any existing foreign memory values + - the peak values for each gpu device are taken + - the values are finally averaged across all devices.
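+
+For reference, the post-processing that turns the raw `nvidia-smi` log (`gpu_memory_logs.csv`) into the reported `nvidia_mem_reserved` value is roughly the following. This is only a simplified sketch of what `benchmark.py` does in `Experiment.get_peak_mem_usage_by_device_id` and `Experiment.write_result`; the standalone helper shown here is illustrative:
+```
+import pandas as pd
+
+def summarize_nvidia_smi_log(csv_path: str) -> float:
+    "Condense a gpu_memory_logs.csv into a single reserved-memory value (MiB)"
+    logs = pd.read_csv(csv_path, skipinitialspace=True)
+    # values are logged as strings like "52729 MiB", so strip the unit suffix
+    used = logs["memory.used [MiB]"].apply(lambda x: float(x.replace(" MiB", "")))
+    by_device = used.groupby(logs["index"])
+    # calibrate: subtract the first reading so pre-existing (foreign) memory is not counted
+    calibrated_peaks = by_device.max() - by_device.first()
+    # average the calibrated per-device peaks into the single reported number
+    return calibrated_peaks.mean()
+```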
+ +### Torch/HuggingFace `HFTrainer` +HFTrainer has a feature to log memory through the `skip_memory_metrics=False` training argument. In their [documentation](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments.skip_memory_metrics), it is mentioned that setting this argument to `False` will affect training speed. In our tests so far (below), we do not see a significant difference in throughput (tokens/sec) when using this argument. + +The HFTrainer API is more granular than `nvidia-smi` as it uses `torch.cuda` to pinpoint memory usage inside the trainer: + - It reports the allocated memory by calling `torch.cuda.memory_allocated()` and `torch.cuda.max_memory_allocated()` inside its probes + - It has memory logging probes at different stages of the Trainer - `init`, `train`, `evaluate`, `predict` + +##### NOTE: +- When in distributed mode, the Trainer will only log the rank 0 memory. +- For stability purposes, it only tracks the outer level of the train, evaluate and predict methods, i.e. if eval is called during train, there won't be a nested invocation of the memory probe. +- Any GPU memory incurred outside of the defined Trainer stages won't be tracked. + +### Additional Details + +#### Calculating Memory from HFTrainer Output Metrics + +This is an example of the memory values that HFTrainer will produce in the outputs of `train()`: +``` +output_metrics = { + 'train_runtime': 191.2491, + 'train_samples_per_second': 0.209, + 'train_steps_per_second': 0.052, + 'train_tokens_per_second': 428.342, + 'train_loss': 1.0627506256103516, + 'init_mem_cpu_alloc_delta': 4096, + 'init_mem_gpu_alloc_delta': 0, + 'init_mem_cpu_peaked_delta': 0, + 'init_mem_gpu_peaked_delta': 0, + 'train_mem_cpu_alloc_delta': 839086080, + 'train_mem_gpu_alloc_delta': -17491768832, + 'train_mem_cpu_peaked_delta': 0, + 'train_mem_gpu_peaked_delta': 26747825664, + 'before_init_mem_cpu': 5513297920, + 'before_init_mem_gpu': 36141687296, + 'epoch': 0.01 +} +``` + +We refer to the keys of the memory metrics in this order: + - `before_init_mem_X` as stage0 + - `init_mem_X` as stage1 + - `train_mem_X` as stage2 + - ... + +We currently compute the memory values in the report by taking the largest of sums. For example: + +For the allocated memory value: +``` +max([ + stage0_mem + stage1_allocated_delta, + stage0_mem + stage1_allocated_delta + stage2_allocated_delta, + ... +]) +``` + +For the peak memory value: +``` +max([ + stage0_mem + stage1_allocated_delta + stage1_peaked_delta, + stage0_mem + stage1_allocated_delta + stage2_allocated_delta + stage2_peaked_delta, + ... +]) +``` + +Notice that we do not include `stage0_mem` alone when computing the max value. This is to avoid misleading comparisons between GPTQ-LoRA and others. GPTQ-LoRA + FSDP currently does not support low-memory mode as mentioned [here](https://github.com/foundation-model-stack/fms-acceleration/issues/18). The `stage0_mem` value of GPTQ-LoRA + FSDP will reflect a larger than expected value as the model is loaded fully before the trainer is initialized and is subsequently sharded internally in `trainer.prepare`. This might cause some misleading comparisons when other variants are loaded in low-memory mode and have smaller `stage0_mem` memory consumption than GPTQ-LoRA + FSDP.
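+
+Expressed as (simplified) code, the above largest-of-sums calculation looks roughly as follows. This is only a condensed sketch of the `extract_gpu_memory_metrics` helper added to `benchmark.py` in this PR; the standalone function name here is illustrative and error handling is omitted:
+```
+def summarize_gpu_mem(metrics: dict):
+    "Largest-of-sums over the HFTrainer gpu memory metrics"
+    # (stage, include in max); stage0/stage1 are currently excluded from the max (see issue #18)
+    stages = [
+        ("before_init_mem_gpu", False),
+        ("init_mem_gpu", False),
+        ("train_mem_gpu", True),
+    ]
+    running_alloc, alloc_sums, peak_sums = 0, [], []
+    for stage, include in stages:
+        # stage0 has no "_alloc_delta" key; it is an absolute reading
+        running_alloc += metrics.get(f"{stage}_alloc_delta", metrics.get(stage, 0))
+        if include:
+            alloc_sums.append(running_alloc)
+            peak_sums.append(running_alloc + metrics.get(f"{stage}_peaked_delta", 0))
+    # reported as peak_torch_mem_alloc_in_bytes and torch_mem_alloc_in_bytes respectively
+    return max(peak_sums), max(alloc_sums)
+```
+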
Once low-memory mode is supported for GPTQ-LoRA, we will include `stage0_mem` back inside the max computation + +We compare memory values between Nvidia-SMI and Torch in this PR - [Memory Benchmarking](https://github.com/foundation-model-stack/fms-acceleration/pull/14). + + diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py index 021426cd..afbf61cf 100644 --- a/scripts/benchmarks/benchmark.py +++ b/scripts/benchmarks/benchmark.py @@ -1,17 +1,20 @@ +# Standard +from itertools import product +from typing import Any, Callable, Dict, List, Tuple, Union import argparse import json import os import re import subprocess import warnings -from itertools import product -from typing import Callable, Dict, List, Tuple, Any, Union +# Third Party +from tqdm import tqdm +from transformers import AutoConfig, HfArgumentParser, TrainingArguments import datasets import pandas as pd +import torch import yaml -from tqdm import tqdm -from transformers import HfArgumentParser, TrainingArguments, AutoConfig """ This benchmarking script @@ -45,22 +48,20 @@ FILE_RESULTS = "results.json" FILE_SHELL_COMMAND = "command.sh" FILE_SCRIPT_ARGS = "script.json" -FILE_SUMMARY_CSV = 'summary.csv' +FILE_SUMMARY_CSV = "raw_summary.csv" DIR_BENCHMARKS = os.path.dirname(os.path.realpath(__file__)) -DIR_PREFIX_EXPERIMENT = 'exp' -DIR_NAME_RESULTS_DEFAULT = 'benchmark_results' -DIR_SAMP_CONFIGS = os.path.join(DIR_BENCHMARKS, '../../sample-configurations') +DIR_PREFIX_EXPERIMENT = "exp" +DIR_NAME_RESULTS_DEFAULT = "benchmark_results" +DIR_SAMP_CONFIGS = os.path.join(DIR_BENCHMARKS, "../../sample-configurations") # read list of sample configurations from contents file FRAMEWORK_CONFIG_KEYPAIRS = [] -with open(os.path.join(DIR_SAMP_CONFIGS, 'CONTENTS.yaml')) as f: - configs = yaml.safe_load(f)['framework_configs'] +with open(os.path.join(DIR_SAMP_CONFIGS, "CONTENTS.yaml")) as f: + configs = yaml.safe_load(f)["framework_configs"] for d in configs: - FRAMEWORK_CONFIG_KEYPAIRS.append(d['shortname']) - FRAMEWORK_CONFIG_KEYPAIRS.append( - os.path.join(DIR_SAMP_CONFIGS, d['filename']) - ) + FRAMEWORK_CONFIG_KEYPAIRS.append(d["shortname"]) + FRAMEWORK_CONFIG_KEYPAIRS.append(os.path.join(DIR_SAMP_CONFIGS, d["filename"])) # regex to capture the start and end of tracebacks REGEX_START_OF_TRACEBACK = "Traceback\s\(most\srecent\scall\slast\)" @@ -72,6 +73,68 @@ "torch.distributed.elastic.multiprocessing.errors.ChildFailedError" ] +FILE_MEM = "gpu_memory_logs.csv" +GPU_LOG_USED_MEM_COLUMN_NAME = "memory.used [MiB]" +GPU_LOG_METRIC_SUFFIX = " MiB" +GPU_TABLE = "timestamp,name,index,memory.used" +RESULT_FIELD_RESERVED_GPU_MEM = "nvidia_mem_reserved" +RESULT_FIELD_DEVICE_NAME = "gpu_device_name" + +HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT = "before_init_mem_gpu" +HF_TRAINER_LOG_GPU_STAGE_INIT = "init_mem_gpu" +HF_TRAINER_LOG_GPU_STAGE_TRAIN = "train_mem_gpu" +KEYWORD_PEAKED_DELTA = "peaked_delta" +KEYWORD_ALLOC_DELTA = "alloc_delta" +HF_ARG_SKIP_MEMORY_METRIC = "--skip_memory_metrics" +RESULT_FIELD_ALLOCATED_GPU_MEM = "torch_mem_alloc_in_bytes" +RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "peak_torch_mem_alloc_in_bytes" + + +def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]: + """ + This function computes the gpu summary metrics from the output metrics of Trainer + when `skip_memory_metrics` is set to `False` in transformers.TrainingArguments + + This function is called only when `--skip_memory_metrics` exist in the experiment arg + and is set to False. The memory key values are expected to be inside output_metrics. 
If + output_metrics is empty, return peak=0 and usage=0 + + Returns + - gpu_peak value in Bytes + - gpu_usage value in Bytes + """ + # Assumes train stage is always called + # this is a tuple of stage names, and a bool to say if it should be included in the summarized number + # we exclude the model loading stages for now, due to + # https://github.com/foundation-model-stack/fms-acceleration/issues/18 + # we will renable the loading stages later on once this issue is addressed + if len(output_metrics.keys()) < 1: + return 0, 0 + + trainer_stage_order = [ + (HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT, False), + (HF_TRAINER_LOG_GPU_STAGE_INIT, False), + (HF_TRAINER_LOG_GPU_STAGE_TRAIN, True), + ] + alloc_running_sum = 0 + list_of_alloc_running_sums = [] + list_of_peak_running_sums = [] + for STAGE_NAME, include in trainer_stage_order: + delta_key = f"{STAGE_NAME}_{KEYWORD_ALLOC_DELTA}" + alloc_running_sum += ( + output_metrics[delta_key] + if delta_key in output_metrics + else output_metrics[STAGE_NAME] + ) + peak_delta = output_metrics.get(f"{STAGE_NAME}_{KEYWORD_PEAKED_DELTA}", 0) + if include: + list_of_alloc_running_sums.append(alloc_running_sum) + list_of_peak_running_sums.append(alloc_running_sum + peak_delta) + + max_alloc_running_sum = max(list_of_alloc_running_sums) + max_peak_running_sum = max(list_of_peak_running_sums) + return max_peak_running_sum, max_alloc_running_sum + def get_hf_arguments_with_no_value(dataclass_types): """this function will return a map (str, bool) of true/false arguments. @@ -203,7 +266,7 @@ def cartesian_product_on_dict(variable_matrices: Dict) -> List[Dict]: list_of_products.append( { name: arg - for name, arg in zip(variable_matrices.keys(), arg_combinations) + for name, arg in zip(variable_matrices.keys(), arg_combinations) } ) return list_of_products @@ -223,14 +286,14 @@ def convert_args_to_dict(experiment_arguments: List[Any]): argument_dict[current_key] = item else: # otherwise it was from a list, so make into sequence - argument_dict[current_key] = v + ' ' + item + argument_dict[current_key] = v + " " + item return argument_dict class ScenarioMatrix: - matrix_args = ['model_name_or_path'] + matrix_args = ["model_name_or_path"] def __init__(self, scenario: Dict, acceleration_config_map: Dict = None) -> None: assert "arguments" in scenario.keys(), "Missing `arguments` key in `scenario`" @@ -246,7 +309,7 @@ def __init__(self, scenario: Dict, acceleration_config_map: Dict = None) -> None setattr(self, key, val) def preload_models(self): - for model_name in self.arguments['model_name_or_path']: + for model_name in self.arguments["model_name_or_path"]: print(f"Scenario '{self.name}' preloading model '{model_name}'") # just preload the config AutoConfig.from_pretrained(model_name) @@ -292,8 +355,15 @@ def __init__( self.stderr_filename = os.path.join(self.save_dir, FILE_STDERR) self.command_filename = os.path.join(self.save_dir, FILE_SHELL_COMMAND) self.results_filename = os.path.join(self.save_dir, FILE_RESULTS) + self.gpu_log_filename = os.path.join(self.save_dir, FILE_MEM) - def run(self, run_cmd: str, environment_variables: Dict = None): + def run( + self, + run_cmd: str, + environment_variables: Dict = None, + log_nvidia_smi: bool = False, + memory_log_interval_secs: int = 1, + ): # form the command line commands = [] @@ -302,14 +372,47 @@ def run(self, run_cmd: str, environment_variables: Dict = None): commands.extend([str(x) for x in c]) else: commands.append(str(c)) - + # will save the command line in str self.shell_command = run_cmd.split() + commands 
self.environment = environment_variables self.experiment_args_str = commands os.makedirs(self.save_dir, exist_ok=True) + + if log_nvidia_smi: + """ + Opens a parallel process to log the device memory of the main experiment process. + - Logs memory at intervals to a csv file in `self.save_dir` + - Terminates at the end of experiment + - GPU log is read and aggregated when the experiment ends & results are saved in Experiment.write_result, + + NOTE: This feature assumes the following + 1. Experiment is the only process on the gpu devices - + there are no other processes running on the device in parallel. + + Can log more info from nvidia-smi by expanding GPU_Table argument + e.g. "timestamp,name,index,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used" + Use `nvidia-smi --help-query-gpu` for more reference + """ + nvidia_logging_cmd = [ + "nvidia-smi", + "--query-gpu", + GPU_TABLE, + "--format", + "csv", + "--id", + str(environment_variables["CUDA_VISIBLE_DEVICES"]), + "--loop", + str(memory_log_interval_secs), + ] + memory_process = subprocess.Popen( + nvidia_logging_cmd, + stdout=open(self.gpu_log_filename, "w"), + text=True, + ) + subprocess.run( - self.shell_command, + self.shell_command, capture_output=False, stdout=open(self.stdout_filename, "w"), stderr=open(self.stderr_filename, "w"), @@ -317,6 +420,9 @@ def run(self, run_cmd: str, environment_variables: Dict = None): env={**os.environ.copy(), **environment_variables}, ) + if log_nvidia_smi: + memory_process.terminate() + def get_experiment_final_metrics( self, final_metrics_keys: List[str] = ["train_loss", "train_runtime"] ): @@ -374,19 +480,76 @@ def maybe_get_experiment_error_traceback(self): return None if len(results) == 0 else results + def get_peak_mem_usage_by_device_id(self): + """ + This function retrieves the raw measurements of reserved GPU memory per device across the experiment - + computing the peak value for each gpu and then performing a simple calibration (subtracts peak values by the first reading). + Returns: + - pd.Series of peak memory usage per device id + - the device name as string - e.g. 
"NVIDIA A100-SXM4-80GB" + + Example: For 2 devices with GPU Indices 0,1 - it will return the max measurement value (in MiB) of each device as a Series: + + - pd.Series + index + 0 52729.0 + 1 52783.0 + Name: memory.used [MiB], dtype: float64 + """ + + # group the gpu readings into device ids + gpu_logs = pd.read_csv(self.gpu_log_filename, skipinitialspace=True) + # assume that all the devices have the same device name + device_name = gpu_logs.name.iloc[-1] + # extract and convert the gpu memory usage as float values + gpu_logs[GPU_LOG_USED_MEM_COLUMN_NAME] = gpu_logs[ + GPU_LOG_USED_MEM_COLUMN_NAME + ].apply(lambda x: float(x.replace(GPU_LOG_METRIC_SUFFIX, ""))) + mem_usage_by_device_id = gpu_logs.groupby("index")[GPU_LOG_USED_MEM_COLUMN_NAME] + # Calibrate values by subtracting out the initial values of the GPU readings + # to ensure no existing memory is counted in addition with the experiment + initial_values = mem_usage_by_device_id.first() + peak_values = mem_usage_by_device_id.max() + return peak_values.sub(initial_values), device_name + def write_result(self): "Function to write a json result file" # save some basic args save_result = ConfigUtils.convert_args_to_dict(self.experiment_args_str) - save_result['num_gpus'] = self.num_gpus + save_result["num_gpus"] = self.num_gpus + + # if a gpu log file exist, process the raw nvidia logs and write to result + if os.path.isfile(self.gpu_log_filename): + # Add GPU info and measurements into the result saving + peak_mem_usage_by_device_id, device_name = ( + self.get_peak_mem_usage_by_device_id() + ) + save_result[RESULT_FIELD_DEVICE_NAME] = device_name + # Memory usage is averaged across all devices in the final result + save_result[RESULT_FIELD_RESERVED_GPU_MEM] = ( + peak_mem_usage_by_device_id.mean() + ) + + # process gpu mem from output metrics and write to result + # check if HF_ARG_SKIP_MEMORY_METRIC is set to False in experiment arg + # this arg is specified explicitly inside `def generate_list_of_experiments`` + argument_idx = self.experiment_arg.index(HF_ARG_SKIP_MEMORY_METRIC) + write_memory_metric = not self.experiment_arg[argument_idx + 1] + if write_memory_metric: + peak_gpu_mem, gpu_allocated_mem = extract_gpu_memory_metrics( + self.get_experiment_final_metrics() + ) + save_result[RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM] = peak_gpu_mem + save_result[RESULT_FIELD_ALLOCATED_GPU_MEM] = gpu_allocated_mem # if there is an error we save the error message else we save the final result maybe_error_messages = self.maybe_get_experiment_error_traceback() if maybe_error_messages is None: other_results = self.get_experiment_final_metrics() save_result = { - **save_result, **self.get_experiment_final_metrics(), + **save_result, + **self.get_experiment_final_metrics(), } else: other_results = {"error_messages": maybe_error_messages} @@ -394,26 +557,25 @@ def write_result(self): # combine the final thing save_result = {**save_result, **other_results} - with open(self.results_filename, 'w') as f: + with open(self.results_filename, "w") as f: json.dump(save_result, f, indent=4, sort_keys=True) - # NOTE: can be improved. Not sure if this really gets parity with + # NOTE: can be improved. 
Not sure if this really gets parity with # subprocess.run def write_shell_command(self): def _escape(x: str): # if there is is whitespace we just escape with single quotes # not sure if this is the best thing to do - return x if not re.search(r"\s", x) else f"\'{x}\'" + return x if not re.search(r"\s", x) else f"'{x}'" "Write a shell script to repro the run" - with open(self.command_filename, 'w') as f: + with open(self.command_filename, "w") as f: f.write("#!/bin/bash\n\n") for key, val in self.environment.items(): - f.write(f"{key}={val}\n") - f.write(" ".join([ - _escape(x) for x in self.shell_command - ])) + f.write(f"export {key}={val}\n") + f.write(" ".join([_escape(x) for x in self.shell_command])) + class DryRunExperiment(Experiment): @@ -423,6 +585,7 @@ def __init__(self, *args, **kwargs): def run(self, run_cmd: str, environment_variables: Dict = None): def _dummy(*args, **kwargs): pass + _old = subprocess.run subprocess.run = _dummy super().run(run_cmd, environment_variables) @@ -436,6 +599,7 @@ def get_experiment_final_metrics( def maybe_get_experiment_error_traceback(self): return None + def prepare_arguments(args): defaults = ConfigUtils.read_yaml(args.defaults_config_path) defaults["training_data_path"] = args.dataset_save_path @@ -451,18 +615,15 @@ def prepare_arguments(args): } experiment_factor = 1 for k, v in experiment_matrices.items(): - print (f"Experiment has matrix '{k}' of len {len(v)}") + print(f"Experiment has matrix '{k}' of len {len(v)}") experiment_factor *= len(v) - print (f"Experiment matrices will product by factor of '{experiment_factor}'") + print(f"Experiment matrices will product by factor of '{experiment_factor}'") for scenario_config in scenarios: _scn_name = scenario_config["name"] # if a `run_only_scenarios` list exist, filter out any scenario not in the list - if ( - args.run_only_scenarios - and _scn_name not in args.run_only_scenarios - ): - print (f"Skipping scenario '{_scn_name}'") + if args.run_only_scenarios and _scn_name not in args.run_only_scenarios: + print(f"Skipping scenario '{_scn_name}'") continue scenario = ScenarioMatrix(scenario_config, acceleration_config_map) scenario_matrices, scenario_constants = ( @@ -470,7 +631,7 @@ def prepare_arguments(args): ) scn_factor = 1 for k, v in scenario_matrices.items(): - print (f"Scenario '{_scn_name}' has matrix '{k}' of len {len(v)}") + print(f"Scenario '{_scn_name}' has matrix '{k}' of len {len(v)}") scn_factor *= len(v) # update defaults with scenario constants @@ -478,7 +639,9 @@ def prepare_arguments(args): # Remove any empty variables and combine matrices to dictionary to cartesian product on combined_matrices = {**scenario_matrices, **experiment_matrices} products = ConfigUtils.cartesian_product_on_dict(combined_matrices) - print (f"Scenario '{_scn_name}' will add to the total products by: ----> '{experiment_factor} x {scn_factor}' = '{len(products)}'\n") + print( + f"Scenario '{_scn_name}' will add to the total products by: ----> '{experiment_factor} x {scn_factor}' = '{len(products)}'\n" + ) if args.preload_models and len(products) > 0: scenario.preload_models() for num_gpus, experiment_arg in ConfigUtils.build_args_from_products( @@ -492,6 +655,7 @@ def generate_list_of_experiments( output_dir: str = "results", hf_products_dir: str = "hf", dry_run: bool = False, + log_memory_in_trainer: bool = False, ) -> List[Experiment]: """Construct list of experiments to be run. 
Takes in default_config and any matrices in scenario and experiment_config @@ -503,6 +667,8 @@ def generate_list_of_experiments( expr_arg_w_outputdir = exp_arg + [ "--output_dir", os.path.join(experiment_output_dir, hf_products_dir), + HF_ARG_SKIP_MEMORY_METRIC, + not log_memory_in_trainer, ] expr_cls = Experiment if not dry_run else DryRunExperiment _expr = expr_cls( @@ -515,7 +681,7 @@ def generate_list_of_experiments( return experiments -def gather_report(result_dir: Union[str, List[str]], raw: bool=True): +def gather_report(result_dir: Union[str, List[str]], raw: bool = True): def _gather(rdir): @@ -524,26 +690,28 @@ def _gather(rdir): # map from config file to tag fcm = convert_keypairs_to_map( - script_args['acceleration_framework_config_keypairs'] + script_args["acceleration_framework_config_keypairs"] ) - fcm = {v:k for k,v in fcm.items()} + fcm = {v: k for k, v in fcm.items()} experiment_stats = {} - exper_dirs = [x for x in os.listdir(rdir) if x.startswith(DIR_PREFIX_EXPERIMENT)] + exper_dirs = [ + x for x in os.listdir(rdir) if x.startswith(DIR_PREFIX_EXPERIMENT) + ] for tag in exper_dirs: try: with open(os.path.join(rdir, tag, FILE_RESULTS)) as f: - tag = tag.replace(DIR_PREFIX_EXPERIMENT + '_', '') + tag = tag.replace(DIR_PREFIX_EXPERIMENT + "_", "") tag = int(tag) experiment_stats[tag] = json.load(f) except FileNotFoundError: pass df = pd.DataFrame.from_dict(experiment_stats, orient="index").sort_index() try: - df['framework_config'] = df['acceleration_framework_config_file'].map( - lambda x : fcm.get(x, 'none') + df["framework_config"] = df["acceleration_framework_config_file"].map( + lambda x: fcm.get(x, "none") ) - except KeyError: + except KeyError: pass return df @@ -564,23 +732,39 @@ def _nunique(series): # if unique does not work, then return number of non-na # elements return len(series) - series.isna().sum() - u = df.apply(_nunique) # columns that are unique - return df.loc[:,u != 1], df.iloc[0][u == 1].to_dict() + + u = df.apply(_nunique) # columns that are unique + return df.loc[:, u != 1], df.iloc[0][u == 1].to_dict() + def compress(df): - return df.loc[:,df.apply(pd.Series.nunique) != 1] + return df.loc[:, df.apply(pd.Series.nunique) != 1] + def main(args): + # Gathers available gpu device ids that will be used for benchmarking. + # If "CUDA_VISIBLE_DEVICES" is specified, it will return the specified device ids + # if no gpu ids are specified, it will default to the enumeration of available ids + assert torch.cuda.device_count() > 0, "No device detected for memory logging!" + available_gpus_indices = os.environ.get("CUDA_VISIBLE_DEVICES") + if available_gpus_indices: + available_gpus_indices = available_gpus_indices.split(",") + else: + available_gpus_indices = [str(i) for i in range(torch.cuda.device_count())] + + if args.dry_run and args.log_nvidia_smi: + args.log_nvidia_smi = False + # 1. Prepares a standard BenchmarkDataset # TODO: consider caching the json file if not args.no_data_processing: benchmark_dataset = BenchmarkDataset(args.dataset_name, format_fn) benchmark_dataset.save_to_path(args.dataset_save_path) - # dump out the script arguments + # dump out the script arguments os.makedirs(args.results_output_path, exist_ok=True) - with open(os.path.join(args.results_output_path, FILE_SCRIPT_ARGS), 'w') as f: + with open(os.path.join(args.results_output_path, FILE_SCRIPT_ARGS), "w") as f: json.dump(vars(args), f, indent=4, sort_keys=True) # 2. Prepares a list of experiment arguments from a set of configs @@ -589,10 +773,14 @@ def main(args): # 3. 
Builds a list of experiment objects to run based on the set of experiment arguments experiment_stats = {} experiment: Experiment - for experiment in tqdm(generate_list_of_experiments( - experiment_args, output_dir=args.results_output_path, - dry_run=args.dry_run, - )): + for experiment in tqdm( + generate_list_of_experiments( + experiment_args, + output_dir=args.results_output_path, + dry_run=args.dry_run, + log_memory_in_trainer=args.log_memory_hf, + ) + ): if experiment.num_gpus > 1: prefix = COMMAND_ACCELERATE.format( accelerate_config_path=args.accelerate_config, @@ -602,10 +790,20 @@ def main(args): else: prefix = COMMAND_PYTHON - device_ids = ",".join([str(i) for i in range(experiment.num_gpus)]) + assert experiment.num_gpus <= len( + available_gpus_indices + ), "Experiment requires more gpus than is available on the platform." + """ + Experiment will take only the ids from the available gpu indices, + this ensures that whatever GPUs are exposed to benchmark.py are the only + devices that each experiment can have access to. + """ + device_ids = ",".join(available_gpus_indices[: experiment.num_gpus]) + experiment.run( f"{prefix} {FMS_TRAINER}", environment_variables={"CUDA_VISIBLE_DEVICES": device_ids}, + log_nvidia_smi=args.log_nvidia_smi, ) # write results and store pointers to files @@ -618,9 +816,7 @@ def main(args): with open(path) as f: experiment_stats[tag] = json.load(f) df = pd.DataFrame.from_dict(experiment_stats, orient="index") - df.to_csv( - os.path.join(args.results_output_path, FILE_SUMMARY_CSV), index=None - ) + df.to_csv(os.path.join(args.results_output_path, FILE_SUMMARY_CSV), index=None) # TO CREATE THE checked in CSV FILE DO # df, constant = gather_report(..., raw=False) @@ -635,6 +831,7 @@ def main(args): # index=False # ) + if __name__ == "__main__": parser = argparse.ArgumentParser( @@ -723,17 +920,32 @@ def main(args): "--process_port", type=int, default=29500, help="accelerate process port" ) parser.add_argument( - "--no_data_processing", action='store_true', - help="skip the json data prep (useful for re-runs)" + "--no_data_processing", + action="store_true", + help="skip the json data prep (useful for re-runs)", ) parser.add_argument( - "--dry_run", action='store_true', - help="perform a dry run only. Useful for debuging benchmark scenarios." + "--dry_run", + action="store_true", + help="perform a dry run only. Useful for debuging benchmark scenarios.", ) parser.add_argument( - "--preload_models", action='store_true', + "--preload_models", + action="store_true", help="ensures 'model_name_or_paths 'specified in scenarios.yaml work. " - "Useful to check model paths specified correctly before lengthly benchmark runs." 
+ "Useful to check model paths specified correctly before lengthly benchmark runs.", + ) + parser.add_argument( + "--log_nvidia_smi", + action="store_true", + help="Use `nvidia-smi` API to log reserved memory of benchmarks", ) + + parser.add_argument( + "--log_memory_hf", + action="store_true", + help="Uses memory logging from HF Trainer Arguments API to log gpu memory, for distributed runs only rank 0 is measured", + ) + args = parser.parse_args() main(args) diff --git a/scripts/benchmarks/display_bench_results.py b/scripts/benchmarks/display_bench_results.py new file mode 100644 index 00000000..b590f26c --- /dev/null +++ b/scripts/benchmarks/display_bench_results.py @@ -0,0 +1,58 @@ +# Standard +import argparse + +# First Party +# import this because of alot of internal contants +from scripts.benchmarks.benchmark import gather_report, DIR_SAMP_CONFIGS +from typing import List + +def main(*directories: str, output_filename: str = "results.csv", remove_columns: List[str] = None): + "gather outputs from a list of directories and output to a csv" + + df, constant = gather_report(*directories, raw=False) + # filter result columns to keep by the inverse of remove_columns + if remove_columns: + df = df[df.columns[~df.columns.isin(remove_columns)]] + + errors = [] + try: + # remove error messages if any + errors = df.error_messages + errors = errors.loc[errors.isna() == False] + df = df.loc[df.error_messages.isna()] + except: + pass + df = df.reset_index().drop("output_dir", axis=1) + df.reindex(sorted(df.columns), axis=1).to_csv(output_filename, index=False) + print("***************** Report Created ******************") + print(f"Total lines: '{len(df)}'") + print(f"Number columns included: '{len(df.columns)}'") + print(f"Number columns excluded: '{len(constant)}'") + print(f"Excluding number of exceptions caught: '{len(errors)}'") + print(f"Written report to '{output_filename}'") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + prog="Acceleration Benchmarking Reporting Tool", + description="This script gathers a set benchmarks to produce a CSV report", + ) + parser.add_argument( + "bench_outputs", + nargs="+", + help="list of directories from which to gather bench outputs.", + ) + parser.add_argument( + "--result_file", + default="results.csv", + help="name of final csv report file.", + ) + parser.add_argument( + "--remove_columns", + nargs="*", + help="list of columns to ignore from results.csv", + ) + + args = parser.parse_args() + main(args.bench_outputs, output_filename=args.result_file, remove_columns=args.remove_columns) diff --git a/scripts/benchmarks/refs/a100_80gb.csv b/scripts/benchmarks/refs/a100_80gb.csv index 93dd0a28..4434d864 100644 --- a/scripts/benchmarks/refs/a100_80gb.csv +++ b/scripts/benchmarks/refs/a100_80gb.csv @@ -1,49 +1,61 @@ -acceleration_framework_config_file,epoch,fp16,framework_config,index,learning_rate,lora_alpha,lora_dropout,model_name_or_path,num_gpus,output_dir,peft_method,per_device_train_batch_size,r,target_modules,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second -,0.15,,none,0,2e-5,,,mistralai/Mistral-7B-v0.1,1,,,4,,,0.8943243026733398,561.4936,0.712,0.178,2917.932 -,0.15,,none,1,2e-5,,,mistralai/Mistral-7B-v0.1,2,,,2,,,0.8696886157989502,306.2728,1.306,0.327,2674.74 -,0.29,,none,2,2e-5,,,mistralai/Mistral-7B-v0.1,1,,,8,,,1.0190681648254394,1094.7748,0.731,0.091,2993.127 -,0.29,,none,3,2e-5,,,mistralai/Mistral-7B-v0.1,2,,,4,,,0.8909366416931153,572.0158,1.399,0.175,2864.256 
-,,,none,4,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,,4,,,,,,, -,,,none,5,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,,2,,,,,,, -,,,none,6,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,,8,,,,,,, -,,,none,7,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,,4,,,,,,, -,,,none,8,2e-5,,,NousResearch/Llama-2-70b-hf,1,,,4,,,,,,, -,,,none,9,2e-5,,,NousResearch/Llama-2-70b-hf,2,,,2,,,,,,, -,,,none,10,2e-5,,,NousResearch/Llama-2-70b-hf,1,,,8,,,,,,, -,,,none,11,2e-5,,,NousResearch/Llama-2-70b-hf,2,,,4,,,,,,, -,0.15,,none,12,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8808393669128418,458.0185,0.873,0.218,3577.148 -,0.15,,none,13,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8548675441741943,259.6061,1.541,0.385,3155.55 -,0.29,,none,14,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,1.007005090713501,915.9053,0.873,0.109,3577.662 -,0.29,,none,15,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8773036098480225,480.6995,1.664,0.208,3408.367 -,,,none,16,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -,0.15,,none,17,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.862400369644165,535.3534,0.747,0.187,1530.204 -,,,none,18,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -,0.29,,none,19,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8798200416564942,924.5333,0.865,0.108,1772.137 -,,,none,20,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,21,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,2,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,22,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,23,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,24,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8661054801940918,481.8265,0.83,0.208,3400.394 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,25,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8560933685302734,271.0715,1.476,0.369,3022.081 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,26,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.8718929100036621,951.8817,0.84,0.105,3442.445 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,27,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8511034965515136,498.9262,1.603,0.2,3283.852 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,28,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8973640727996827,908.6145,0.44,0.11,1803.185 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,29,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8554682540893555,548.0391,0.73,0.182,1494.784 
-sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,30,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.8935444927215577,1714.3117,0.467,0.058,1911.438 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,31,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8596937179565429,954.0851,0.838,0.105,1717.247 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.14,True,accelerated-peft-bnb,32,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,4,16,q_proj k_proj v_proj o_proj,1.000812177658081,3696.2907,0.108,0.027,443.255 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.14,True,accelerated-peft-bnb,33,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9307080173492431,1960.7862,0.204,0.051,417.792 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,,True,accelerated-peft-bnb,34,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.28,True,accelerated-peft-bnb,35,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.9387501430511475,3809.1796,0.21,0.026,430.119 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,36,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9700051403045654,478.8299,0.835,0.209,3421.675 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,37,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9695001697540283,270.0251,1.481,0.37,3033.792 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,38,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.9514076042175293,946.5715,0.845,0.106,3461.756 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,39,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.9824443531036376,496.6611,1.611,0.201,3298.829 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,40,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9041421699523926,872.5836,0.458,0.115,1877.643 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,41,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9010070323944092,499.3435,0.801,0.2,1640.554 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,42,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.9001609039306641,1666.1579,0.48,0.06,1966.68 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,43,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8965495491027832,897.4939,0.891,0.111,1825.528 
-sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.14,True,accelerated-peft-autogptq,44,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9533391189575195,3621.8261,0.11,0.028,452.368 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.14,True,accelerated-peft-autogptq,45,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9467405033111572,1886.6815,0.212,0.053,434.202 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,,True,accelerated-peft-autogptq,46,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,,True,accelerated-peft-autogptq,47,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, +epoch,fp16,framework_config,index,learning_rate,lora_alpha,lora_dropout,model_name_or_path,num_gpus,nvidia_mem_reserved,peak_torch_mem_alloc_in_bytes,peft_method,per_device_train_batch_size,r,target_modules,torch_mem_alloc_in_bytes,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second +0.04,,none,0,2e-5,,,mistralai/Mistral-7B-v0.1,1,77705.0,72971724288.0,,4,,,44004763136.0,0.9278398831685384,177.1092,0.678,0.169,2775.237 +0.04,,none,1,2e-5,,,mistralai/Mistral-7B-v0.1,2,44706.0,36762859520.0,,2,,,29521119232.0,0.8970902442932129,91.086,1.317,0.329,2698.11 +0.09,,none,2,2e-5,,,mistralai/Mistral-7B-v0.1,1,74383.0,72972117504.0,,8,,,44005156352.0,0.9879656155904134,322.458,0.744,0.093,3048.583 +0.09,,none,3,2e-5,,,mistralai/Mistral-7B-v0.1,2,53907.0,36763056128.0,,4,,,29521315840.0,0.9259945551554362,167.7727,1.431,0.179,2929.678 +,,none,4,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,81043.0,,,4,,,,,,,, +,,none,5,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,79353.0,,,2,,,,,,,, +,,none,6,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,81043.0,,,8,,,,,,,, +,,none,7,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,79827.0,,,4,,,,,,,, +,,none,8,2e-5,,,NousResearch/Llama-2-70b-hf,1,80837.0,,,4,,,,,,,, +,,none,9,2e-5,,,NousResearch/Llama-2-70b-hf,2,80830.0,,,2,,,,,,,, +,,none,10,2e-5,,,NousResearch/Llama-2-70b-hf,1,80837.0,,,8,,,,,,,, +,,none,11,2e-5,,,NousResearch/Llama-2-70b-hf,2,80834.5,,,4,,,,,,,, +0.04,,none,12,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,29731.0,26108963328.0,lora,4,16,q_proj k_proj v_proj o_proj,15119590912.0,0.9096682230631511,136.624,0.878,0.22,3597.611 +0.04,,none,13,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,18697.0,15123161088.0,lora,2,16,q_proj k_proj v_proj o_proj,7850391552.0,0.8918854713439941,82.0311,1.463,0.366,2995.936 +0.09,,none,14,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,43195.0,37098695168.0,lora,8,16,q_proj k_proj v_proj o_proj,15119984128.0,0.962119706471761,270.6301,0.887,0.111,3632.412 +0.09,,none,15,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,26235.0,21433753600.0,lora,4,16,q_proj k_proj v_proj o_proj,7850588160.0,0.9218235015869141,143.8184,1.669,0.209,3417.643 +,,none,16,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,80955.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,,none,17,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,62617.0,57540387840.0,lora,2,16,q_proj k_proj v_proj o_proj,47311452160.0,0.9361546834309896,179.3128,0.669,0.167,1370.566 +,,none,18,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,80955.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, 
+0.09,,none,19,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,69848.0,64347637760.0,lora,4,16,q_proj k_proj v_proj o_proj,47311648768.0,0.9383139928181966,280.8919,0.854,0.107,1749.855 +,,none,20,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80917.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,21,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80894.0,,lora,2,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,22,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80917.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,23,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80979.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,baseline-peft-bnb,24,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,27023.0,22825932800.0,lora,4,16,q_proj k_proj v_proj o_proj,5368221184.0,0.9589527130126954,178.8061,0.671,0.168,2748.9 +0.04,True,baseline-peft-bnb,25,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,13530.0,9974622720.0,lora,2,16,q_proj k_proj v_proj o_proj,2727018496.0,0.9154380798339844,87.3652,1.374,0.343,2813.02 +0.09,True,baseline-peft-bnb,26,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,47145.0,40278956032.0,lora,8,16,q_proj k_proj v_proj o_proj,5368614400.0,0.9702634493509928,341.2286,0.703,0.088,2880.884 +0.09,True,baseline-peft-bnb,27,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,21502.0,16587205120.0,lora,4,16,q_proj k_proj v_proj o_proj,2727215104.0,0.914565912882487,149.9341,1.601,0.2,3278.241 +0.04,True,baseline-peft-bnb,28,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,48313.0,46419968512.0,lora,4,16,q_proj k_proj v_proj o_proj,25726225920.0,0.9744932492574055,351.8623,0.341,0.085,1396.91 +0.04,True,baseline-peft-bnb,29,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,25549.0,21922782720.0,lora,2,16,q_proj k_proj v_proj o_proj,13219233792.0,0.9303209940592448,171.4299,0.7,0.175,1433.589 +0.09,True,baseline-peft-bnb,30,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,69931.0,67089150464.0,lora,8,16,q_proj k_proj v_proj o_proj,25726619136.0,0.9745417594909668,629.837,0.381,0.048,1560.785 +0.09,True,baseline-peft-bnb,31,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,32957.0,29384115200.0,lora,4,16,q_proj k_proj v_proj o_proj,13219430400.0,0.9310146331787109,300.5119,0.799,0.1,1635.609 +,True,baseline-peft-bnb,32,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80893.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,baseline-peft-bnb,33,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,52634.0,46524471808.0,lora,2,16,q_proj k_proj v_proj o_proj,19172741120.0,1.0399916648864747,584.3145,0.205,0.051,420.595 +,True,baseline-peft-bnb,34,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,79557.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,True,baseline-peft-bnb,35,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80749.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,accelerated-peft-bnb,36,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,19931.0,15860019712.0,lora,4,16,q_proj k_proj v_proj o_proj,4843384320.0,0.9652111371358235,143.3569,0.837,0.209,3428.645 +0.04,True,accelerated-peft-bnb,37,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,13497.0,9974622720.0,lora,2,16,q_proj k_proj v_proj o_proj,2727018496.0,0.9277165730794271,86.4307,1.388,0.347,2843.435 +0.09,True,accelerated-peft-bnb,38,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,34355.0,26849751552.0,lora,8,16,q_proj k_proj v_proj o_proj,4843777536.0,0.9493892669677735,279.7156,0.858,0.107,3514.427 +0.09,True,accelerated-peft-bnb,39,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,21479.0,16587205120.0,lora,4,16,q_proj k_proj v_proj o_proj,2727215104.0,0.9110882759094239,149.3914,1.607,0.201,3290.15 
+0.04,True,accelerated-peft-bnb,40,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,38405.0,36218024448.0,lora,4,16,q_proj k_proj v_proj o_proj,25201389056.0,0.9741149584452311,278.5888,0.431,0.108,1764.32 +0.04,True,accelerated-peft-bnb,41,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,25592.0,21906697728.0,lora,2,16,q_proj k_proj v_proj o_proj,13219233792.0,0.9300654411315918,172.7359,0.695,0.174,1422.75 +0.09,True,accelerated-peft-bnb,42,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,50875.0,47207756288.0,lora,8,16,q_proj k_proj v_proj o_proj,25201782272.0,0.9748441060384114,512.2298,0.469,0.059,1919.139 +0.09,True,accelerated-peft-bnb,43,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,32957.0,29369087488.0,lora,4,16,q_proj k_proj v_proj o_proj,13219430400.0,0.9301350593566895,287.6381,0.834,0.104,1708.814 +0.04,True,accelerated-peft-bnb,44,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,72829.0,68159977472.0,lora,4,16,q_proj k_proj v_proj o_proj,37346815488.0,1.118430455525716,1075.2044,0.112,0.028,457.141 +0.04,True,accelerated-peft-bnb,45,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,52632.0,46524471808.0,lora,2,16,q_proj k_proj v_proj o_proj,19172741120.0,1.040946865081787,586.651,0.205,0.051,418.92 +,True,accelerated-peft-bnb,46,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80405.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,True,accelerated-peft-bnb,47,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80954.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,accelerated-peft-autogptq,48,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,20453.0,15890329088.0,lora,4,16,q_proj k_proj v_proj o_proj,4873693696.0,1.3805528958638509,151.0359,0.795,0.199,3254.326 +0.04,True,accelerated-peft-autogptq,49,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,17198.0,9952175616.0,lora,2,16,q_proj k_proj v_proj o_proj,3005709312.0,1.1706618309020995,87.4109,1.373,0.343,2811.548 +0.09,True,accelerated-peft-autogptq,50,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,34247.0,26880060928.0,lora,8,16,q_proj k_proj v_proj o_proj,4874086912.0,1.2741642634073893,282.6391,0.849,0.106,3478.076 +0.09,True,accelerated-peft-autogptq,51,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,24783.0,16262768128.0,lora,4,16,q_proj k_proj v_proj o_proj,3005905920.0,1.043952751159668,152.5473,1.573,0.197,3222.083 +0.04,True,accelerated-peft-autogptq,52,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,37461.0,35528093184.0,lora,4,16,q_proj k_proj v_proj o_proj,24511457792.0,0.9936613400777181,263.6066,0.455,0.114,1864.597 +0.04,True,accelerated-peft-autogptq,53,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,46641.0,25708175360.0,lora,2,16,q_proj k_proj v_proj o_proj,12788874240.0,0.9420519828796386,167.065,0.718,0.18,1471.045 +0.09,True,accelerated-peft-autogptq,54,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,49925.0,46517825024.0,lora,8,16,q_proj k_proj v_proj o_proj,24511851008.0,0.9855653127034505,498.9022,0.481,0.06,1970.406 +0.09,True,accelerated-peft-autogptq,55,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,52358.0,27739090432.0,lora,4,16,q_proj k_proj v_proj o_proj,12789070848.0,0.9389812151590983,281.8034,0.852,0.106,1744.195 +0.04,True,accelerated-peft-autogptq,56,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,1,71565.0,65895347200.0,lora,4,16,q_proj k_proj v_proj o_proj,36290144768.0,1.0755928039550782,1060.8387,0.113,0.028,463.331 +0.04,True,accelerated-peft-autogptq,57,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,2,80387.0,45397678592.0,lora,2,16,q_proj k_proj v_proj 
o_proj,18649885696.0,1.0256956418355305,576.0422,0.208,0.052,426.635 +,True,accelerated-peft-autogptq,58,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,1,80293.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +0.08,True,accelerated-peft-autogptq,59,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,2,80363.0,70667573760.0,lora,4,16,q_proj k_proj v_proj o_proj,18650082304.0,1.0266701062520345,1089.3291,0.22,0.028,451.214 diff --git a/scripts/benchmarks/scenarios.yaml b/scripts/benchmarks/scenarios.yaml index e79a74e6..248eacb2 100644 --- a/scripts/benchmarks/scenarios.yaml +++ b/scripts/benchmarks/scenarios.yaml @@ -32,6 +32,23 @@ scenarios: - 'mistralai/Mixtral-8x7B-Instruct-v0.1' - 'NousResearch/Llama-2-70b-hf' + - name: baseline-peft-bnb + framework_config: + - baseline-peft-bnb + arguments: + fp16: True + learning_rate: 2e-4 + torch_dtype: float16 + peft_method: lora + r: 16 + lora_alpha: 16 + lora_dropout: 0.0 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"] + model_name_or_path: + - 'mistralai/Mistral-7B-v0.1' + - 'mistralai/Mixtral-8x7B-Instruct-v0.1' + - 'NousResearch/Llama-2-70b-hf' + - name: accelerated-peft-bnb framework_config: - accelerated-peft-bnb @@ -64,4 +81,4 @@ scenarios: model_name_or_path: - 'TheBloke/Mistral-7B-v0.1-GPTQ' - 'TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ' - - TheBloke/Nous-Hermes-Llama2-70B-GPTQ \ No newline at end of file + - 'TheBloke/Llama-2-70B-GPTQ' \ No newline at end of file diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py index cd354cbf..67ad4058 100644 --- a/scripts/generate_sample_configurations.py +++ b/scripts/generate_sample_configurations.py @@ -141,6 +141,7 @@ def read_configuration(path: str) -> Dict: # specified key path, with the value. KEY_AUTO_GPTQ = "auto_gptq" KEY_BNB_NF4 = "bnb-nf4" +KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4" CONFIGURATIONS = { KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml", @@ -148,6 +149,13 @@ def read_configuration(path: str) -> Dict: "plugins/accelerated-peft/configs/bnb.yaml", [("peft.quantization.bitsandbytes.quant_type", "nf4")], ), + KEY_BNB_NF4_BASELINE: ( + "plugins/accelerated-peft/configs/bnb.yaml", + [ + ("peft.quantization.bitsandbytes.quant_type", "nf4"), + ("peft.quantization.bitsandbytes.no_peft_model", True), + ], + ), } # list of (tag, combi) tuples @@ -157,7 +165,8 @@ def read_configuration(path: str) -> Dict: # config. 
COMBINATIONS = [ ("accelerated-peft-autogptq", (KEY_AUTO_GPTQ,)), - # ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)), + ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)), + ("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)), ] diff --git a/scripts/run_benchmarks.sh b/scripts/run_benchmarks.sh index a281cc53..e08125b3 100644 --- a/scripts/run_benchmarks.sh +++ b/scripts/run_benchmarks.sh @@ -27,9 +27,18 @@ SCNTAG_PEFT_AUTOGPTQ=accelerated-peft-gptq # data will be cached in here DATA_CACHE=data/cache.json +# final result placed here +BENCH_RESULT_FILE=benchmarks.csv + +# freeze the pip requirements here +PIP_REQUIREMENTS_FILE=requirements.txt + +# ------------- DROP COLUMNS FROM RESULTS ----------------- # env inputs DRY_RUN=${DRY_RUN:-"false"} NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"} +NO_OVERWRITE=${NO_OVERWRITE:-"false"} +MEMORY_LOGGING=${MEMORY_LOGGING:-"huggingface"} # inputs NUM_GPUS_MATRIX=${1-"1 2"} @@ -41,12 +50,27 @@ echo "NUM_GPUS_MATRIX: $NUM_GPUS_MATRIX" echo "RESULT_DIR: $RESULT_DIR" echo "SCENARIOS_CONFIG: $SCENARIOS_CONFIG" echo "SCENARIOS_FILTER: $SCENARIOS_FILTER" +echo "MEMORY_LOGGING: $MEMORY_LOGGING" + +if [ -d "$RESULT_DIR" ]; then + echo "The results directory $RESULT_DIR already exists." + if [ "$NO_OVERWRITE" = "true" ]; then + echo "Results dir $RESULT_DIR exists, but NO_OVERWRITE=true" + echo "If you intend to overwrite, please delete the folder manually" + echo "or do not set NO_OVERWRITE" + exit 1 + fi + echo "Deleting $RESULT_DIR" + rm -rf $RESULT_DIR +fi # tag on the directories SCENARIOS_CONFIG=$WORKING_DIR/$SCENARIOS_CONFIG DEFAULTS_CONFIG=$WORKING_DIR/$DEFAULTS_CONFIG ACCELERATE_CONFIG=$WORKING_DIR/$ACCELERATE_CONFIG DATA_CACHE=$RESULT_DIR/$DATA_CACHE +BENCH_RESULT_FILE=$RESULT_DIR/$BENCH_RESULT_FILE +PIP_REQUIREMENTS_FILE=$RESULT_DIR/$PIP_REQUIREMENTS_FILE # ------------- EXTRA ARGS ----------------- @@ -65,6 +89,17 @@ if [ "$NO_DATA_PROCESSING" = "true" ]; then EXTRA_ARGS="$EXTRA_ARGS --no_data_processing" fi +if [ "$MEMORY_LOGGING" = "huggingface" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_memory_hf" +elif [ "$MEMORY_LOGGING" = "nvidia" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi" +elif [ "$MEMORY_LOGGING" = "all" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi --log_memory_hf" +fi + +# dump out the environment +pip freeze > $PIP_REQUIREMENTS_FILE + # run the bench python $WORKING_DIR/benchmark.py \ --num_gpus $NUM_GPUS_MATRIX \ @@ -73,3 +108,24 @@ python $WORKING_DIR/benchmark.py \ --defaults_config_path $DEFAULTS_CONFIG \ --dataset_save_path $DATA_CACHE \ --results_output_path $RESULT_DIR $EXTRA_ARGS + +# produce the final CSV for check-in +# need to set PYTHONPATH because display_bench_results.py imports from scripts.benchmarks.benchmark +# this will write to the BENCH_RESULT_FILE +# Remove the columns with values already represented by other metrics in the summary report +PYTHONPATH=.
\ + python $WORKING_DIR/display_bench_results.py benchmark_outputs \ + --result_file $BENCH_RESULT_FILE \ + --remove_columns \ + 'before_init_mem_cpu' \ + 'before_init_mem_gpu' \ + 'init_mem_cpu_alloc_delta' \ + 'init_mem_cpu_peaked_delta' \ + 'init_mem_gpu_alloc_delta' \ + 'init_mem_gpu_peaked_delta' \ + 'train_mem_cpu_alloc_delta' \ + 'train_mem_cpu_peaked_delta' \ + 'train_mem_gpu_alloc_delta' \ + 'train_mem_gpu_peaked_delta' \ + 'acceleration_framework_config_file' + diff --git a/scripts/verify_generated_configurations.sh b/scripts/verify_generated_configurations.sh new file mode 100755 index 00000000..83344796 --- /dev/null +++ b/scripts/verify_generated_configurations.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +OUTPUT_DIR=${1:-sample-configurations} + +GIT_DIFF=$(git diff HEAD -- $OUTPUT_DIR) +echo "git diff of configurations with HEAD:" +echo "$GIT_DIFF" + +function echoWarning() { + LIGHT_YELLOW='\033[1;33m' + NC='\033[0m' # No Color + echo -e "${LIGHT_YELLOW}${1}${NC}" +} + +if [ ! -z "$GIT_DIFF" ]; then + echoWarning "At least one of the configs in the plugins appears to have changed." + echoWarning "Please run 'tox -e gen-configs' to ensure that the sample-configurations are correctly generated!" + echoWarning "After that, commit the generated sample-configurations to remove this error." + exit 1 +fi + +echo "sample configurations are up to date with the configs in the plugin directories" diff --git a/tox.ini b/tox.ini index b9f48607..d719cb3e 100644 --- a/tox.ini +++ b/tox.ini @@ -9,22 +9,30 @@ skip_install = true commands = python scripts/generate_sample_configurations.py {posargs:sample-configurations} +[testenv:verify-configs] +description = verify that sample configurations for all plugins are properly generated +skip_install = true +commands = + bash scripts/verify_generated_configurations.sh {posargs:sample-configurations} +allowlist_externals = bash + # put this here first, consider moving it later [testenv:run-benches] description = run benchmarks skip_install = true +deps = + packaging # this is required for the flash-attn dep, which fms_hf_tuning did not specify + -e {toxinidir}/plugins/framework # install the framework here as the flash-attn dep requires torch +passenv = * # will pass the parent env, otherwise there are too many envs (e.g. TRANSFORMERS) that need to be set commands = # need a version of fms-hf-tuning that has integrated the framework # NOTE: have to install this first because it has not been merged yet # - this repo has a lot of pins, so we just install it first - pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@acceleration-framework" + pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@"{env:FHT_BRANCH:main} # some models need this for tokenizers pip install protobuf - # install the framework - pip install -e {toxinidir}/plugins/framework - # install the plugins for test # NOTE: when there are more plugins install here python -m fms_acceleration.cli install -e {toxinidir}/plugins/accelerated-peft