diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 00000000..2ec2bbca --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,69 @@ +# Copyright The FMS HF Tuning Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Format + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +jobs: + lint: + runs-on: ubuntu-latest + strategy: + matrix: + plugin_name: + - "framework" + # - "accelerated-peft" # enable later + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + - name: Run linter + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e lint + - name: Run formatter + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e fmt + - name: Run pytest + run: | + cd plugins/${{ matrix.plugin_name }} + tox -e py + + sample-config: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + - name: Run Config Verification + run: tox -e verify-configs diff --git a/plugins/accelerated-peft/configs/bnb.yaml b/plugins/accelerated-peft/configs/bnb.yaml index a29eef5e..ec5c3cfa 100644 --- a/plugins/accelerated-peft/configs/bnb.yaml +++ b/plugins/accelerated-peft/configs/bnb.yaml @@ -14,3 +14,7 @@ peft: # bitsandbytes: bitsandbytes: quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. 
+ no_peft_model: False \ No newline at end of file diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py index 49072208..e3b2dc6d 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py @@ -18,7 +18,7 @@ # Third Party from peft import LoraConfig from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ -from transformers.utils.import_utils import _is_package_available +from typing import List, Callable import torch @@ -54,3 +54,32 @@ def create_new_module_peft( # if module cannot be found, return None which results in a raise in the call-stack return new_module + +# consider to move this somewhere more general +def patch_forward_to_view_attributes_before_call( + old_forward: Callable, + attribute_names: List[str], torch_dtype, +): + # patch old_forward to view attribtues to torch_dype + # before call + + def _forward(self, *args, **kwargs): + # perform a view on all these attributes + for attr_name in attribute_names: + + # the view should be a passthrough + # if attr.dtype == torch_dtype + attr = getattr(self, attr_name) + + # perform view + attr = attr.view(torch_dtype) + + try: + setattr(self, attr_name, attr) + except TypeError: + # this means already have attr_name as a parameter, then + # just assign this way + self.__dict__[attr_name] = attr + + return old_forward(*args, **kwargs) + return _forward diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py index 2fd2f1e9..fa6082ab 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_autogptq.py @@ -25,8 +25,10 @@ from fms_acceleration import AccelerationPlugin from peft import LoraConfig, prepare_model_for_kbit_training from peft.tuners.lora.model import LoraModel +import torch.distributed from transformers import AutoModelForCausalLM, TrainingArguments import torch +import os class AutoGPTQAccelerationPlugin(AccelerationPlugin): @@ -50,6 +52,8 @@ def model_loader(self, model_name: str, **kwargs): # guarded imports # Third Party from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig + from auto_gptq.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear, QuantLinearFunction + from .autogptq_utils import patch_forward_to_view_attributes_before_call # Currently we allow only a quantized checkpoint to be loaded, we do not # implement the quantization process here. @@ -121,6 +125,43 @@ def model_loader(self, model_name: str, **kwargs): device_map=device_map, ) + # https://github.com/foundation-model-stack/fms-acceleration/pull/15 + # if FSDP distributed need to convert the AutoGPTQ model's + # parameters (in tensors) to parameters. 
Also need to + # store the int32 tensors in a float type + + try: + world_size = torch.distributed.get_world_size() + except ValueError: + world_size = 1 # pg not init + + if ( + world_size > 1 + and os.environ.get("ACCELERATE_USE_FSDP", "false").lower() == "true" + ): + # these parameters are to be patched for triton v2 + # consider making a map if patching more kernels + PATCH_FOR_FSDP_TRITON_V2 = ['qweight', 'qzeros'] + + # patch all the QuantLinear base layers + for mod in model.modules(): + if isinstance(mod, QuantLinear): + + # convert all patched attributes to Parameters of torch_dtype + # so FSDP can shard them + for attr_name in PATCH_FOR_FSDP_TRITON_V2: + attr = getattr(mod, attr_name) + attr = torch.nn.Parameter(attr.view(torch_dtype), requires_grad=False) + setattr(mod, attr_name, attr) + + # this patches the forward to convert them back to original + # type (i.e. int32) before the function call into the kernels + _forward = patch_forward_to_view_attributes_before_call( + mod.forward, attribute_names=PATCH_FOR_FSDP_TRITON_V2, + torch_dtype=torch.int32, # patch it back to + ) + mod.forward = MethodType(_forward, mod) + # replace AutoModelForCausalLM.from_config = _old_from_config diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py index fa11fe3a..dfd5fbc8 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/framework_plugin_bnb.py @@ -96,6 +96,9 @@ def __init__(self, configurations: Dict[str, Dict]): self._quant_type = self._check_config_and_maybe_check_values( key="peft.quantization.bitsandbytes.quant_type", values=["fp4", "nf4"] ) + self._no_peft_model = self._check_config_and_maybe_check_values( + key="peft.quantization.bitsandbytes.no_peft_model", values=[True, False] + ) def model_loader(self, model_name: str, **kwargs): @@ -121,6 +124,16 @@ def model_loader(self, model_name: str, **kwargs): "If running in FSDP, this is probably because accelerate is not used. " "This will most probably result in error." 
) + elif ( + world_size == 1 + and self._no_peft_model == True + ): + warnings.warn( + """Running on single device and setting plugin config `no_peft_model` as `True` + PEFT preparation will be managed by SFTTrainer and will cause a slowdown in training speed + due to extraneous dtype casting when SFTTrainer prepares the model using + https://github.com/huggingface/trl/blob/e90e8d91d2265e484f229c45a5eb8982f94a2936/trl/trainer/sft_trainer.py#L210""" + ) bnb_config = BitsAndBytesConfig( load_in_4bit=True, @@ -147,7 +160,8 @@ def requires_custom_loading(self): @property def requires_agumentation(self): - return True + # will skip the augmentation if _no_peft_model == True + return not self._no_peft_model def augmentation( self, diff --git a/plugins/accelerated-peft/tests/test_peft_plugins.py b/plugins/accelerated-peft/tests/test_peft_plugins.py index 894e1ca6..42404ddc 100644 --- a/plugins/accelerated-peft/tests/test_peft_plugins.py +++ b/plugins/accelerated-peft/tests/test_peft_plugins.py @@ -122,6 +122,20 @@ def test_configure_bnb_plugin(): assert framework.requires_agumentation assert len(framework.get_callbacks_and_ready_for_train()) == 0 + # test no_peft_model is true skips plugin.augmentation + for key, correct_value in [ + ("peft.quantization.bitsandbytes.no_peft_model", True), + ("peft.quantization.bitsandbytes.no_peft_model", False), + ]: + with instantiate_framework( + update_configuration_contents( + read_configuration(CONFIG_PATH_BNB), key, correct_value + ), + require_packages_check=False, + ): + # check flags and callbacks + assert (not correct_value)==framework.requires_agumentation + # attempt to activate plugin with configuration pointing to wrong path # - raise with message that no plugins can be configured with pytest.raises(ValueError) as e: diff --git a/plugins/accelerated-peft/tox.ini b/plugins/accelerated-peft/tox.ini index 6460cdbc..b79d0691 100644 --- a/plugins/accelerated-peft/tox.ini +++ b/plugins/accelerated-peft/tox.ini @@ -18,6 +18,13 @@ commands = [testenv:lint] description = run linters +deps = + pylint>=2.16.2,<=3.1.0 +commands = pylint src tests +allowlist_externals = pylint + +[testenv:fmt] +description = format skip_install = true deps = black>=22.12 @@ -26,6 +33,7 @@ commands = black {posargs:.} isort {posargs:.} + # [testenv:build] # description = build wheel # deps = diff --git a/plugins/framework/.pylintrc b/plugins/framework/.pylintrc new file mode 100644 index 00000000..45da4212 --- /dev/null +++ b/plugins/framework/.pylintrc @@ -0,0 +1,649 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. 
Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS,protobufs + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.9 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. 
+attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. 
+# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1100 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. 
+allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + # Added messages + use-symbolic-message-instead, + invalid-name, + missing-class-docstring, + missing-module-docstring, + missing-function-docstring, + consider-using-f-string, + inconsistent-return-statements, + no-member, + too-many-arguments, + too-many-locals, + too-many-branches, + too-many-statements, + cyclic-import, + too-few-public-methods, + protected-access, + fixme, + logging-format-interpolation, + logging-too-many-args, + attribute-defined-outside-init, + abstract-method, + pointless-statement, + wrong-import-order, + duplicate-code, + unbalanced-tuple-unpacking, + unused-argument + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 
'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=yes + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it work, +# install the 'python-enchant' package. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. 
+check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/plugins/framework/pyproject.toml b/plugins/framework/pyproject.toml index 534a8eeb..8200f0bf 100644 --- a/plugins/framework/pyproject.toml +++ b/plugins/framework/pyproject.toml @@ -22,7 +22,7 @@ classifiers=[ "Programming Language :: Python :: 3.11", ] dependencies = [ - "torch<2.3", + "torch>2.2,<2.3", "transformers<4.40", "peft", "accelerate" diff --git a/plugins/framework/src/fms_acceleration/__init__.py b/plugins/framework/src/fms_acceleration/__init__.py index c396c568..e39cd055 100644 --- a/plugins/framework/src/fms_acceleration/__init__.py +++ b/plugins/framework/src/fms_acceleration/__init__.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Standard +# use importlib to load the packages, if they are installed +import importlib + # Local -from .framework import AccelerationFramework from .constants import PLUGIN_PREFIX, PLUGINS +from .framework import AccelerationFramework from .framework_plugin import ( AccelerationPlugin, AccelerationPluginConfigError, get_relevant_configuration_sections, ) -# Standard -# use importlib to load the packages, if they are installed -import importlib - for postfix in PLUGINS: plugin_name = f"{PLUGIN_PREFIX}{postfix}" if importlib.util.find_spec(plugin_name): diff --git a/plugins/framework/src/fms_acceleration/cli.py b/plugins/framework/src/fms_acceleration/cli.py index a29fecef..04ad263a 100644 --- a/plugins/framework/src/fms_acceleration/cli.py +++ b/plugins/framework/src/fms_acceleration/cli.py @@ -13,22 +13,24 @@ # limitations under the License. -import argparse +# Standard +from typing import List, Union import os -import sys import subprocess -from typing import List, Union -import yaml - -from .constants import PLUGIN_PREFIX, PLUGINS +import sys +# Third Party from pip._internal.cli.main import main as pipmain - from transformers.utils.import_utils import _is_package_available +import yaml + +# Local +from .constants import PLUGIN_PREFIX, PLUGINS GITHUB_URL = "github.com/foundation-model-stack/fms-acceleration.git" -REPO_CACHE_DIR = '.fms/repository' +REPO_CACHE_DIR = ".fms/repository" + # TODO: make a version that fetches the def install_plugin( @@ -36,26 +38,29 @@ def install_plugin( ): "function to install plugin. Inputs should contain a pkg_name." - pkg_name = [x for x in args if not x.startswith('-')] - assert len(pkg_name) == 1,\ - "Please specify exactly one plugin to install" + pkg_name = [x for x in args if not x.startswith("-")] + assert len(pkg_name) == 1, "Please specify exactly one plugin to install" pkg_name = pkg_name[0] # take the flags - args = [x for x in args if x.startswith('-')] + args = [x for x in args if x.startswith("-")] if os.path.exists(pkg_name): - pipmain(['install', *args, pkg_name]) - return + pipmain(["install", *args, pkg_name]) + return if pkg_name.startswith(PLUGIN_PREFIX): pkg_name = pkg_name.replace(PLUGIN_PREFIX, "") # otherwise should be an internet install - pipmain([ - 'install', *args, - f'git+https://{GITHUB_URL}#subdirectory=plugins/accelerated-{pkg_name}' - ]) + pipmain( + [ + "install", + *args, + f"git+https://{GITHUB_URL}#subdirectory=plugins/accelerated-{pkg_name}", + ] + ) + def list_plugins(): print( @@ -75,13 +80,14 @@ def list_plugins(): print(f"{i+1}. 
{full_name} [{name}] {postfix}") + def get_benchmark_artifacts(dest_dir: str): if not os.path.exists(dest_dir): os.makedirs(dest_dir) - - if not os.path.exists(os.path.join(dest_dir, '.git')): - command = f"""cd {dest_dir} && git init && git remote add -f origin https://{GITHUB_URL} && \ + if not os.path.exists(os.path.join(dest_dir, ".git")): + command = f"""cd {dest_dir} && git init && \ + git remote add -f origin https://{GITHUB_URL} && \ git config --global init.defaultBranch main && \ git config core.sparsecheckout true && \ echo scripts/benchmarks >> .git/info/sparse-checkout && \ @@ -91,63 +97,73 @@ def get_benchmark_artifacts(dest_dir: str): command = f"cd {dest_dir} && git fetch origin && " command += "git pull origin main " - out = subprocess.run(command, shell=True, capture_output=True) + out = subprocess.run(command, shell=True, capture_output=True, check=False) if out.returncode != 0: - raise RuntimeError(f"could not get benchmark artifacts with error code {out.returncode}") - return out + raise RuntimeError( + f"could not get benchmark artifacts with error code {out.returncode}" + ) + return out + def list_sample_configs( - configs_dir: str, - contents_file: str = 'sample-configurations/CONTENTS.yaml', + configs_dir: str, + contents_file: str = "sample-configurations/CONTENTS.yaml", get_artifacts: bool = True, ): if get_artifacts: get_benchmark_artifacts(REPO_CACHE_DIR) - with open(os.path.join(configs_dir, contents_file)) as f: - for i, entry in enumerate(yaml.safe_load(f)['framework_configs']): - shortname = entry['shortname'] - plugins = entry['plugins'] - filename = entry['filename'] - print (f"{i+1}. {shortname} ({filename}) - plugins: {plugins}") + with open(os.path.join(configs_dir, contents_file), encoding="utf-8") as f: + for i, entry in enumerate(yaml.safe_load(f)["framework_configs"]): + shortname = entry["shortname"] + plugins = entry["plugins"] + filename = entry["filename"] + print(f"{i+1}. {shortname} ({filename}) - plugins: {plugins}") + def list_arguments( - scenario_dir: str, + scenario_dir: str, config_shortnames: Union[str, List[str]], - scenario_file: str = 'scripts/benchmarks/scenarios.yaml', - ignored_fields = ['model_name_or_path'], + scenario_file: str = "scripts/benchmarks/scenarios.yaml", + ignored_fields: List = None, get_artifacts: bool = True, ): + if ignored_fields is None: + ignored_fields = ["model_name_or_path"] + if get_artifacts: get_benchmark_artifacts(REPO_CACHE_DIR) if isinstance(config_shortnames, str): config_shortnames = [config_shortnames] - with open(os.path.join(scenario_dir, scenario_file)) as f: - scenarios = yaml.safe_load(f)['scenarios'] + with open(os.path.join(scenario_dir, scenario_file), encoding="utf-8") as f: + scenarios = yaml.safe_load(f)["scenarios"] found = 0 - print (f"Searching for configuration shortnames: {config_shortnames}") + print(f"Searching for configuration shortnames: {config_shortnames}") for scn in scenarios: - if 'framework_config' not in scn: + if "framework_config" not in scn: continue - hit_sn = [x for x in config_shortnames if x in scn['framework_config']] + hit_sn = [x for x in config_shortnames if x in scn["framework_config"]] if len(hit_sn) > 0: found += 1 - name = scn['name'] - arguments = scn['arguments'] + name = scn["name"] + arguments = scn["arguments"] hit_sn = ", ".join(hit_sn) - print (f"{found}. scenario: {name}\n configs: {hit_sn}\n arguments:") + print(f"{found}. 
scenario: {name}\n configs: {hit_sn}\n arguments:") lines = [] for key, val in arguments.items(): if key not in ignored_fields: lines.append(f" --{key} {val}") - - print (" \\\n".join(lines)) - print ("\n") + + print(" \\\n".join(lines)) + print("\n") if not found: - print(f"ERROR: Could not list arguments for configuration shortname '{config_shortnames}'") + print( + f"ERROR: Could not list arguments for configuration shortname '{config_shortnames}'" + ) + def cli(): # not using argparse since its so simple @@ -157,31 +173,30 @@ def cli(): ) argv = sys.argv if len(argv) == 1: - print (message) + print(message) return - else: + if len(argv) > 1: command = argv[1] if len(argv) > 2: variadic = sys.argv[2:] else: variadic = [] - if command == 'install': + if command == "install": assert len(variadic) >= 1, "Please provide the acceleration plugin name" install_plugin(*variadic) - elif command == 'plugins': + elif command == "plugins": assert len(variadic) == 0, "list does not require arguments" list_plugins() - elif command == 'configs': + elif command == "configs": assert len(variadic) == 0, "list-config does not require arguments" list_sample_configs(REPO_CACHE_DIR) - elif command == 'arguments': + elif command == "arguments": assert len(variadic) >= 1, "Please provide the config shortname" list_arguments(REPO_CACHE_DIR, *variadic) else: - raise NotImplementedError( - f"Unknown fms_acceleration.cli command '{command}'" - ) + raise NotImplementedError(f"Unknown fms_acceleration.cli command '{command}'") + -if __name__ == '__main__': - cli() \ No newline at end of file +if __name__ == "__main__": + cli() diff --git a/plugins/framework/src/fms_acceleration/framework.py b/plugins/framework/src/fms_acceleration/framework.py index 529b6bd5..6d545ac7 100644 --- a/plugins/framework/src/fms_acceleration/framework.py +++ b/plugins/framework/src/fms_acceleration/framework.py @@ -13,7 +13,7 @@ # limitations under the License. 
# Standard -from typing import Callable, Dict, List, Optional, Set, Tuple +from typing import Callable, List, Optional, Set, Tuple # Third Party from accelerate import Accelerator @@ -23,19 +23,20 @@ import torch import yaml -# want to use the transformers logger, but a bit of pain -logger = logging.get_logger(__name__) # pylint: disable=invalid-name -logger.setLevel(logging._get_default_logging_level()) -logger.addHandler(logging._default_handler) - -# First Party +# Local +from .constants import KEY_PLUGINS from .framework_plugin import ( PLUGIN_REGISTRATIONS, AccelerationPlugin, PluginRegistration, get_relevant_configuration_sections, ) -from .constants import KEY_PLUGINS + +# want to use the transformers logger, but a bit of pain +logger = logging.get_logger(__name__) # pylint: disable=invalid-name +logger.setLevel(logging._get_default_logging_level()) +logger.addHandler(logging._default_handler) + def check_plugin_packages(plugin: AccelerationPlugin): if plugin.require_packages is None: @@ -47,13 +48,14 @@ def check_plugin_packages(plugin: AccelerationPlugin): missing_packages.append(package_name) return len(missing_packages) == 0, missing_packages + def log_initialization_message( active_class_names: Set[str], registered_plugins: List[PluginRegistration], # list of regs - logger: Callable = None, + logging_func: Callable = None, ): - if logger is None: - logger = print + if logging_func is None: + logging_func = print def _registration_display(reg: PluginRegistration): return ( @@ -62,36 +64,33 @@ def _registration_display(reg: PluginRegistration): f"Version: {reg.package_version}." ) - logger("***** FMS AccelerationFramework *****") + logging_func("***** FMS AccelerationFramework *****") for reg in registered_plugins: if reg.plugin.__name__ in active_class_names: - logger(_registration_display(reg)) + logging_func(_registration_display(reg)) class AccelerationFramework: - - active_plugins: List[Tuple[str, AccelerationPlugin]] = list() - plugins_require_custom_loading: List = list() + active_plugins: List[Tuple[str, AccelerationPlugin]] = [] + plugins_require_custom_loading: List = [] def __init__( self, configuration_file: Optional[str], require_packages_check: bool = True ): - - with open(configuration_file, "r") as f: + with open(configuration_file, "r", encoding="utf-8") as f: contents = yaml.safe_load(f) if KEY_PLUGINS not in contents or contents[KEY_PLUGINS] is None: raise ValueError(f"Configuration file must contain a '{KEY_PLUGINS}' body") # pepare the plugin configurations - plugin_configs = {k: v for k, v in contents[KEY_PLUGINS].items()} + plugin_configs = dict(contents[KEY_PLUGINS].items()) # relevant sections are returned following plugin precedence, i.e., # they follow the registration order. for selected_configs, cls in get_relevant_configuration_sections( plugin_configs ): - # then the model is to be installed # get the plugin plugin_name = str(cls.__name__) @@ -108,7 +107,7 @@ def __init__( # check if already activated, if so, will not reactivate again # maintain uniqueness of activated plugins - if any([x == plugin_name for x, _ in self.active_plugins]): + if any(x == plugin_name for x, _ in self.active_plugins): continue # activate plugin @@ -123,15 +122,16 @@ def __init__( "framework configuration file." ) - assert ( - len(self.plugins_require_custom_loading) <= 1 - ), f"Can load at most 1 plugin with custom model loading, but tried to '{self.plugins_require_custom_loading}'." 
+ assert len(self.plugins_require_custom_loading) <= 1, ( + "Can load at most 1 plugin with custom model loading, " + f"but tried to '{self.plugins_require_custom_loading}'." + ) def model_loader(self, model_name: str, **kwargs): - if len(self.plugins_require_custom_loading) == 0: raise NotImplementedError( - f"Attempted model loading, but none of activated plugins '{list(self.active_plugins)}' " + "Attempted model loading, but none " + f"of activated plugins '{list(self.active_plugins)}' " "require custom loading." ) @@ -152,10 +152,9 @@ def augmentation( # NOTE: this assumes that augmentation order does not matter for plugin_name, plugin in self.active_plugins: - # check the model arcs at augmentation if plugin.restricted_model_archs and not any( - [x in model_archs for x in plugin.restricted_model_archs] + x in model_archs for x in plugin.restricted_model_archs ): raise ValueError( f"Model architectures in '{model_archs}' are supported for '{plugin_name}'." @@ -174,16 +173,16 @@ def requires_custom_loading(self): @property def requires_agumentation(self): - return any([x.requires_agumentation for _, x in self.active_plugins]) + return any(x.requires_agumentation for _, x in self.active_plugins) def get_callbacks_and_ready_for_train( self, model: torch.nn.Module = None, accelerator: Accelerator = None ): # show the initialized message log_initialization_message( - set([x for x, _ in self.active_plugins]), + {x for x, _ in self.active_plugins}, PLUGIN_REGISTRATIONS, - logger=logger.info, + logging_func=logger.info, ) cbks = [] diff --git a/plugins/framework/src/fms_acceleration/framework_plugin.py b/plugins/framework/src/fms_acceleration/framework_plugin.py index 1d17a863..fc6da973 100644 --- a/plugins/framework/src/fms_acceleration/framework_plugin.py +++ b/plugins/framework/src/fms_acceleration/framework_plugin.py @@ -36,7 +36,7 @@ class PluginRegistration: package_version: str = None -PLUGIN_REGISTRATIONS: List[PluginRegistration] = list() +PLUGIN_REGISTRATIONS: List[PluginRegistration] = [] def _trace_key_path(configuration: Dict, key: str): @@ -85,7 +85,6 @@ def get_relevant_configuration_sections(configuration: Dict) -> Dict: class AccelerationPlugin: - # will be triggered if the configuration_paths are found in the # acceleration framework configuration file (under KEY_PLUGINS) @staticmethod @@ -94,13 +93,18 @@ def register_plugin( configuration_and_paths: List[str], **kwargs, ): - global PLUGIN_REGISTRATIONS + + # pylint: disable=trailing-whitespace + # removed because of src/fms_acceleration/framework_plugin.py:96:8: + # W0602: Using global for 'PLUGIN_REGISTRATIONS' but no assignment + # is done (global-variable-not-assigned) + # global PLUGIN_REGISTRATIONS # get the package metadata pkg_name = sys.modules[plugin.__module__].__package__ try: package_version = importlib.metadata.version(pkg_name) - except importlib.metadata.PackageNotFoundError: + except importlib.metadata.PackageNotFoundError: package_version = None PLUGIN_REGISTRATIONS.append( @@ -116,7 +120,6 @@ def register_plugin( require_packages: Optional[Set] = None def __init__(self, configurations: Dict[str, Dict]): - # will pass in a list of dictionaries keyed by "configuration_keys" # to be used for initialization self.configurations = configurations @@ -153,13 +156,15 @@ def _check_config_and_maybe_check_values(self, key: str, values: List[Any] = Non # if the tree is a dict if len(t.keys()) > 1: raise AccelerationPluginConfigError( - f"{self.__class__.__name__}: '{key}' found but amongst multiple '{t.keys()}' exist. 
Ambiguous check in expected set '{values}'." + f"{self.__class__.__name__}: '{key}' found but amongst multiple " + "'{t.keys()}' exist. Ambiguous check in expected set '{values}'." ) t = list(t.keys())[0] # otherwise take the first value if t not in values: raise AccelerationPluginConfigError( - f"{self.__class__.__name__}: Value at '{key}' was '{t}'. Not found in expected set '{values}'." + f"{self.__class__.__name__}: Value at '{key}' was '{t}'. " + "Not found in expected set '{values}'." ) else: # if nothing to check against, we still want to ensure its a valid diff --git a/plugins/framework/src/fms_acceleration/utils/test_utils.py b/plugins/framework/src/fms_acceleration/utils/test_utils.py index aa796707..3cc4004f 100644 --- a/plugins/framework/src/fms_acceleration/utils/test_utils.py +++ b/plugins/framework/src/fms_acceleration/utils/test_utils.py @@ -51,7 +51,7 @@ def update_configuration_contents( def read_configuration(path: str) -> Dict: "helper function to read yaml config into json" - with open(path) as f: + with open(path, encoding="utf-8") as f: return yaml.safe_load(f) @@ -69,13 +69,16 @@ def build_framework_and_maybe_instantiate( plugins_to_be_registered: List[ Tuple[List[str], Type[AccelerationPlugin]] # and_paths, plugin_class ], - configuration_contents: Dict = {}, + configuration_contents: Dict = None, instantiate: bool = True, reset_registrations: bool = True, require_packages_check: bool = True, ): "helper function to register plugins and instantiate an acceleration framework for testing" + if configuration_contents is None: + configuration_contents = {} + # empty out if reset_registrations: old_registrations = [] @@ -93,7 +96,9 @@ def build_framework_and_maybe_instantiate( ) if instantiate: - yield configure_framework_from_json(configuration_contents, require_packages_check) + yield configure_framework_from_json( + configuration_contents, require_packages_check + ) else: yield @@ -104,9 +109,11 @@ def build_framework_and_maybe_instantiate( AccelerationFramework.active_plugins = old_active_plugins AccelerationFramework.plugins_require_custom_loading = old_custom_loading_plugins -# alias because default instantiate=True + +# alias because default instantiate=True build_framework_and_instantiate = build_framework_and_maybe_instantiate + def instantiate_framework( configuration_contents: Dict, require_packages_check: bool = True, @@ -122,8 +129,12 @@ def instantiate_framework( ) -def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] = []): +def create_noop_model_with_archs( + class_name: str = "ModelNoop", archs: List[str] = None +): "helper function to create a dummy model with mocked architectures" + if archs is None: + archs = [] config = type("Config", (object,), {"architectures": archs}) return type(class_name, (torch.nn.Module,), {"config": config}) @@ -131,8 +142,8 @@ def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] def create_plugin_cls( class_name: str = "PluginNoop", - restricted_models: Set = {}, - require_pkgs: Set = {}, + restricted_models: Set = None, + require_pkgs: Set = None, requires_custom_loading: bool = False, requires_agumentation: bool = False, agumentation: Callable = None, @@ -140,6 +151,11 @@ def create_plugin_cls( ): "helper function to create plugin class" + if restricted_models is None: + restricted_models = set() + if require_pkgs is None: + require_pkgs = set() + attributes = { "restricted_model_archs": restricted_models, "require_packages": require_pkgs, diff --git 
a/plugins/framework/tests/test_framework.py b/plugins/framework/tests/test_framework.py index eff1600a..b59ff62f 100644 --- a/plugins/framework/tests/test_framework.py +++ b/plugins/framework/tests/test_framework.py @@ -15,103 +15,19 @@ # SPDX-License-Identifier: Apache-2.0 # https://spdx.dev/learn/handling-license-info/ -# Standard -from contextlib import contextmanager -from tempfile import NamedTemporaryFile -from typing import Callable, Dict, List, Set, Tuple, Type - # Third Party -import pytest +import pytest # pylint: disable=(import-error import torch -import yaml # First Party -from fms_acceleration.framework import KEY_PLUGINS, AccelerationFramework -from fms_acceleration.framework_plugin import PLUGIN_REGISTRATIONS, AccelerationPlugin - -# ----------------------------- HELPER ------------------------------------- - - -@contextmanager -def build_framework_and_instantiate( - plugins_to_be_registered: List[ - Tuple[List[str], Type[AccelerationPlugin]] # and_paths, plugin_class - ], - configuration_contents: Dict, -): - "helper function to instantiate an acceleration framework for testing" - - # empty out - old_registrations = [] - old_registrations.extend(PLUGIN_REGISTRATIONS) - PLUGIN_REGISTRATIONS.clear() - old_active_plugins = AccelerationFramework.active_plugins - old_custom_loading_plugins = AccelerationFramework.plugins_require_custom_loading - AccelerationFramework.active_plugins = [] - AccelerationFramework.plugins_require_custom_loading = [] - - for path, plugin in plugins_to_be_registered: - AccelerationPlugin.register_plugin( - plugin, - configuration_and_paths=path, - ) - - with NamedTemporaryFile("w") as f: - yaml.dump({KEY_PLUGINS: configuration_contents}, f) - yield AccelerationFramework(f.name) - - # put back - PLUGIN_REGISTRATIONS.clear() - PLUGIN_REGISTRATIONS.extend(old_registrations) - AccelerationFramework.active_plugins = old_active_plugins - AccelerationFramework.plugins_require_custom_loading = old_custom_loading_plugins - - -def create_noop_model_with_archs(class_name: str = "ModelNoop", archs: List[str] = []): - "helper function to create a dummy model with mocked architectures" - - config = type("Config", (object,), {"architectures": archs}) - return type(class_name, (torch.nn.Module,), {"config": config}) - - -def create_plugin_cls( - class_name: str = "PluginNoop", - restricted_models: Set = {}, - require_pkgs: Set = {}, - requires_custom_loading: bool = False, - requires_agumentation: bool = False, - agumentation: Callable = None, - model_loader: Callable = None, -): - "helper function to create plugin class" - - attributes = { - "restricted_model_archs": restricted_models, - "require_packages": require_pkgs, - "requires_custom_loading": requires_custom_loading, - "requires_agumentation": requires_agumentation, - } - - if agumentation is not None: - attributes["augmentation"] = agumentation - - if model_loader is not None: - attributes["model_loader"] = model_loader - - return type(class_name, (AccelerationPlugin,), attributes) - - -def dummy_augmentation(self, model, train_args, modifiable_args): - "dummy augmentation implementation" - return model, modifiable_args - - -def dummy_custom_loader(self, model_name, **kwargs): - "dummy custom loader returning dummy model" - return create_noop_model_with_archs(archs=["DummyModel"]) - - -# ----------------------------- TESTS ------------------------------------- +from fms_acceleration.framework_plugin import PLUGIN_REGISTRATIONS +from fms_acceleration.utils.test_utils import ( + 
build_framework_and_instantiate, + create_noop_model_with_archs, + create_plugin_cls, + dummy_augmentation, + dummy_custom_loader, +) def test_config_with_empty_body_raises(): @@ -208,7 +124,6 @@ def test_single_plugin(): plugins_to_be_registered=[(["dummy"], incomplete_plugin)], configuration_contents={"dummy": {"key1": 1}}, ) as framework: - # check 1. assert len(PLUGIN_REGISTRATIONS) == 1 assert len(framework.active_plugins) == 1 @@ -300,7 +215,6 @@ def test_two_plugins(): ], configuration_contents={"dummy": {"key1": 1}, "dummy2": {"key1": 1}}, ) as framework: - # check 1. assert len(PLUGIN_REGISTRATIONS) == 2 @@ -357,7 +271,11 @@ def test_plugin_registration_order(): "test that plugin registration order determines their activation order" # build a set of hooks that register the activation order - def hook_builder(act_order=[]): + def hook_builder(act_order=None): + + if act_order is None: + act_order = [] + def _hook( self, model, @@ -391,7 +309,6 @@ def _hook( plugins_to_be_registered=[([k], v) for k, v in plugins_to_be_installed], configuration_contents={k: {"key1": 1} for k, _ in plugins_to_be_installed}, ) as framework: - # trigger augmentation of active plugins and check order of activation framework.augmentation(model, None, None) for c, (n, _) in zip(plugin_activation_order, plugins_to_be_installed): diff --git a/plugins/framework/tox.ini b/plugins/framework/tox.ini index a5db281a..52513f9a 100644 --- a/plugins/framework/tox.ini +++ b/plugins/framework/tox.ini @@ -8,6 +8,13 @@ commands = pytest {posargs:tests} [testenv:lint] description = run linters +deps = + pylint>=2.16.2,<=3.1.0 +commands = pylint src tests +allowlist_externals = pylint + +[testenv:fmt] +description = format skip_install = true deps = black>=22.12 diff --git a/sample-configurations/CONTENTS.yaml b/sample-configurations/CONTENTS.yaml index 33c24253..8d45bedf 100644 --- a/sample-configurations/CONTENTS.yaml +++ b/sample-configurations/CONTENTS.yaml @@ -14,4 +14,9 @@ framework_configs: - shortname: accelerated-peft-bnb plugins: - accelerated-peft - filename: accelerated-peft-bnb-nf4-sample-configuration.yaml \ No newline at end of file + filename: accelerated-peft-bnb-nf4-sample-configuration.yaml + + - shortname: baseline-peft-bnb + plugins: + - accelerated-peft + filename: baseline-peft-bnb-nf4-sample-configuration.yaml \ No newline at end of file diff --git a/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml b/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml index e920931c..19fb71fb 100644 --- a/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml +++ b/sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml @@ -18,3 +18,7 @@ plugins: # bitsandbytes: bitsandbytes: quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. + no_peft_model: false diff --git a/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml b/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml new file mode 100644 index 00000000..244de5e7 --- /dev/null +++ b/sample-configurations/baseline-peft-bnb-nf4-sample-configuration.yaml @@ -0,0 +1,24 @@ +# FMS Acceleration Plugin Configuration. +# +# Each stanza incorporates various configurations for +# different fine-tuning / training tasks. 
+plugins: + # PEFT-related acceleration + peft: + + # quantization-related acceleration + # e.g., kernels for quantized base weights + quantization: + + # For loading BitsAndBytes quantized layers + # to serve as 4bit base-weights for LoRA PEFT-tuning. + # NOTE: currently AutoGPTQ is not properly integrated into huggingface / + # bitsandbytes, thus recommended quant_type to be either "nf4" + # or "fp4". + # bitsandbytes: + bitsandbytes: + quant_type: nf4 + + # If True, then no get_peft_model and prepare_model_for_kbit_training + # will be called. + no_peft_model: true diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md index fdc6d7bc..115719b7 100644 --- a/scripts/benchmarks/README.md +++ b/scripts/benchmarks/README.md @@ -51,6 +51,11 @@ A `scenario` has the following key components: The best way is via `tox` which manages the dependencies, including installing the correct version [fms-hf-tuning](https://github.com/foundation-model-stack/fms-hf-tuning). +- install the `setup_requirements.txt` to get `tox`: + ``` + pip install -r setup_requirements.txt + ``` + - run a *small* representative set of benches: ``` tox -e run-benches @@ -59,7 +64,9 @@ The best way is via `tox` which manages the dependencies, including installing t ``` tox -e run-benches -- "1 2" ``` -ationFramework` to demonstrate the various plugins. + +Note: +- the `tox` command above accepts the environment variables `DRY_RUN`, `NO_DATA_PROCESSING` and `NO_OVERWRITE`. See `scripts/run_benchmarks.sh` ## Running Benchmarks @@ -82,3 +89,98 @@ Alternatively run [`benchmark.py`](./benchmark.py) directly. To see the help do: ``` python benchmark.py --help ``` + +Note: +- in `run_benchmarks.sh` we will clear the `RESULT_DIR` if it exists, to avoid contamination with old results. To protect against accidental overwrites, run with `NO_OVERWRITE=true`. + +## Logging GPU Memory + +There are 2 ways to benchmark memory in `run_benchmarks.sh`: +- Setting the environment variable `MEMORY_LOGGING=nvidia` will use Nvidia `nvidia-smi`'s API +- Setting the environment variable `MEMORY_LOGGING=huggingface` (default) will use HuggingFace `HFTrainer`'s API + +Both approaches will print out the memory values to the benchmark report. + - For Nvidia, the result column will be `nvidia_mem_reserved` + - For Torch/HF, the result columns will be `peak_torch_mem_alloc_in_bytes` and `torch_mem_alloc_in_bytes` + +### Nvidia-SMI `nvidia-smi` +`nvidia-smi` is a command line utility (CLI) based on the NVIDIA Management Library (NVML). A separate process call is used to start, log and finally terminate the CLI for every experiment. + +The keyword `memory.used` is passed to the `--query-gpu` argument to log the memory usage at some interval. The list of keywords that can be logged can be found by running `nvidia-smi --help-query-gpu`. + +Since it runs in a separate process, it is less likely to affect the training. However, it is a coarser approach than HF as NVML's definition of used memory takes the sum of (memory allocated + memory reserved). Refer to their [documentation](https://docs.nvidia.com/deploy/nvml-api/structnvmlMemory__t.html#structnvmlMemory__t:~:text=Sum%20of%20Reserved%20and%20Allocated%20device%20memory%20(in%20bytes).%20Note%20that%20the%20driver/GPU%20always%20sets%20aside%20a%20small%20amount%20of%20memory%20for%20bookkeeping). + +After every experiment, + - the logged values are calibrated to remove any existing foreign memory values + - the peak values for each gpu device are taken + - the values are finally averaged across all devices.
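+
+For reference, the post-processing that turns the raw `nvidia-smi` log (`gpu_memory_logs.csv`) into the reported `nvidia_mem_reserved` value is roughly the following. This is only a simplified sketch of what `benchmark.py` does in `Experiment.get_peak_mem_usage_by_device_id` and `Experiment.write_result`; the standalone helper shown here is illustrative:
+```
+import pandas as pd
+
+def summarize_nvidia_smi_log(csv_path: str) -> float:
+    "Condense a gpu_memory_logs.csv into a single reserved-memory value (MiB)"
+    logs = pd.read_csv(csv_path, skipinitialspace=True)
+    # values are logged as strings like "52729 MiB", so strip the unit suffix
+    used = logs["memory.used [MiB]"].apply(lambda x: float(x.replace(" MiB", "")))
+    by_device = used.groupby(logs["index"])
+    # calibrate: subtract the first reading so pre-existing (foreign) memory is not counted
+    calibrated_peaks = by_device.max() - by_device.first()
+    # average the calibrated per-device peaks into the single reported number
+    return calibrated_peaks.mean()
+```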
+ +### Torch/HuggingFace `HFTrainer` +HFTrainer has a feature to log memory through the `skip_memory_metrics=False` training argument. In their [documentation](https://huggingface.co/docs/transformers/en/main_classes/trainer#transformers.TrainingArguments.skip_memory_metrics), it is mentioned that setting this argument to `False` will affect training speed. In our tests so far (below), we do not see a significant difference in throughput (tokens/sec) when using this argument. + +The HFTrainer API is more granular than `nvidia-smi` as it uses `torch.cuda` to pinpoint memory usage inside the trainer: + - It reports the allocated memory by calling `torch.cuda.memory_allocated()` and `torch.cuda.max_memory_allocated()` inside its probes + - It has memory logging probes at different stages of the Trainer - `init`, `train`, `evaluate`, `predict` + +##### NOTE: +- When in distributed mode, the Trainer will only log the rank 0 memory. +- For stability purposes, it only tracks the outer level of the train, evaluate and predict methods, i.e. if eval is called during train, there won't be a nested invocation of the memory probe. +- Any GPU memory incurred outside of the defined Trainer stages won't be tracked. + +### Additional Details + +#### Calculating Memory from HFTrainer Output Metrics + +This is an example of the memory values that HFTrainer will produce in the outputs of `train()`: +``` +output_metrics = { + 'train_runtime': 191.2491, + 'train_samples_per_second': 0.209, + 'train_steps_per_second': 0.052, + 'train_tokens_per_second': 428.342, + 'train_loss': 1.0627506256103516, + 'init_mem_cpu_alloc_delta': 4096, + 'init_mem_gpu_alloc_delta': 0, + 'init_mem_cpu_peaked_delta': 0, + 'init_mem_gpu_peaked_delta': 0, + 'train_mem_cpu_alloc_delta': 839086080, + 'train_mem_gpu_alloc_delta': -17491768832, + 'train_mem_cpu_peaked_delta': 0, + 'train_mem_gpu_peaked_delta': 26747825664, + 'before_init_mem_cpu': 5513297920, + 'before_init_mem_gpu': 36141687296, + 'epoch': 0.01 +} +``` + +We refer to the keys of the memory metrics in this order: + - `before_init_mem_X` as stage0 + - `init_mem_X` as stage1 + - `train_mem_X` as stage2 + - ... + +We currently compute the memory values in the report by taking the largest of sums. For example: + +For the allocated memory value: +``` +max([ + stage0_mem + stage1_allocated_delta, + stage0_mem + stage1_allocated_delta + stage2_allocated_delta, + ... +]) +``` + +For the peak memory value: +``` +max([ + stage0_mem + stage1_allocated_delta + stage1_peaked_delta, + stage0_mem + stage1_allocated_delta + stage2_allocated_delta + stage2_peaked_delta, + ... +]) +``` + +Notice that we do not include `stage0_mem` alone when computing the max value. This is to avoid misleading comparisons between GPTQ-LoRA and others. GPTQ-LoRA + FSDP currently does not support low-memory mode as mentioned [here](https://github.com/foundation-model-stack/fms-acceleration/issues/18). The `stage0_mem` value of GPTQ-LoRA + FSDP will reflect a larger than expected value as the model is loaded fully before the trainer is initialized and is subsequently sharded internally in `trainer.prepare`. This might cause some misleading comparisons when other variants are loaded in low-memory mode and have smaller `stage0_mem` memory consumption than GPTQ-LoRA + FSDP.
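+
+Expressed as (simplified) code, the above largest-of-sums calculation looks roughly as follows. This is only a condensed sketch of the `extract_gpu_memory_metrics` helper added to `benchmark.py` in this PR; the standalone function name here is illustrative and error handling is omitted:
+```
+def summarize_gpu_mem(metrics: dict):
+    "Largest-of-sums over the HFTrainer gpu memory metrics"
+    # (stage, include in max); stage0/stage1 are currently excluded from the max (see issue #18)
+    stages = [
+        ("before_init_mem_gpu", False),
+        ("init_mem_gpu", False),
+        ("train_mem_gpu", True),
+    ]
+    running_alloc, alloc_sums, peak_sums = 0, [], []
+    for stage, include in stages:
+        # stage0 has no "_alloc_delta" key; it is an absolute reading
+        running_alloc += metrics.get(f"{stage}_alloc_delta", metrics.get(stage, 0))
+        if include:
+            alloc_sums.append(running_alloc)
+            peak_sums.append(running_alloc + metrics.get(f"{stage}_peaked_delta", 0))
+    # reported as peak_torch_mem_alloc_in_bytes and torch_mem_alloc_in_bytes respectively
+    return max(peak_sums), max(alloc_sums)
+```
+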
Once low-memory mode is supported for GPTQ-LoRA, we will include `stage0_mem` back inside the max computation + +We compare memory values between Nvidia-SMI and Torch in this PR - [Memory Benchmarking](https://github.com/foundation-model-stack/fms-acceleration/pull/14). + + diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py index 021426cd..afbf61cf 100644 --- a/scripts/benchmarks/benchmark.py +++ b/scripts/benchmarks/benchmark.py @@ -1,17 +1,20 @@ +# Standard +from itertools import product +from typing import Any, Callable, Dict, List, Tuple, Union import argparse import json import os import re import subprocess import warnings -from itertools import product -from typing import Callable, Dict, List, Tuple, Any, Union +# Third Party +from tqdm import tqdm +from transformers import AutoConfig, HfArgumentParser, TrainingArguments import datasets import pandas as pd +import torch import yaml -from tqdm import tqdm -from transformers import HfArgumentParser, TrainingArguments, AutoConfig """ This benchmarking script @@ -45,22 +48,20 @@ FILE_RESULTS = "results.json" FILE_SHELL_COMMAND = "command.sh" FILE_SCRIPT_ARGS = "script.json" -FILE_SUMMARY_CSV = 'summary.csv' +FILE_SUMMARY_CSV = "raw_summary.csv" DIR_BENCHMARKS = os.path.dirname(os.path.realpath(__file__)) -DIR_PREFIX_EXPERIMENT = 'exp' -DIR_NAME_RESULTS_DEFAULT = 'benchmark_results' -DIR_SAMP_CONFIGS = os.path.join(DIR_BENCHMARKS, '../../sample-configurations') +DIR_PREFIX_EXPERIMENT = "exp" +DIR_NAME_RESULTS_DEFAULT = "benchmark_results" +DIR_SAMP_CONFIGS = os.path.join(DIR_BENCHMARKS, "../../sample-configurations") # read list of sample configurations from contents file FRAMEWORK_CONFIG_KEYPAIRS = [] -with open(os.path.join(DIR_SAMP_CONFIGS, 'CONTENTS.yaml')) as f: - configs = yaml.safe_load(f)['framework_configs'] +with open(os.path.join(DIR_SAMP_CONFIGS, "CONTENTS.yaml")) as f: + configs = yaml.safe_load(f)["framework_configs"] for d in configs: - FRAMEWORK_CONFIG_KEYPAIRS.append(d['shortname']) - FRAMEWORK_CONFIG_KEYPAIRS.append( - os.path.join(DIR_SAMP_CONFIGS, d['filename']) - ) + FRAMEWORK_CONFIG_KEYPAIRS.append(d["shortname"]) + FRAMEWORK_CONFIG_KEYPAIRS.append(os.path.join(DIR_SAMP_CONFIGS, d["filename"])) # regex to capture the start and end of tracebacks REGEX_START_OF_TRACEBACK = "Traceback\s\(most\srecent\scall\slast\)" @@ -72,6 +73,68 @@ "torch.distributed.elastic.multiprocessing.errors.ChildFailedError" ] +FILE_MEM = "gpu_memory_logs.csv" +GPU_LOG_USED_MEM_COLUMN_NAME = "memory.used [MiB]" +GPU_LOG_METRIC_SUFFIX = " MiB" +GPU_TABLE = "timestamp,name,index,memory.used" +RESULT_FIELD_RESERVED_GPU_MEM = "nvidia_mem_reserved" +RESULT_FIELD_DEVICE_NAME = "gpu_device_name" + +HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT = "before_init_mem_gpu" +HF_TRAINER_LOG_GPU_STAGE_INIT = "init_mem_gpu" +HF_TRAINER_LOG_GPU_STAGE_TRAIN = "train_mem_gpu" +KEYWORD_PEAKED_DELTA = "peaked_delta" +KEYWORD_ALLOC_DELTA = "alloc_delta" +HF_ARG_SKIP_MEMORY_METRIC = "--skip_memory_metrics" +RESULT_FIELD_ALLOCATED_GPU_MEM = "torch_mem_alloc_in_bytes" +RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "peak_torch_mem_alloc_in_bytes" + + +def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]: + """ + This function computes the gpu summary metrics from the output metrics of Trainer + when `skip_memory_metrics` is set to `False` in transformers.TrainingArguments + + This function is called only when `--skip_memory_metrics` exist in the experiment arg + and is set to False. The memory key values are expected to be inside output_metrics. 
If + output_metrics is empty, return peak=0 and usage=0 + + Returns + - gpu_peak value in Bytes + - gpu_usage value in Bytes + """ + # Assumes train stage is always called + # this is a tuple of stage names, and a bool to say if it should be included in the summarized number + # we exclude the model loading stages for now, due to + # https://github.com/foundation-model-stack/fms-acceleration/issues/18 + # we will renable the loading stages later on once this issue is addressed + if len(output_metrics.keys()) < 1: + return 0, 0 + + trainer_stage_order = [ + (HF_TRAINER_LOG_GPU_STAGE_BEFORE_INIT, False), + (HF_TRAINER_LOG_GPU_STAGE_INIT, False), + (HF_TRAINER_LOG_GPU_STAGE_TRAIN, True), + ] + alloc_running_sum = 0 + list_of_alloc_running_sums = [] + list_of_peak_running_sums = [] + for STAGE_NAME, include in trainer_stage_order: + delta_key = f"{STAGE_NAME}_{KEYWORD_ALLOC_DELTA}" + alloc_running_sum += ( + output_metrics[delta_key] + if delta_key in output_metrics + else output_metrics[STAGE_NAME] + ) + peak_delta = output_metrics.get(f"{STAGE_NAME}_{KEYWORD_PEAKED_DELTA}", 0) + if include: + list_of_alloc_running_sums.append(alloc_running_sum) + list_of_peak_running_sums.append(alloc_running_sum + peak_delta) + + max_alloc_running_sum = max(list_of_alloc_running_sums) + max_peak_running_sum = max(list_of_peak_running_sums) + return max_peak_running_sum, max_alloc_running_sum + def get_hf_arguments_with_no_value(dataclass_types): """this function will return a map (str, bool) of true/false arguments. @@ -203,7 +266,7 @@ def cartesian_product_on_dict(variable_matrices: Dict) -> List[Dict]: list_of_products.append( { name: arg - for name, arg in zip(variable_matrices.keys(), arg_combinations) + for name, arg in zip(variable_matrices.keys(), arg_combinations) } ) return list_of_products @@ -223,14 +286,14 @@ def convert_args_to_dict(experiment_arguments: List[Any]): argument_dict[current_key] = item else: # otherwise it was from a list, so make into sequence - argument_dict[current_key] = v + ' ' + item + argument_dict[current_key] = v + " " + item return argument_dict class ScenarioMatrix: - matrix_args = ['model_name_or_path'] + matrix_args = ["model_name_or_path"] def __init__(self, scenario: Dict, acceleration_config_map: Dict = None) -> None: assert "arguments" in scenario.keys(), "Missing `arguments` key in `scenario`" @@ -246,7 +309,7 @@ def __init__(self, scenario: Dict, acceleration_config_map: Dict = None) -> None setattr(self, key, val) def preload_models(self): - for model_name in self.arguments['model_name_or_path']: + for model_name in self.arguments["model_name_or_path"]: print(f"Scenario '{self.name}' preloading model '{model_name}'") # just preload the config AutoConfig.from_pretrained(model_name) @@ -292,8 +355,15 @@ def __init__( self.stderr_filename = os.path.join(self.save_dir, FILE_STDERR) self.command_filename = os.path.join(self.save_dir, FILE_SHELL_COMMAND) self.results_filename = os.path.join(self.save_dir, FILE_RESULTS) + self.gpu_log_filename = os.path.join(self.save_dir, FILE_MEM) - def run(self, run_cmd: str, environment_variables: Dict = None): + def run( + self, + run_cmd: str, + environment_variables: Dict = None, + log_nvidia_smi: bool = False, + memory_log_interval_secs: int = 1, + ): # form the command line commands = [] @@ -302,14 +372,47 @@ def run(self, run_cmd: str, environment_variables: Dict = None): commands.extend([str(x) for x in c]) else: commands.append(str(c)) - + # will save the command line in str self.shell_command = run_cmd.split() + commands 
self.environment = environment_variables self.experiment_args_str = commands os.makedirs(self.save_dir, exist_ok=True) + + if log_nvidia_smi: + """ + Opens a parallel process to log the device memory of the main experiment process. + - Logs memory at intervals to a csv file in `self.save_dir` + - Terminates at the end of experiment + - GPU log is read and aggregated when the experiment ends & results are saved in Experiment.write_result, + + NOTE: This feature assumes the following + 1. Experiment is the only process on the gpu devices - + there are no other processes running on the device in parallel. + + Can log more info from nvidia-smi by expanding GPU_Table argument + e.g. "timestamp,name,index,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used" + Use `nvidia-smi --help-query-gpu` for more reference + """ + nvidia_logging_cmd = [ + "nvidia-smi", + "--query-gpu", + GPU_TABLE, + "--format", + "csv", + "--id", + str(environment_variables["CUDA_VISIBLE_DEVICES"]), + "--loop", + str(memory_log_interval_secs), + ] + memory_process = subprocess.Popen( + nvidia_logging_cmd, + stdout=open(self.gpu_log_filename, "w"), + text=True, + ) + subprocess.run( - self.shell_command, + self.shell_command, capture_output=False, stdout=open(self.stdout_filename, "w"), stderr=open(self.stderr_filename, "w"), @@ -317,6 +420,9 @@ def run(self, run_cmd: str, environment_variables: Dict = None): env={**os.environ.copy(), **environment_variables}, ) + if log_nvidia_smi: + memory_process.terminate() + def get_experiment_final_metrics( self, final_metrics_keys: List[str] = ["train_loss", "train_runtime"] ): @@ -374,19 +480,76 @@ def maybe_get_experiment_error_traceback(self): return None if len(results) == 0 else results + def get_peak_mem_usage_by_device_id(self): + """ + This function retrieves the raw measurements of reserved GPU memory per device across the experiment - + computing the peak value for each gpu and then performing a simple calibration (subtracts peak values by the first reading). + Returns: + - pd.Series of peak memory usage per device id + - the device name as string - e.g. 
"NVIDIA A100-SXM4-80GB" + + Example: For 2 devices with GPU Indices 0,1 - it will return the max measurement value (in MiB) of each device as a Series: + + - pd.Series + index + 0 52729.0 + 1 52783.0 + Name: memory.used [MiB], dtype: float64 + """ + + # group the gpu readings into device ids + gpu_logs = pd.read_csv(self.gpu_log_filename, skipinitialspace=True) + # assume that all the devices have the same device name + device_name = gpu_logs.name.iloc[-1] + # extract and convert the gpu memory usage as float values + gpu_logs[GPU_LOG_USED_MEM_COLUMN_NAME] = gpu_logs[ + GPU_LOG_USED_MEM_COLUMN_NAME + ].apply(lambda x: float(x.replace(GPU_LOG_METRIC_SUFFIX, ""))) + mem_usage_by_device_id = gpu_logs.groupby("index")[GPU_LOG_USED_MEM_COLUMN_NAME] + # Calibrate values by subtracting out the initial values of the GPU readings + # to ensure no existing memory is counted in addition with the experiment + initial_values = mem_usage_by_device_id.first() + peak_values = mem_usage_by_device_id.max() + return peak_values.sub(initial_values), device_name + def write_result(self): "Function to write a json result file" # save some basic args save_result = ConfigUtils.convert_args_to_dict(self.experiment_args_str) - save_result['num_gpus'] = self.num_gpus + save_result["num_gpus"] = self.num_gpus + + # if a gpu log file exist, process the raw nvidia logs and write to result + if os.path.isfile(self.gpu_log_filename): + # Add GPU info and measurements into the result saving + peak_mem_usage_by_device_id, device_name = ( + self.get_peak_mem_usage_by_device_id() + ) + save_result[RESULT_FIELD_DEVICE_NAME] = device_name + # Memory usage is averaged across all devices in the final result + save_result[RESULT_FIELD_RESERVED_GPU_MEM] = ( + peak_mem_usage_by_device_id.mean() + ) + + # process gpu mem from output metrics and write to result + # check if HF_ARG_SKIP_MEMORY_METRIC is set to False in experiment arg + # this arg is specified explicitly inside `def generate_list_of_experiments`` + argument_idx = self.experiment_arg.index(HF_ARG_SKIP_MEMORY_METRIC) + write_memory_metric = not self.experiment_arg[argument_idx + 1] + if write_memory_metric: + peak_gpu_mem, gpu_allocated_mem = extract_gpu_memory_metrics( + self.get_experiment_final_metrics() + ) + save_result[RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM] = peak_gpu_mem + save_result[RESULT_FIELD_ALLOCATED_GPU_MEM] = gpu_allocated_mem # if there is an error we save the error message else we save the final result maybe_error_messages = self.maybe_get_experiment_error_traceback() if maybe_error_messages is None: other_results = self.get_experiment_final_metrics() save_result = { - **save_result, **self.get_experiment_final_metrics(), + **save_result, + **self.get_experiment_final_metrics(), } else: other_results = {"error_messages": maybe_error_messages} @@ -394,26 +557,25 @@ def write_result(self): # combine the final thing save_result = {**save_result, **other_results} - with open(self.results_filename, 'w') as f: + with open(self.results_filename, "w") as f: json.dump(save_result, f, indent=4, sort_keys=True) - # NOTE: can be improved. Not sure if this really gets parity with + # NOTE: can be improved. 
Not sure if this really gets parity with # subprocess.run def write_shell_command(self): def _escape(x: str): # if there is is whitespace we just escape with single quotes # not sure if this is the best thing to do - return x if not re.search(r"\s", x) else f"\'{x}\'" + return x if not re.search(r"\s", x) else f"'{x}'" "Write a shell script to repro the run" - with open(self.command_filename, 'w') as f: + with open(self.command_filename, "w") as f: f.write("#!/bin/bash\n\n") for key, val in self.environment.items(): - f.write(f"{key}={val}\n") - f.write(" ".join([ - _escape(x) for x in self.shell_command - ])) + f.write(f"export {key}={val}\n") + f.write(" ".join([_escape(x) for x in self.shell_command])) + class DryRunExperiment(Experiment): @@ -423,6 +585,7 @@ def __init__(self, *args, **kwargs): def run(self, run_cmd: str, environment_variables: Dict = None): def _dummy(*args, **kwargs): pass + _old = subprocess.run subprocess.run = _dummy super().run(run_cmd, environment_variables) @@ -436,6 +599,7 @@ def get_experiment_final_metrics( def maybe_get_experiment_error_traceback(self): return None + def prepare_arguments(args): defaults = ConfigUtils.read_yaml(args.defaults_config_path) defaults["training_data_path"] = args.dataset_save_path @@ -451,18 +615,15 @@ def prepare_arguments(args): } experiment_factor = 1 for k, v in experiment_matrices.items(): - print (f"Experiment has matrix '{k}' of len {len(v)}") + print(f"Experiment has matrix '{k}' of len {len(v)}") experiment_factor *= len(v) - print (f"Experiment matrices will product by factor of '{experiment_factor}'") + print(f"Experiment matrices will product by factor of '{experiment_factor}'") for scenario_config in scenarios: _scn_name = scenario_config["name"] # if a `run_only_scenarios` list exist, filter out any scenario not in the list - if ( - args.run_only_scenarios - and _scn_name not in args.run_only_scenarios - ): - print (f"Skipping scenario '{_scn_name}'") + if args.run_only_scenarios and _scn_name not in args.run_only_scenarios: + print(f"Skipping scenario '{_scn_name}'") continue scenario = ScenarioMatrix(scenario_config, acceleration_config_map) scenario_matrices, scenario_constants = ( @@ -470,7 +631,7 @@ def prepare_arguments(args): ) scn_factor = 1 for k, v in scenario_matrices.items(): - print (f"Scenario '{_scn_name}' has matrix '{k}' of len {len(v)}") + print(f"Scenario '{_scn_name}' has matrix '{k}' of len {len(v)}") scn_factor *= len(v) # update defaults with scenario constants @@ -478,7 +639,9 @@ def prepare_arguments(args): # Remove any empty variables and combine matrices to dictionary to cartesian product on combined_matrices = {**scenario_matrices, **experiment_matrices} products = ConfigUtils.cartesian_product_on_dict(combined_matrices) - print (f"Scenario '{_scn_name}' will add to the total products by: ----> '{experiment_factor} x {scn_factor}' = '{len(products)}'\n") + print( + f"Scenario '{_scn_name}' will add to the total products by: ----> '{experiment_factor} x {scn_factor}' = '{len(products)}'\n" + ) if args.preload_models and len(products) > 0: scenario.preload_models() for num_gpus, experiment_arg in ConfigUtils.build_args_from_products( @@ -492,6 +655,7 @@ def generate_list_of_experiments( output_dir: str = "results", hf_products_dir: str = "hf", dry_run: bool = False, + log_memory_in_trainer: bool = False, ) -> List[Experiment]: """Construct list of experiments to be run. 
Takes in default_config and any matrices in scenario and experiment_config @@ -503,6 +667,8 @@ def generate_list_of_experiments( expr_arg_w_outputdir = exp_arg + [ "--output_dir", os.path.join(experiment_output_dir, hf_products_dir), + HF_ARG_SKIP_MEMORY_METRIC, + not log_memory_in_trainer, ] expr_cls = Experiment if not dry_run else DryRunExperiment _expr = expr_cls( @@ -515,7 +681,7 @@ def generate_list_of_experiments( return experiments -def gather_report(result_dir: Union[str, List[str]], raw: bool=True): +def gather_report(result_dir: Union[str, List[str]], raw: bool = True): def _gather(rdir): @@ -524,26 +690,28 @@ def _gather(rdir): # map from config file to tag fcm = convert_keypairs_to_map( - script_args['acceleration_framework_config_keypairs'] + script_args["acceleration_framework_config_keypairs"] ) - fcm = {v:k for k,v in fcm.items()} + fcm = {v: k for k, v in fcm.items()} experiment_stats = {} - exper_dirs = [x for x in os.listdir(rdir) if x.startswith(DIR_PREFIX_EXPERIMENT)] + exper_dirs = [ + x for x in os.listdir(rdir) if x.startswith(DIR_PREFIX_EXPERIMENT) + ] for tag in exper_dirs: try: with open(os.path.join(rdir, tag, FILE_RESULTS)) as f: - tag = tag.replace(DIR_PREFIX_EXPERIMENT + '_', '') + tag = tag.replace(DIR_PREFIX_EXPERIMENT + "_", "") tag = int(tag) experiment_stats[tag] = json.load(f) except FileNotFoundError: pass df = pd.DataFrame.from_dict(experiment_stats, orient="index").sort_index() try: - df['framework_config'] = df['acceleration_framework_config_file'].map( - lambda x : fcm.get(x, 'none') + df["framework_config"] = df["acceleration_framework_config_file"].map( + lambda x: fcm.get(x, "none") ) - except KeyError: + except KeyError: pass return df @@ -564,23 +732,39 @@ def _nunique(series): # if unique does not work, then return number of non-na # elements return len(series) - series.isna().sum() - u = df.apply(_nunique) # columns that are unique - return df.loc[:,u != 1], df.iloc[0][u == 1].to_dict() + + u = df.apply(_nunique) # columns that are unique + return df.loc[:, u != 1], df.iloc[0][u == 1].to_dict() + def compress(df): - return df.loc[:,df.apply(pd.Series.nunique) != 1] + return df.loc[:, df.apply(pd.Series.nunique) != 1] + def main(args): + # Gathers available gpu device ids that will be used for benchmarking. + # If "CUDA_VISIBLE_DEVICES" is specified, it will return the specified device ids + # if no gpu ids are specified, it will default to the enumeration of available ids + assert torch.cuda.device_count() > 0, "No device detected for memory logging!" + available_gpus_indices = os.environ.get("CUDA_VISIBLE_DEVICES") + if available_gpus_indices: + available_gpus_indices = available_gpus_indices.split(",") + else: + available_gpus_indices = [str(i) for i in range(torch.cuda.device_count())] + + if args.dry_run and args.log_nvidia_smi: + args.log_nvidia_smi = False + # 1. Prepares a standard BenchmarkDataset # TODO: consider caching the json file if not args.no_data_processing: benchmark_dataset = BenchmarkDataset(args.dataset_name, format_fn) benchmark_dataset.save_to_path(args.dataset_save_path) - # dump out the script arguments + # dump out the script arguments os.makedirs(args.results_output_path, exist_ok=True) - with open(os.path.join(args.results_output_path, FILE_SCRIPT_ARGS), 'w') as f: + with open(os.path.join(args.results_output_path, FILE_SCRIPT_ARGS), "w") as f: json.dump(vars(args), f, indent=4, sort_keys=True) # 2. Prepares a list of experiment arguments from a set of configs @@ -589,10 +773,14 @@ def main(args): # 3. 
Builds a list of experiment objects to run based on the set of experiment arguments experiment_stats = {} experiment: Experiment - for experiment in tqdm(generate_list_of_experiments( - experiment_args, output_dir=args.results_output_path, - dry_run=args.dry_run, - )): + for experiment in tqdm( + generate_list_of_experiments( + experiment_args, + output_dir=args.results_output_path, + dry_run=args.dry_run, + log_memory_in_trainer=args.log_memory_hf, + ) + ): if experiment.num_gpus > 1: prefix = COMMAND_ACCELERATE.format( accelerate_config_path=args.accelerate_config, @@ -602,10 +790,20 @@ def main(args): else: prefix = COMMAND_PYTHON - device_ids = ",".join([str(i) for i in range(experiment.num_gpus)]) + assert experiment.num_gpus <= len( + available_gpus_indices + ), "Experiment requires more gpus than is available on the platform." + """ + Experiment will take only the ids from the available gpu indices, + this ensures that whatever GPUs are exposed to benchmark.py are the only + devices that each experiment can have access to. + """ + device_ids = ",".join(available_gpus_indices[: experiment.num_gpus]) + experiment.run( f"{prefix} {FMS_TRAINER}", environment_variables={"CUDA_VISIBLE_DEVICES": device_ids}, + log_nvidia_smi=args.log_nvidia_smi, ) # write results and store pointers to files @@ -618,9 +816,7 @@ def main(args): with open(path) as f: experiment_stats[tag] = json.load(f) df = pd.DataFrame.from_dict(experiment_stats, orient="index") - df.to_csv( - os.path.join(args.results_output_path, FILE_SUMMARY_CSV), index=None - ) + df.to_csv(os.path.join(args.results_output_path, FILE_SUMMARY_CSV), index=None) # TO CREATE THE checked in CSV FILE DO # df, constant = gather_report(..., raw=False) @@ -635,6 +831,7 @@ def main(args): # index=False # ) + if __name__ == "__main__": parser = argparse.ArgumentParser( @@ -723,17 +920,32 @@ def main(args): "--process_port", type=int, default=29500, help="accelerate process port" ) parser.add_argument( - "--no_data_processing", action='store_true', - help="skip the json data prep (useful for re-runs)" + "--no_data_processing", + action="store_true", + help="skip the json data prep (useful for re-runs)", ) parser.add_argument( - "--dry_run", action='store_true', - help="perform a dry run only. Useful for debuging benchmark scenarios." + "--dry_run", + action="store_true", + help="perform a dry run only. Useful for debuging benchmark scenarios.", ) parser.add_argument( - "--preload_models", action='store_true', + "--preload_models", + action="store_true", help="ensures 'model_name_or_paths 'specified in scenarios.yaml work. " - "Useful to check model paths specified correctly before lengthly benchmark runs." 
+ "Useful to check model paths specified correctly before lengthly benchmark runs.", + ) + parser.add_argument( + "--log_nvidia_smi", + action="store_true", + help="Use `nvidia-smi` API to log reserved memory of benchmarks", ) + + parser.add_argument( + "--log_memory_hf", + action="store_true", + help="Uses memory logging from HF Trainer Arguments API to log gpu memory, for distributed runs only rank 0 is measured", + ) + args = parser.parse_args() main(args) diff --git a/scripts/benchmarks/display_bench_results.py b/scripts/benchmarks/display_bench_results.py new file mode 100644 index 00000000..b590f26c --- /dev/null +++ b/scripts/benchmarks/display_bench_results.py @@ -0,0 +1,58 @@ +# Standard +import argparse + +# First Party +# import this because of alot of internal contants +from scripts.benchmarks.benchmark import gather_report, DIR_SAMP_CONFIGS +from typing import List + +def main(*directories: str, output_filename: str = "results.csv", remove_columns: List[str] = None): + "gather outputs from a list of directories and output to a csv" + + df, constant = gather_report(*directories, raw=False) + # filter result columns to keep by the inverse of remove_columns + if remove_columns: + df = df[df.columns[~df.columns.isin(remove_columns)]] + + errors = [] + try: + # remove error messages if any + errors = df.error_messages + errors = errors.loc[errors.isna() == False] + df = df.loc[df.error_messages.isna()] + except: + pass + df = df.reset_index().drop("output_dir", axis=1) + df.reindex(sorted(df.columns), axis=1).to_csv(output_filename, index=False) + print("***************** Report Created ******************") + print(f"Total lines: '{len(df)}'") + print(f"Number columns included: '{len(df.columns)}'") + print(f"Number columns excluded: '{len(constant)}'") + print(f"Excluding number of exceptions caught: '{len(errors)}'") + print(f"Written report to '{output_filename}'") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + prog="Acceleration Benchmarking Reporting Tool", + description="This script gathers a set benchmarks to produce a CSV report", + ) + parser.add_argument( + "bench_outputs", + nargs="+", + help="list of directories from which to gather bench outputs.", + ) + parser.add_argument( + "--result_file", + default="results.csv", + help="name of final csv report file.", + ) + parser.add_argument( + "--remove_columns", + nargs="*", + help="list of columns to ignore from results.csv", + ) + + args = parser.parse_args() + main(args.bench_outputs, output_filename=args.result_file, remove_columns=args.remove_columns) diff --git a/scripts/benchmarks/refs/a100_80gb.csv b/scripts/benchmarks/refs/a100_80gb.csv index 93dd0a28..4434d864 100644 --- a/scripts/benchmarks/refs/a100_80gb.csv +++ b/scripts/benchmarks/refs/a100_80gb.csv @@ -1,49 +1,61 @@ -acceleration_framework_config_file,epoch,fp16,framework_config,index,learning_rate,lora_alpha,lora_dropout,model_name_or_path,num_gpus,output_dir,peft_method,per_device_train_batch_size,r,target_modules,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second -,0.15,,none,0,2e-5,,,mistralai/Mistral-7B-v0.1,1,,,4,,,0.8943243026733398,561.4936,0.712,0.178,2917.932 -,0.15,,none,1,2e-5,,,mistralai/Mistral-7B-v0.1,2,,,2,,,0.8696886157989502,306.2728,1.306,0.327,2674.74 -,0.29,,none,2,2e-5,,,mistralai/Mistral-7B-v0.1,1,,,8,,,1.0190681648254394,1094.7748,0.731,0.091,2993.127 -,0.29,,none,3,2e-5,,,mistralai/Mistral-7B-v0.1,2,,,4,,,0.8909366416931153,572.0158,1.399,0.175,2864.256 
-,,,none,4,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,,4,,,,,,, -,,,none,5,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,,2,,,,,,, -,,,none,6,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,,8,,,,,,, -,,,none,7,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,,4,,,,,,, -,,,none,8,2e-5,,,NousResearch/Llama-2-70b-hf,1,,,4,,,,,,, -,,,none,9,2e-5,,,NousResearch/Llama-2-70b-hf,2,,,2,,,,,,, -,,,none,10,2e-5,,,NousResearch/Llama-2-70b-hf,1,,,8,,,,,,, -,,,none,11,2e-5,,,NousResearch/Llama-2-70b-hf,2,,,4,,,,,,, -,0.15,,none,12,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8808393669128418,458.0185,0.873,0.218,3577.148 -,0.15,,none,13,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8548675441741943,259.6061,1.541,0.385,3155.55 -,0.29,,none,14,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,1.007005090713501,915.9053,0.873,0.109,3577.662 -,0.29,,none,15,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8773036098480225,480.6995,1.664,0.208,3408.367 -,,,none,16,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -,0.15,,none,17,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.862400369644165,535.3534,0.747,0.187,1530.204 -,,,none,18,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -,0.29,,none,19,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8798200416564942,924.5333,0.865,0.108,1772.137 -,,,none,20,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,21,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,2,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,22,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -,,,none,23,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,24,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8661054801940918,481.8265,0.83,0.208,3400.394 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,25,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8560933685302734,271.0715,1.476,0.369,3022.081 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,26,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.8718929100036621,951.8817,0.84,0.105,3442.445 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,27,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8511034965515136,498.9262,1.603,0.2,3283.852 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,28,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.8973640727996827,908.6145,0.44,0.11,1803.185 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.15,True,accelerated-peft-bnb,29,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.8554682540893555,548.0391,0.73,0.182,1494.784 
-sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,30,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.8935444927215577,1714.3117,0.467,0.058,1911.438 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.29,True,accelerated-peft-bnb,31,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8596937179565429,954.0851,0.838,0.105,1717.247 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.14,True,accelerated-peft-bnb,32,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,4,16,q_proj k_proj v_proj o_proj,1.000812177658081,3696.2907,0.108,0.027,443.255 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.14,True,accelerated-peft-bnb,33,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9307080173492431,1960.7862,0.204,0.051,417.792 -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,,True,accelerated-peft-bnb,34,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-bnb-nf4-sample-configuration.yaml,0.28,True,accelerated-peft-bnb,35,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.9387501430511475,3809.1796,0.21,0.026,430.119 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,36,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9700051403045654,478.8299,0.835,0.209,3421.675 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,37,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9695001697540283,270.0251,1.481,0.37,3033.792 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,38,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.9514076042175293,946.5715,0.845,0.106,3461.756 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,39,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.9824443531036376,496.6611,1.611,0.201,3298.829 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,40,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9041421699523926,872.5836,0.458,0.115,1877.643 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.15,True,accelerated-peft-autogptq,41,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9010070323944092,499.3435,0.801,0.2,1640.554 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,42,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,0.9001609039306641,1666.1579,0.48,0.06,1966.68 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.29,True,accelerated-peft-autogptq,43,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,0.8965495491027832,897.4939,0.891,0.111,1825.528 
-sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.14,True,accelerated-peft-autogptq,44,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,1,,lora,4,16,q_proj k_proj v_proj o_proj,0.9533391189575195,3621.8261,0.11,0.028,452.368 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,0.14,True,accelerated-peft-autogptq,45,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,2,,lora,2,16,q_proj k_proj v_proj o_proj,0.9467405033111572,1886.6815,0.212,0.053,434.202 -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,,True,accelerated-peft-autogptq,46,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,1,,lora,8,16,q_proj k_proj v_proj o_proj,,,,, -sample-configurations/accelerated-peft-autogptq-sample-configuration.yaml,,True,accelerated-peft-autogptq,47,2e-4,16,0.0,TheBloke/Nous-Hermes-Llama2-70B-GPTQ,2,,lora,4,16,q_proj k_proj v_proj o_proj,,,,, +epoch,fp16,framework_config,index,learning_rate,lora_alpha,lora_dropout,model_name_or_path,num_gpus,nvidia_mem_reserved,peak_torch_mem_alloc_in_bytes,peft_method,per_device_train_batch_size,r,target_modules,torch_mem_alloc_in_bytes,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second +0.04,,none,0,2e-5,,,mistralai/Mistral-7B-v0.1,1,77705.0,72971724288.0,,4,,,44004763136.0,0.9278398831685384,177.1092,0.678,0.169,2775.237 +0.04,,none,1,2e-5,,,mistralai/Mistral-7B-v0.1,2,44706.0,36762859520.0,,2,,,29521119232.0,0.8970902442932129,91.086,1.317,0.329,2698.11 +0.09,,none,2,2e-5,,,mistralai/Mistral-7B-v0.1,1,74383.0,72972117504.0,,8,,,44005156352.0,0.9879656155904134,322.458,0.744,0.093,3048.583 +0.09,,none,3,2e-5,,,mistralai/Mistral-7B-v0.1,2,53907.0,36763056128.0,,4,,,29521315840.0,0.9259945551554362,167.7727,1.431,0.179,2929.678 +,,none,4,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,81043.0,,,4,,,,,,,, +,,none,5,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,79353.0,,,2,,,,,,,, +,,none,6,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,1,81043.0,,,8,,,,,,,, +,,none,7,2e-5,,,mistralai/Mixtral-8x7B-Instruct-v0.1,2,79827.0,,,4,,,,,,,, +,,none,8,2e-5,,,NousResearch/Llama-2-70b-hf,1,80837.0,,,4,,,,,,,, +,,none,9,2e-5,,,NousResearch/Llama-2-70b-hf,2,80830.0,,,2,,,,,,,, +,,none,10,2e-5,,,NousResearch/Llama-2-70b-hf,1,80837.0,,,8,,,,,,,, +,,none,11,2e-5,,,NousResearch/Llama-2-70b-hf,2,80834.5,,,4,,,,,,,, +0.04,,none,12,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,29731.0,26108963328.0,lora,4,16,q_proj k_proj v_proj o_proj,15119590912.0,0.9096682230631511,136.624,0.878,0.22,3597.611 +0.04,,none,13,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,18697.0,15123161088.0,lora,2,16,q_proj k_proj v_proj o_proj,7850391552.0,0.8918854713439941,82.0311,1.463,0.366,2995.936 +0.09,,none,14,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,43195.0,37098695168.0,lora,8,16,q_proj k_proj v_proj o_proj,15119984128.0,0.962119706471761,270.6301,0.887,0.111,3632.412 +0.09,,none,15,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,26235.0,21433753600.0,lora,4,16,q_proj k_proj v_proj o_proj,7850588160.0,0.9218235015869141,143.8184,1.669,0.209,3417.643 +,,none,16,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,80955.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,,none,17,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,62617.0,57540387840.0,lora,2,16,q_proj k_proj v_proj o_proj,47311452160.0,0.9361546834309896,179.3128,0.669,0.167,1370.566 +,,none,18,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,80955.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, 
+0.09,,none,19,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,69848.0,64347637760.0,lora,4,16,q_proj k_proj v_proj o_proj,47311648768.0,0.9383139928181966,280.8919,0.854,0.107,1749.855 +,,none,20,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80917.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,21,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80894.0,,lora,2,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,22,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80917.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,,none,23,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80979.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,baseline-peft-bnb,24,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,27023.0,22825932800.0,lora,4,16,q_proj k_proj v_proj o_proj,5368221184.0,0.9589527130126954,178.8061,0.671,0.168,2748.9 +0.04,True,baseline-peft-bnb,25,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,13530.0,9974622720.0,lora,2,16,q_proj k_proj v_proj o_proj,2727018496.0,0.9154380798339844,87.3652,1.374,0.343,2813.02 +0.09,True,baseline-peft-bnb,26,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,47145.0,40278956032.0,lora,8,16,q_proj k_proj v_proj o_proj,5368614400.0,0.9702634493509928,341.2286,0.703,0.088,2880.884 +0.09,True,baseline-peft-bnb,27,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,21502.0,16587205120.0,lora,4,16,q_proj k_proj v_proj o_proj,2727215104.0,0.914565912882487,149.9341,1.601,0.2,3278.241 +0.04,True,baseline-peft-bnb,28,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,48313.0,46419968512.0,lora,4,16,q_proj k_proj v_proj o_proj,25726225920.0,0.9744932492574055,351.8623,0.341,0.085,1396.91 +0.04,True,baseline-peft-bnb,29,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,25549.0,21922782720.0,lora,2,16,q_proj k_proj v_proj o_proj,13219233792.0,0.9303209940592448,171.4299,0.7,0.175,1433.589 +0.09,True,baseline-peft-bnb,30,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,69931.0,67089150464.0,lora,8,16,q_proj k_proj v_proj o_proj,25726619136.0,0.9745417594909668,629.837,0.381,0.048,1560.785 +0.09,True,baseline-peft-bnb,31,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,32957.0,29384115200.0,lora,4,16,q_proj k_proj v_proj o_proj,13219430400.0,0.9310146331787109,300.5119,0.799,0.1,1635.609 +,True,baseline-peft-bnb,32,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80893.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,baseline-peft-bnb,33,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,52634.0,46524471808.0,lora,2,16,q_proj k_proj v_proj o_proj,19172741120.0,1.0399916648864747,584.3145,0.205,0.051,420.595 +,True,baseline-peft-bnb,34,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,79557.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,True,baseline-peft-bnb,35,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80749.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,accelerated-peft-bnb,36,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,19931.0,15860019712.0,lora,4,16,q_proj k_proj v_proj o_proj,4843384320.0,0.9652111371358235,143.3569,0.837,0.209,3428.645 +0.04,True,accelerated-peft-bnb,37,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,13497.0,9974622720.0,lora,2,16,q_proj k_proj v_proj o_proj,2727018496.0,0.9277165730794271,86.4307,1.388,0.347,2843.435 +0.09,True,accelerated-peft-bnb,38,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,1,34355.0,26849751552.0,lora,8,16,q_proj k_proj v_proj o_proj,4843777536.0,0.9493892669677735,279.7156,0.858,0.107,3514.427 +0.09,True,accelerated-peft-bnb,39,2e-4,16,0.0,mistralai/Mistral-7B-v0.1,2,21479.0,16587205120.0,lora,4,16,q_proj k_proj v_proj o_proj,2727215104.0,0.9110882759094239,149.3914,1.607,0.201,3290.15 
+0.04,True,accelerated-peft-bnb,40,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,38405.0,36218024448.0,lora,4,16,q_proj k_proj v_proj o_proj,25201389056.0,0.9741149584452311,278.5888,0.431,0.108,1764.32 +0.04,True,accelerated-peft-bnb,41,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,25592.0,21906697728.0,lora,2,16,q_proj k_proj v_proj o_proj,13219233792.0,0.9300654411315918,172.7359,0.695,0.174,1422.75 +0.09,True,accelerated-peft-bnb,42,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,1,50875.0,47207756288.0,lora,8,16,q_proj k_proj v_proj o_proj,25201782272.0,0.9748441060384114,512.2298,0.469,0.059,1919.139 +0.09,True,accelerated-peft-bnb,43,2e-4,16,0.0,mistralai/Mixtral-8x7B-Instruct-v0.1,2,32957.0,29369087488.0,lora,4,16,q_proj k_proj v_proj o_proj,13219430400.0,0.9301350593566895,287.6381,0.834,0.104,1708.814 +0.04,True,accelerated-peft-bnb,44,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,72829.0,68159977472.0,lora,4,16,q_proj k_proj v_proj o_proj,37346815488.0,1.118430455525716,1075.2044,0.112,0.028,457.141 +0.04,True,accelerated-peft-bnb,45,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,52632.0,46524471808.0,lora,2,16,q_proj k_proj v_proj o_proj,19172741120.0,1.040946865081787,586.651,0.205,0.051,418.92 +,True,accelerated-peft-bnb,46,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,1,80405.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +,True,accelerated-peft-bnb,47,2e-4,16,0.0,NousResearch/Llama-2-70b-hf,2,80954.0,,lora,4,16,q_proj k_proj v_proj o_proj,,,,,, +0.04,True,accelerated-peft-autogptq,48,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,20453.0,15890329088.0,lora,4,16,q_proj k_proj v_proj o_proj,4873693696.0,1.3805528958638509,151.0359,0.795,0.199,3254.326 +0.04,True,accelerated-peft-autogptq,49,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,17198.0,9952175616.0,lora,2,16,q_proj k_proj v_proj o_proj,3005709312.0,1.1706618309020995,87.4109,1.373,0.343,2811.548 +0.09,True,accelerated-peft-autogptq,50,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,1,34247.0,26880060928.0,lora,8,16,q_proj k_proj v_proj o_proj,4874086912.0,1.2741642634073893,282.6391,0.849,0.106,3478.076 +0.09,True,accelerated-peft-autogptq,51,2e-4,16,0.0,TheBloke/Mistral-7B-v0.1-GPTQ,2,24783.0,16262768128.0,lora,4,16,q_proj k_proj v_proj o_proj,3005905920.0,1.043952751159668,152.5473,1.573,0.197,3222.083 +0.04,True,accelerated-peft-autogptq,52,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,37461.0,35528093184.0,lora,4,16,q_proj k_proj v_proj o_proj,24511457792.0,0.9936613400777181,263.6066,0.455,0.114,1864.597 +0.04,True,accelerated-peft-autogptq,53,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,46641.0,25708175360.0,lora,2,16,q_proj k_proj v_proj o_proj,12788874240.0,0.9420519828796386,167.065,0.718,0.18,1471.045 +0.09,True,accelerated-peft-autogptq,54,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,1,49925.0,46517825024.0,lora,8,16,q_proj k_proj v_proj o_proj,24511851008.0,0.9855653127034505,498.9022,0.481,0.06,1970.406 +0.09,True,accelerated-peft-autogptq,55,2e-4,16,0.0,TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ,2,52358.0,27739090432.0,lora,4,16,q_proj k_proj v_proj o_proj,12789070848.0,0.9389812151590983,281.8034,0.852,0.106,1744.195 +0.04,True,accelerated-peft-autogptq,56,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,1,71565.0,65895347200.0,lora,4,16,q_proj k_proj v_proj o_proj,36290144768.0,1.0755928039550782,1060.8387,0.113,0.028,463.331 +0.04,True,accelerated-peft-autogptq,57,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,2,80387.0,45397678592.0,lora,2,16,q_proj k_proj v_proj 
o_proj,18649885696.0,1.0256956418355305,576.0422,0.208,0.052,426.635 +,True,accelerated-peft-autogptq,58,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,1,80293.0,,lora,8,16,q_proj k_proj v_proj o_proj,,,,,, +0.08,True,accelerated-peft-autogptq,59,2e-4,16,0.0,TheBloke/Llama-2-70B-GPTQ,2,80363.0,70667573760.0,lora,4,16,q_proj k_proj v_proj o_proj,18650082304.0,1.0266701062520345,1089.3291,0.22,0.028,451.214 diff --git a/scripts/benchmarks/scenarios.yaml b/scripts/benchmarks/scenarios.yaml index e79a74e6..248eacb2 100644 --- a/scripts/benchmarks/scenarios.yaml +++ b/scripts/benchmarks/scenarios.yaml @@ -32,6 +32,23 @@ scenarios: - 'mistralai/Mixtral-8x7B-Instruct-v0.1' - 'NousResearch/Llama-2-70b-hf' + - name: baseline-peft-bnb + framework_config: + - baseline-peft-bnb + arguments: + fp16: True + learning_rate: 2e-4 + torch_dtype: float16 + peft_method: lora + r: 16 + lora_alpha: 16 + lora_dropout: 0.0 + target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"] + model_name_or_path: + - 'mistralai/Mistral-7B-v0.1' + - 'mistralai/Mixtral-8x7B-Instruct-v0.1' + - 'NousResearch/Llama-2-70b-hf' + - name: accelerated-peft-bnb framework_config: - accelerated-peft-bnb @@ -64,4 +81,4 @@ scenarios: model_name_or_path: - 'TheBloke/Mistral-7B-v0.1-GPTQ' - 'TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ' - - TheBloke/Nous-Hermes-Llama2-70B-GPTQ \ No newline at end of file + - 'TheBloke/Llama-2-70B-GPTQ' \ No newline at end of file diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py index cd354cbf..67ad4058 100644 --- a/scripts/generate_sample_configurations.py +++ b/scripts/generate_sample_configurations.py @@ -141,6 +141,7 @@ def read_configuration(path: str) -> Dict: # specified key path, with the value. KEY_AUTO_GPTQ = "auto_gptq" KEY_BNB_NF4 = "bnb-nf4" +KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4" CONFIGURATIONS = { KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml", @@ -148,6 +149,13 @@ def read_configuration(path: str) -> Dict: "plugins/accelerated-peft/configs/bnb.yaml", [("peft.quantization.bitsandbytes.quant_type", "nf4")], ), + KEY_BNB_NF4_BASELINE: ( + "plugins/accelerated-peft/configs/bnb.yaml", + [ + ("peft.quantization.bitsandbytes.quant_type", "nf4"), + ("peft.quantization.bitsandbytes.no_peft_model", True), + ], + ), } # list of (tag, combi) tuples @@ -157,7 +165,8 @@ def read_configuration(path: str) -> Dict: # config. 
COMBINATIONS = [ ("accelerated-peft-autogptq", (KEY_AUTO_GPTQ,)), - # ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)), + ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)), + ("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)), ] diff --git a/scripts/run_benchmarks.sh b/scripts/run_benchmarks.sh index a281cc53..e08125b3 100644 --- a/scripts/run_benchmarks.sh +++ b/scripts/run_benchmarks.sh @@ -27,9 +27,18 @@ SCNTAG_PEFT_AUTOGPTQ=accelerated-peft-gptq # data will be cached in here DATA_CACHE=data/cache.json +# final result placed here +BENCH_RESULT_FILE=benchmarks.csv + +# freeze the pip requirements here +PIP_REQUIREMENTS_FILE=requirements.txt + +# ------------- DROP COLUMNS FROM RESULTS ----------------- # env inputs DRY_RUN=${DRY_RUN:-"false"} NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"} +NO_OVERWRITE=${NO_OVERWRITE:-"false"} +MEMORY_LOGGING=${MEMORY_LOGGING:-"huggingface"} # inputs NUM_GPUS_MATRIX=${1-"1 2"} @@ -41,12 +50,27 @@ echo "NUM_GPUS_MATRIX: $NUM_GPUS_MATRIX" echo "RESULT_DIR: $RESULT_DIR" echo "SCENARIOS_CONFIG: $SCENARIOS_CONFIG" echo "SCENARIOS_FILTER: $SCENARIOS_FILTER" +echo "MEMORY_LOGGING: $MEMORY_LOGGING" + +if [ -d "$RESULT_DIR" ]; then + echo "The results directory $RESULT_DIR already exists." + if [ "$NO_OVERWRITE" = "true" ]; then + echo "Results dir $RESULT_DIR exists, but NO_OVERWRITE=true" + echo "If you intend to overwrite, please delete the folder manually" + echo "or do not set NO_OVERWRITE" + exit 1 + fi + echo "Deleting $RESULT_DIR" + rm -rf $RESULT_DIR +fi # tag on the directories SCENARIOS_CONFIG=$WORKING_DIR/$SCENARIOS_CONFIG DEFAULTS_CONFIG=$WORKING_DIR/$DEFAULTS_CONFIG ACCELERATE_CONFIG=$WORKING_DIR/$ACCELERATE_CONFIG DATA_CACHE=$RESULT_DIR/$DATA_CACHE +BENCH_RESULT_FILE=$RESULT_DIR/$BENCH_RESULT_FILE +PIP_REQUIREMENTS_FILE=$RESULT_DIR/$PIP_REQUIREMENTS_FILE # ------------- EXTRA ARGS ----------------- @@ -65,6 +89,17 @@ if [ "$NO_DATA_PROCESSING" = "true" ]; then EXTRA_ARGS="$EXTRA_ARGS --no_data_processing" fi +if [ "$MEMORY_LOGGING" = "huggingface" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_memory_hf" +elif [ "$MEMORY_LOGGING" = "nvidia" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi" +elif [ "$MEMORY_LOGGING" = "all" ]; then + EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi --log_memory_hf" +fi + +# dump out the environment +pip freeze > $PIP_REQUIREMENTS_FILE + # run the bench python $WORKING_DIR/benchmark.py \ --num_gpus $NUM_GPUS_MATRIX \ @@ -73,3 +108,24 @@ python $WORKING_DIR/benchmark.py \ --defaults_config_path $DEFAULTS_CONFIG \ --dataset_save_path $DATA_CACHE \ --results_output_path $RESULT_DIR $EXTRA_ARGS + +# produce the final CSV for check-in +# need to set PYTHONPATH because display_bench_results.py imports from scripts.benchmarks.benchmark +# this will write to the BENCH_RESULT_FILE +# Remove the columns with values already represented by other metrics in the summary report +PYTHONPATH=.
\ + python $WORKING_DIR/display_bench_results.py benchmark_outputs \ + --result_file $BENCH_RESULT_FILE \ + --remove_columns \ + 'before_init_mem_cpu' \ + 'before_init_mem_gpu' \ + 'init_mem_cpu_alloc_delta' \ + 'init_mem_cpu_peaked_delta' \ + 'init_mem_gpu_alloc_delta' \ + 'init_mem_gpu_peaked_delta' \ + 'train_mem_cpu_alloc_delta' \ + 'train_mem_cpu_peaked_delta' \ + 'train_mem_gpu_alloc_delta' \ + 'train_mem_gpu_peaked_delta' \ + 'acceleration_framework_config_file' + diff --git a/scripts/verify_generated_configurations.sh b/scripts/verify_generated_configurations.sh new file mode 100755 index 00000000..83344796 --- /dev/null +++ b/scripts/verify_generated_configurations.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +OUTPUT_DIR=${1:-sample-configurations} + +GIT_DIFF=$(git diff HEAD -- $OUTPUT_DIR) +echo "git diff of configurations with HEAD:" +echo "$GIT_DIFF" + +function echoWarning() { + LIGHT_YELLOW='\033[1;33m' + NC='\033[0m' # No Color + echo -e "${LIGHT_YELLOW}${1}${NC}" +} + +if [ ! -z "$GIT_DIFF" ]; then + echoWarning "At least one of the configs in the plugins appears to have changed." + echoWarning "Please run 'tox -e gen-configs' to ensure that the sample-configurations are correctly generated!" + echoWarning "After that, commit the generated sample-configurations to remove this error." + exit 1 +fi + +echo "sample configurations are up to date with the configs in the plugin directories" diff --git a/tox.ini b/tox.ini index b9f48607..d719cb3e 100644 --- a/tox.ini +++ b/tox.ini @@ -9,22 +9,30 @@ skip_install = true commands = python scripts/generate_sample_configurations.py {posargs:sample-configurations} +[testenv:verify-configs] +description = verify that sample configurations for all plugins are properly generated +skip_install = true +commands = + bash scripts/verify_generated_configurations.sh {posargs:sample-configurations} +allowlist_externals = bash + # put this here first, consider moving it later [testenv:run-benches] description = run benchmarks skip_install = true +deps = + packaging # this is required for the flash-attn dep, which fms_hf_tuning did not specify + -e {toxinidir}/plugins/framework # install the framework here as the flash-attn dep requires torch +passenv = * # will pass the parent env, otherwise there are too many envs (e.g. TRANSFORMERS) that need to be set commands = # need a version of fms-hf-tuning that has integrated the framework # NOTE: have to install this first because it has not been merged yet # - this repo has a lot of pins, so we just install it first - pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@acceleration-framework" + pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@"{env:FHT_BRANCH:main} # some models need this for tokenizers pip install protobuf - # install the framework - pip install -e {toxinidir}/plugins/framework - # install the plugins for test # NOTE: when there are more plugins install here python -m fms_acceleration.cli install -e {toxinidir}/plugins/accelerated-peft