diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index aead579e..2ec2bbca 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -51,3 +51,19 @@ jobs:
       run: |
         cd plugins/${{ matrix.plugin_name }}
         tox -e py
+
+  sample-config:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install tox
+      - name: Run Config Verification
+        run: tox -e verify-configs
diff --git a/plugins/framework/pyproject.toml b/plugins/framework/pyproject.toml
index 534a8eeb..8200f0bf 100644
--- a/plugins/framework/pyproject.toml
+++ b/plugins/framework/pyproject.toml
@@ -22,7 +22,7 @@ classifiers=[
     "Programming Language :: Python :: 3.11",
 ]
 dependencies = [
-    "torch<2.3",
+    "torch>2.2,<2.3",
     "transformers<4.40",
     "peft",
     "accelerate"
diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md
index fdc6d7bc..c4be3268 100644
--- a/scripts/benchmarks/README.md
+++ b/scripts/benchmarks/README.md
@@ -51,6 +51,11 @@ A `scenario` has the following key components:
 
 The best way is via `tox` which manages the dependencies, including installing the correct version [fms-hf-tuning](https://github.com/foundation-model-stack/fms-hf-tuning).
 
+- install `setup_requirements.txt` to get `tox`:
+  ```
+  pip install -r setup_requirements.txt
+  ```
+
 - run a *small* representative set of benches:
   ```
   tox -e run-benches
@@ -59,7 +64,9 @@ The best way is via `tox` which manages the dependencies, including installing t
   ```
   tox -e run-benches -- "1 2"
   ```
-ationFramework` to demonstrate the various plugins.
+
+Note:
+- the `tox` command above accepts the environment variables `DRY_RUN`, `NO_DATA_PROCESSING` and `NO_OVERWRITE`. See `scripts/run_benchmarks.sh`.
 
 ## Running Benchmarks
 
@@ -82,3 +89,6 @@ Alternatively run [`benchmark.py`](./benchmark.py) directly. To see the help do:
 ```
 python benchmark.py --help
 ```
+
+Note:
+- `run_benchmarks.sh` clears `RESULT_DIR` if it already exists, to avoid contamination with old results. To protect existing results from being overwritten, always run with `NO_OVERWRITE=true`.
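Note: the README additions above document the environment variables read by the benchmark harness. A minimal invocation sketch combining them with the `run-benches` target (the values shown are illustrative; the actual defaults live in `scripts/run_benchmarks.sh`):

```
# dry-run the small representative bench set on the "1 2" GPU matrix,
# and refuse to clobber an existing results directory
DRY_RUN=true NO_OVERWRITE=true tox -e run-benches -- "1 2"
```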
diff --git a/scripts/benchmarks/benchmark.py b/scripts/benchmarks/benchmark.py
index fdb4e7a8..9d05b540 100644
--- a/scripts/benchmarks/benchmark.py
+++ b/scripts/benchmarks/benchmark.py
@@ -47,7 +47,7 @@
 FILE_RESULTS = "results.json"
 FILE_SHELL_COMMAND = "command.sh"
 FILE_SCRIPT_ARGS = "script.json"
-FILE_SUMMARY_CSV = "summary.csv"
+FILE_SUMMARY_CSV = "raw_summary.csv"
 
 DIR_BENCHMARKS = os.path.dirname(os.path.realpath(__file__))
 DIR_PREFIX_EXPERIMENT = "exp"
@@ -411,7 +411,7 @@ def _escape(x: str):
         with open(self.command_filename, "w") as f:
             f.write("#!/bin/bash\n\n")
             for key, val in self.environment.items():
-                f.write(f"{key}={val}\n")
+                f.write(f"export {key}={val}\n")
             f.write(" ".join([_escape(x) for x in self.shell_command]))
diff --git a/scripts/benchmarks/display_bench_results.py b/scripts/benchmarks/display_bench_results.py
new file mode 100644
index 00000000..30b54e63
--- /dev/null
+++ b/scripts/benchmarks/display_bench_results.py
@@ -0,0 +1,50 @@
+# Standard
+import argparse
+
+# First Party
+# import this because of a lot of internal constants
+from scripts.benchmarks.benchmark import gather_report
+
+
+def main(*directories: str, output_filename: str = "results.csv"):
+    "gather outputs from a list of directories and output to a csv"
+
+    df, constant = gather_report(*directories, raw=False)
+    errors = []
+    try:
+        # remove error messages if any
+        errors = df.error_messages
+        errors = errors.loc[errors.isna() == False]
+        df = df.loc[df.error_messages.isna()]
+    except:
+        pass
+    df = df.reset_index().drop("output_dir", axis=1)
+    df.reindex(sorted(df.columns), axis=1).to_csv(
+        output_filename, index=False
+    )
+    print("***************** Report Created ******************")
+    print(f"Total lines: '{len(df)}'")
+    print(f"Number of columns included: '{len(df.columns)}'")
+    print(f"Number of columns excluded: '{len(constant)}'")
+    print(f"Number of rows excluded due to errors: '{len(errors)}'")
+    print(f"Written report to '{output_filename}'")
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(
+        prog="Acceleration Benchmarking Reporting Tool",
+        description="This script gathers a set of benchmarks to produce a CSV report",
+    )
+    parser.add_argument(
+        "bench_outputs",
+        nargs="+",
+        help="list of directories from which to gather bench outputs.",
+    )
+    parser.add_argument(
+        "--result_file",
+        default="results.csv",
+        help="name of final csv report file.",
+    )
+    args = parser.parse_args()
+    main(args.bench_outputs, output_filename=args.result_file)
diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py
index cd354cbf..9f239041 100644
--- a/scripts/generate_sample_configurations.py
+++ b/scripts/generate_sample_configurations.py
@@ -157,7 +157,7 @@ def read_configuration(path: str) -> Dict:
 # config.
 COMBINATIONS = [
     ("accelerated-peft-autogptq", (KEY_AUTO_GPTQ,)),
-    # ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)),
+    ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)),
 ]
diff --git a/scripts/run_benchmarks.sh b/scripts/run_benchmarks.sh
index a281cc53..758e2d9e 100644
--- a/scripts/run_benchmarks.sh
+++ b/scripts/run_benchmarks.sh
@@ -27,9 +27,16 @@ SCNTAG_PEFT_AUTOGPTQ=accelerated-peft-gptq
 # data will be cached in here
 DATA_CACHE=data/cache.json
+# final result placed here
+BENCH_RESULT_FILE=benchmarks.csv
+
+# freeze the pip requirements here
+PIP_REQUIREMENTS_FILE=requirements.txt
+
 # env inputs
 DRY_RUN=${DRY_RUN:-"false"}
 NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"}
+NO_OVERWRITE=${NO_OVERWRITE:-"false"}
 
 # inputs
 NUM_GPUS_MATRIX=${1-"1 2"}
@@ -42,11 +49,25 @@ echo "RESULT_DIR: $RESULT_DIR"
 echo "SCENARIOS_CONFIG: $SCENARIOS_CONFIG"
 echo "SCENARIOS_FILTER: $SCENARIOS_FILTER"
 
+if [ -d "$RESULT_DIR" ]; then
+    echo "The results directory $RESULT_DIR already exists."
+    if [ "$NO_OVERWRITE" = "true" ]; then
+        echo "Results dir $RESULT_DIR already exists, but NO_OVERWRITE=true."
+        echo "If you intend to overwrite, please delete the folder manually"
+        echo "or do not set NO_OVERWRITE."
+        exit 1
+    fi
+    echo "Deleting $RESULT_DIR"
+    rm -rf $RESULT_DIR
+fi
+
 # tag on the directories
 SCENARIOS_CONFIG=$WORKING_DIR/$SCENARIOS_CONFIG
 DEFAULTS_CONFIG=$WORKING_DIR/$DEFAULTS_CONFIG
 ACCELERATE_CONFIG=$WORKING_DIR/$ACCELERATE_CONFIG
 DATA_CACHE=$RESULT_DIR/$DATA_CACHE
+BENCH_RESULT_FILE=$RESULT_DIR/$BENCH_RESULT_FILE
+PIP_REQUIREMENTS_FILE=$RESULT_DIR/$PIP_REQUIREMENTS_FILE
 
 # ------------- EXTRA ARGS -----------------
@@ -65,6 +86,9 @@ if [ "$NO_DATA_PROCESSING" = "true" ]; then
     EXTRA_ARGS="$EXTRA_ARGS --no_data_processing"
 fi
 
+# dump out the environment
+pip freeze > $PIP_REQUIREMENTS_FILE
+
 # run the bench
 python $WORKING_DIR/benchmark.py \
     --num_gpus $NUM_GPUS_MATRIX \
@@ -73,3 +97,10 @@ python $WORKING_DIR/benchmark.py \
     --defaults_config_path $DEFAULTS_CONFIG \
     --dataset_save_path $DATA_CACHE \
     --results_output_path $RESULT_DIR $EXTRA_ARGS
+
+# produce the final CSV for check-in
+# need to set PYTHONPATH because display_bench_results.py imports from scripts.benchmarks
+# this will write to the BENCH_RESULT_FILE
+PYTHONPATH=. \
+    python $WORKING_DIR/display_bench_results.py benchmark_outputs \
+    --result_file $BENCH_RESULT_FILE
diff --git a/scripts/verify_generated_configurations.sh b/scripts/verify_generated_configurations.sh
new file mode 100755
index 00000000..83344796
--- /dev/null
+++ b/scripts/verify_generated_configurations.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+OUTPUT_DIR=${1:-sample-configurations}
+
+GIT_DIFF=$(git diff HEAD -- $OUTPUT_DIR)
+echo "git diff of configurations with HEAD:"
+echo "$GIT_DIFF"
+
+function echoWarning() {
+    LIGHT_YELLOW='\033[1;33m'
+    NC='\033[0m' # No Color
+    echo -e "${LIGHT_YELLOW}${1}${NC}"
+}
+
+if [ ! -z "$GIT_DIFF" ]; then
+    echoWarning "At least one of the plugin configs has changed, but the sample-configurations were not regenerated."
+    echoWarning "Please run 'tox -e gen-configs' to regenerate the sample-configurations."
+    echoWarning "After that, commit the regenerated sample-configurations to remove this error."
+    exit 1
+fi
+
+echo "sample configurations are up to date with the configs in the plugin directories"
diff --git a/tox.ini b/tox.ini
index b9f48607..d719cb3e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,22 +9,30 @@ skip_install = true
 commands =
     python scripts/generate_sample_configurations.py {posargs:sample-configurations}
+[testenv:verify-configs]
+description = verify that the sample configurations for all plugins are properly generated
+skip_install = true
+commands =
+    bash scripts/verify_generated_configurations.sh {posargs:sample-configurations}
+allowlist_externals = bash
+
 # put this here first, consider moving it later
 [testenv:run-benches]
 description = run benchmarks
 skip_install = true
+deps =
+    packaging # required for the flash-attn dep, as fms_hf_tuning does not specify it
+    -e {toxinidir}/plugins/framework # install the framework here, as the flash-attn dep requires torch
+passenv = * # pass the parent env, otherwise there are too many vars (e.g. TRANSFORMERS) that need to be set
 commands =
     # need a version of fms-hf-tuning that has integrated the framework
    # NOTE: have to install this first coz havnt merged
     # - this repo has a lot of pins, so we just install it first
-    pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@acceleration-framework"
+    pip install "fms-hf-tuning[flash-attn] @ git+https://github.com/fabianlim/fms-hf-tuning.git@"{env:FHT_BRANCH:main}
 
     # some models need this for tokenizers
     pip install protobuf
 
-    # install the framework
-    pip install -e {toxinidir}/plugins/framework
-
     # install the plugins for test
     # NOTE: when there are more plugins install here
     python -m fms_acceleration.cli install -e {toxinidir}/plugins/accelerated-peft
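Note: the `verify-configs` tox env added above only diffs `sample-configurations` against HEAD, so the expected developer loop is roughly the one sketched below (the `gen-configs` env name is taken from the warning printed by `verify_generated_configurations.sh`):

```
# regenerate the sample configurations from the plugin configs
tox -e gen-configs
# check that the regenerated files match what is committed at HEAD
tox -e verify-configs
# if a diff is reported, commit the regenerated files
git add sample-configurations
git commit -m "regenerate sample configurations"
```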
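Note: `display_bench_results.py` imports `gather_report` from `scripts.benchmarks.benchmark`, so when it is run outside of `run_benchmarks.sh` the repository root must be on `PYTHONPATH`. A standalone sketch mirroring the call in `run_benchmarks.sh` (the directory and file names are illustrative):

```
PYTHONPATH=. python scripts/benchmarks/display_bench_results.py \
    benchmark_outputs --result_file benchmarks.csv
```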