add additional entry to requirements.txt

foundation-model-stack · Jul 8, 2024 · 37eb9d7 · 37eb9d7
1 parent 1017224
commit 37eb9d7
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 7 deletions.
diff --git a/plugins/accelerated-peft/requirements.txt b/plugins/accelerated-peft/requirements.txt
@@ -6,3 +6,8 @@ accelerate >= 0.29
 
 # bitsandbytes for the BNB plugin
 bitsandbytes
+
+# Used to manage the thread limit in the functions that convert old
+# GPTQ models to the new GPTQ model format that supports symmetrical=False
+# https://github.com/AutoGPTQ/AutoGPTQ/pull/640
+threadpoolctl
diff --git a/plugins/accelerated-peft/tests/test_gptqmodel.py b/plugins/accelerated-peft/tests/test_gptqmodel.py
@@ -219,7 +219,7 @@ def test_quantizing_pretrained_model_outputs_match(
     calibration_dataset = get_wikitext2(tokenizer, num_samples=128, seqlen=128)
     quant_config_kwargs = {
         "bits": 4,
-        "group_size": -1,
+        "group_size": 64,
         "desc_act": True,
         "damp_percent": 0.1,
         "static_groups": False,
@@ -286,13 +286,13 @@ def test_quantizing_pretrained_model_outputs_match(
     # Measure the distribution error with KL divergence loss
     # flatten batch and sequence dims into a single dim of size bs*seqlen;
     # with reduction="sum" the loss is summed over all elements (no averaging)
-    loss_fn = torch.nn.KLDivLoss(reduction="batchmean")
+    loss_fn = torch.nn.KLDivLoss(reduction="sum")
     # input should be a distribution in the log space
     input = torch.nn.functional.log_softmax(refactored_logits, dim=-1)
-    input = torch.flatten(input, start_dim=0, end_dim=1)
+    input = input.view(BS*SEQLEN, -1)
     # target must be prob distribution
     target = torch.nn.functional.softmax(original_logits, dim=-1)
-    target = torch.flatten(target, start_dim=0, end_dim=1)
+    target = target.view(BS*SEQLEN, -1)
     error = loss_fn(input, target)
     assert error.lt(
         LOSS_TOLERANCE

diff --git a/plugins/accelerated-peft/tests/test_q4_triton.py b/plugins/accelerated-peft/tests/test_q4_triton.py
@@ -55,7 +55,7 @@ def test_generation_desc_act_false(self):
         else:
             raise ValueError("Did not find a tritonv2 linear layer")
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
         inp = tokenizer(prompt, return_tensors="pt").to("cuda:0")
 
@@ -101,7 +101,7 @@ def test_generation_desc_act_true(self):
         else:
             raise ValueError("Did not find a tritonv2 linear layer")
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
         inp = tokenizer(prompt, return_tensors="pt").to(device)
 

diff --git a/plugins/accelerated-peft/tox.ini b/plugins/accelerated-peft/tox.ini
@@ -5,13 +5,14 @@ envlist = py, lint, fmt, build, twinecheck
 deps = 
     pytest>=7
     # for the tests, we need to install the deps ourselves
-    # as the package will install the github version
+    # as the package will install the github version    
     -e {toxinidir}/../framework
 # skip package installation: otherwise the package from pyproject.toml is installed before deps, which throws an error when AutoGPTQ needs torch
 skip_install = true 
 commands = 
     # install the current package
     pip install --no-deps {toxinidir}
+    pip install threadpoolctl protobuf sentencepiece # these packages are required for some tests
     pytest {posargs:tests}
 
 [testenv:lint]