diff --git a/plugins/accelerated-peft/requirements.txt b/plugins/accelerated-peft/requirements.txt
index a00233d3..237f9092 100644
--- a/plugins/accelerated-peft/requirements.txt
+++ b/plugins/accelerated-peft/requirements.txt
@@ -6,3 +6,8 @@ accelerate >= 0.29
 
 # bitsandbytes for the BNB plugin
 bitsandbytes
+
+# Used to manage the thread limit in functions for converting old
+# GPTQ models to the new GPTQ model format that supports symmetrical=False
+# https://github.com/AutoGPTQ/AutoGPTQ/pull/640
+threadpoolctl
\ No newline at end of file
diff --git a/plugins/accelerated-peft/tests/test_gptqmodel.py b/plugins/accelerated-peft/tests/test_gptqmodel.py
index 99abf154..a56cbc9a 100644
--- a/plugins/accelerated-peft/tests/test_gptqmodel.py
+++ b/plugins/accelerated-peft/tests/test_gptqmodel.py
@@ -219,7 +219,7 @@ def test_quantizing_pretrained_model_outputs_match(
     calibration_dataset = get_wikitext2(tokenizer, num_samples=128, seqlen=128)
     quant_config_kwargs = {
         "bits": 4,
-        "group_size": -1,
+        "group_size": 64,
         "desc_act": True,
         "damp_percent": 0.1,
         "static_groups": False,
@@ -286,13 +286,13 @@ def test_quantizing_pretrained_model_outputs_match(
     # Measure the distribution error with KD Loss
     # flatten as a single batch bs*seqlen
-    # since batchmean sums the loss and averages on dim=0
-    loss_fn = torch.nn.KLDivLoss(reduction="batchmean")
+    # sum the loss over all tokens instead of averaging on dim=0
+    loss_fn = torch.nn.KLDivLoss(reduction="sum")
     # input should be a distribution in the log space
     input = torch.nn.functional.log_softmax(refactored_logits, dim=-1)
-    input = torch.flatten(input, start_dim=0, end_dim=1)
+    input = input.view(BS*SEQLEN, -1)
     # target must be prob distribution
     target = torch.nn.functional.softmax(original_logits, dim=-1)
-    target = torch.flatten(target, start_dim=0, end_dim=1)
+    target = target.view(BS*SEQLEN, -1)
     error = loss_fn(input, target)
     assert error.lt(
         LOSS_TOLERANCE
diff --git a/plugins/accelerated-peft/tests/test_q4_triton.py b/plugins/accelerated-peft/tests/test_q4_triton.py
index 33927a7d..ba5d0674 100644
--- a/plugins/accelerated-peft/tests/test_q4_triton.py
+++ b/plugins/accelerated-peft/tests/test_q4_triton.py
@@ -55,7 +55,7 @@ def test_generation_desc_act_false(self):
         else:
             raise ValueError("Did not find a tritonv2 linear layer")
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
         inp = tokenizer(prompt, return_tensors="pt").to("cuda:0")
 
@@ -101,7 +101,7 @@ def test_generation_desc_act_true(self):
         else:
             raise ValueError("Did not find a tritonv2 linear layer")
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
         inp = tokenizer(prompt, return_tensors="pt").to(device)
 
diff --git a/plugins/accelerated-peft/tox.ini b/plugins/accelerated-peft/tox.ini
index cdcf221d..2e81324d 100644
--- a/plugins/accelerated-peft/tox.ini
+++ b/plugins/accelerated-peft/tox.ini
@@ -5,13 +5,14 @@ envlist = py, lint, fmt, build, twinecheck
 deps =
     pytest>=7
     # for the tests, we need to install the deps ourselves
-    # as the package will install the github version 
+    # as the package will install the github version
     -e {toxinidir}/../framework
 # set skip package installation as it will install package pyproject.toml before deps, will throw error when AutoGPTQ needs torch
 skip_install = true
 commands =
     # install the current package
     pip install --no-deps {toxinidir}
+    pip install threadpoolctl protobuf sentencepiece # these packages are required for some tests
     pytest {posargs:tests}
 
 [testenv:lint]
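Note on the new threadpoolctl requirement: it caps the number of native (BLAS/OpenMP) threads spawned during the CPU-bound GPTQ checkpoint conversion referenced in AutoGPTQ PR #640. A minimal sketch of the mechanism, not the plugin's actual code; the matmul here is only a stand-in for the conversion work:

```python
# Minimal sketch of threadpoolctl's thread cap; the matmul is a stand-in
# for the CPU-bound GPTQ format-conversion work (assumption, not plugin code).
import numpy as np
from threadpoolctl import threadpool_limits

a = np.random.rand(1024, 1024)
with threadpool_limits(limits=4):  # BLAS/OpenMP pools capped at 4 threads
    b = a @ a                      # heavy native work runs under the cap
# outside the block the previous thread limits are restored
```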
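On the group_size change in test_gptqmodel.py: in a GPTQ quantization config, group_size = -1 shares one set of quantization parameters across the whole row of input features, while group_size = 64 recomputes scales every 64 input features, trading a little extra storage for accuracy. An illustrative sketch of per-group symmetric 4-bit scaling, not AutoGPTQ's actual algorithm:

```python
# Illustrative per-group 4-bit symmetric quantization; this sketches what
# group_size controls, not AutoGPTQ's implementation.
import torch

W = torch.randn(512, 512)                       # (out_features, in_features)
group_size = 64                                 # as in the quant config above
g = W.view(W.shape[0], -1, group_size)          # one group per 64 input features
scale = g.abs().amax(dim=-1, keepdim=True) / 7  # 4-bit signed range [-8, 7]
q = torch.clamp(torch.round(g / scale), -8, 7)  # integer codes per group
W_deq = (q * scale).view_as(W)                  # dequantized approximation
```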
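On the KLDivLoss change: reduction="batchmean" divides the summed loss by the first dimension (BS*SEQLEN after the reshape), i.e. a per-token average, whereas reduction="sum" keeps the total divergence, so LOSS_TOLERANCE now bounds the summed error over all token positions. A self-contained sketch of the rewritten check; the shapes, logits, and tolerance value are made up:

```python
# Self-contained sketch of the test's KL check; shapes and logits are
# fabricated, and the tolerance is an arbitrary stand-in for LOSS_TOLERANCE.
import torch

BS, SEQLEN, VOCAB = 2, 8, 32
original_logits = torch.randn(BS, SEQLEN, VOCAB)
refactored_logits = original_logits + 0.01 * torch.randn_like(original_logits)

loss_fn = torch.nn.KLDivLoss(reduction="sum")
# input must be log-probabilities, target plain probabilities
inp = torch.nn.functional.log_softmax(refactored_logits, dim=-1).view(BS * SEQLEN, -1)
tgt = torch.nn.functional.softmax(original_logits, dim=-1).view(BS * SEQLEN, -1)
error = loss_fn(inp, tgt)  # total KL summed over all BS*SEQLEN positions
assert error.lt(1.0)       # stand-in tolerance
```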