
Commit 8978393: fix wrong backend on shard_quantized()

ZX-ModelCloud committed Jul 23, 2024 (1 parent: 88392c7)

Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion gptqmodel/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def shard_quantized(cls,
# Here, the CPU is always used, so you need to skip it.
quantized_model = cls.from_quantized(quantized_model_path_or_id,
device="cpu",
backend=BACKEND.TRITON,
backend=BACKEND.AUTO,
use_safetensors=use_safetensors,
safetensors_metadata=safetensors_metadata,
model_basename=model_base_name,
Expand Down
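
Why the one-line change matters: shard_quantized() reloads the already-quantized checkpoint on the CPU before re-sharding it, and the Triton backend only runs on CUDA devices, so hard-coding BACKEND.TRITON breaks this CPU-only load path. BACKEND.AUTO defers the choice to the loader, which picks a backend compatible with the requested device. A minimal sketch of the equivalent call through the public API, assuming the top-level gptqmodel exports of GPTQModel and BACKEND, with a placeholder checkpoint path:

    # Sketch only: "my-quantized-model" is a placeholder path.
    from gptqmodel import GPTQModel, BACKEND

    # device="cpu" plus backend=BACKEND.AUTO mirrors the fixed call in
    # shard_quantized(): AUTO lets the loader select a CPU-capable
    # backend instead of forcing the CUDA-only Triton kernels.
    model = GPTQModel.from_quantized(
        "my-quantized-model",
        device="cpu",
        backend=BACKEND.AUTO,
    )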
2 changes: 1 addition & 1 deletion tests/test_shard_quantized.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ def test_again_save_quantized_model(self):

print("catch exception:", raise_exception.exception)

self.assertTrue('Saving a quantized model again is not supported' in str(raise_exception.exception))
self.assertTrue('Saving a loaded quantized model is not supported' in str(raise_exception.exception))
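
The test update tracks the error text: with the backend fixed, re-saving a model that was loaded via from_quantized() is rejected with 'Saving a loaded quantized model is not supported'. A minimal self-contained sketch of the assertion pattern, assuming a hypothetical stub in place of the real loaded model (the exception type, save_quantized signature, and stub are illustrative, not taken from the diff):

    import unittest

    class _LoadedQuantizedModelStub:
        # Stand-in for a model returned by from_quantized(); the real
        # guard lives in gptqmodel, this stub only mirrors its message.
        def save_quantized(self, save_dir: str):
            raise ValueError("Saving a loaded quantized model is not supported")

    class ExampleTest(unittest.TestCase):
        def test_again_save_quantized_model(self):
            model = _LoadedQuantizedModelStub()
            with self.assertRaises(ValueError) as raise_exception:
                model.save_quantized("/tmp/resaved")
            # assertIn reads more directly than assertTrue(... in ...)
            self.assertIn(
                "Saving a loaded quantized model is not supported",
                str(raise_exception.exception),
            )

    if __name__ == "__main__":
        unittest.main()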
