
Commit 8978393: fix wrong backend on shard_quantized()

ZX-ModelCloud committed Jul 23, 2024 (1 parent: 88392c7)

Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion gptqmodel/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ def shard_quantized(cls,
# Here, the CPU is always used, so you need to skip it.
quantized_model = cls.from_quantized(quantized_model_path_or_id,
device="cpu",
backend=BACKEND.TRITON,
backend=BACKEND.AUTO,
use_safetensors=use_safetensors,
safetensors_metadata=safetensors_metadata,
model_basename=model_base_name,
Expand Down
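
Why the one-line change matters: shard_quantized() reloads the already-quantized checkpoint on the CPU before re-sharding it, and the Triton backend only runs on CUDA devices, so hard-coding BACKEND.TRITON breaks this CPU-only load path. BACKEND.AUTO defers the choice to the loader, which picks a backend compatible with the requested device. A minimal sketch of the equivalent call through the public API, assuming the top-level gptqmodel exports of GPTQModel and BACKEND, with a placeholder checkpoint path:

    # Sketch only: "my-quantized-model" is a placeholder path.
    from gptqmodel import GPTQModel, BACKEND

    # device="cpu" plus backend=BACKEND.AUTO mirrors the fixed call in
    # shard_quantized(): AUTO lets the loader select a CPU-capable
    # backend instead of forcing the CUDA-only Triton kernels.
    model = GPTQModel.from_quantized(
        "my-quantized-model",
        device="cpu",
        backend=BACKEND.AUTO,
    )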
2 changes: 1 addition & 1 deletion tests/test_shard_quantized.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ def test_again_save_quantized_model(self):

print("catch exception:", raise_exception.exception)

self.assertTrue('Saving a quantized model again is not supported' in str(raise_exception.exception))
self.assertTrue('Saving a loaded quantized model is not supported' in str(raise_exception.exception))
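
The test update tracks the error text: with the backend fixed, re-saving a model that was loaded via from_quantized() is rejected with 'Saving a loaded quantized model is not supported'. A minimal self-contained sketch of the assertion pattern, assuming a hypothetical stub in place of the real loaded model (the exception type, save_quantized signature, and stub are illustrative, not taken from the diff):

    import unittest

    class _LoadedQuantizedModelStub:
        # Stand-in for a model returned by from_quantized(); the real
        # guard lives in gptqmodel, this stub only mirrors its message.
        def save_quantized(self, save_dir: str):
            raise ValueError("Saving a loaded quantized model is not supported")

    class ExampleTest(unittest.TestCase):
        def test_again_save_quantized_model(self):
            model = _LoadedQuantizedModelStub()
            with self.assertRaises(ValueError) as raise_exception:
                model.save_quantized("/tmp/resaved")
            # assertIn reads more directly than assertTrue(... in ...)
            self.assertIn(
                "Saving a loaded quantized model is not supported",
                str(raise_exception.exception),
            )

    if __name__ == "__main__":
        unittest.main()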
