Skip to content

Commit

Permalink
revert changes (ModelCloud#274)
Browse files — browse the repository at this point in the history
  • Loading branch information
ZYC-ModelCloud authored Jul 23, 2024
1 parent 13a8ad1 commit f50b228
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
4 changes: 4 additions & 0 deletions gptqmodel/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,6 +1211,10 @@ def skip(*args, **kwargs):
if backend == BACKEND.BITBLAS:
from ..utils.bitblas import prepare_model_for_bitblas_load

if is_sharded:
raise ValueError(
"The loading of sharded checkpoints with BitBLAS is currently not supported. Please raise an issue in GPTQModel repository.")

# Prepare model for bitblas load.
# If is bitblas serialized load then load directly. Otherwise, convert to bitblas.
model = prepare_model_for_bitblas_load(
Expand Down
3 changes: 1 addition & 2 deletions tests/test_sharded.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_save_and_load_no_shard(self):
print(result)
self.assertGreater(len(result), 0)

def test_save_and_load_bitblas_shard(self):
def test_save_and_load_unsupports_shard(self):
model = GPTQModel.from_quantized(
self.MODEL_ID,
device_map="auto",
Expand All @@ -106,7 +106,6 @@ def test_save_and_load_bitblas_shard(self):
model = GPTQModel.from_quantized(
tmp_dir,
device_map="auto",
backend=BACKEND.BITBLAS,
)

tokens = model.generate(**tokenizer("1337", return_tensors="pt").to(model.device), max_new_tokens=20)[0]
Expand Down

0 comments on commit f50b228

Please sign in to comment.