diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py index 037b9c2a0..6f0d76d89 100644 --- a/gptqmodel/models/base.py +++ b/gptqmodel/models/base.py @@ -614,8 +614,7 @@ def save_quantized( ): """save quantized model and configs to local disk""" if self.from_quantized: - raise NotImplementedError("Saving a quantized model again is not supported. \n" - "If you need to shard the model file, refer to shard_quantized().") + raise NotImplementedError("Saving a loaded quantized model is not supported. If you need to re-shard the model, please use `GPTQModel.shard_quantized()` API.") os.makedirs(save_dir, exist_ok=True)