From 748a9c760c168f0aa6813651ed871745fadd36dd Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <165115237+ZX-ModelCloud@users.noreply.github.com>
Date: Mon, 23 Dec 2024 12:30:36 +0800
Subject: [PATCH] [FIX] vl model test (#953)

* Use quant_override_files["preprocessor_config.json"] to process input data

* Use a calibration sample size of 1 (n_sample=1) for the Qwen2-VL test dataset

* add debug log

* Revert "add debug log"

This reverts commit 105b9e692b31bacd31372df9beaeed9dc14a43cf.

* Pass max_new_tokens when calling OvisModel.generate()

* cleanup
---
 gptqmodel/models/base.py                   |  2 +-
 gptqmodel/models/definitions/qwen2_vl.py   | 19 +++++++++++++++++--
 tests/models/ovis/image_to_test_dataset.py |  2 +-
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
index c5de0c658..2833f8d3e 100644
--- a/gptqmodel/models/base.py
+++ b/gptqmodel/models/base.py
@@ -466,7 +466,7 @@ def store_input_hook(_, args, kwargs):
                     example[k] = move_to(v, cur_layer_device)
             try:
                 if is_ovis:
-                    self.generate(inputs=example.pop("input_ids"), **example)
+                    self.generate(inputs=example.pop("input_ids"), max_new_tokens=1024, **example)
                 else:
                     self.model(**example)
             except ValueError:
diff --git a/gptqmodel/models/definitions/qwen2_vl.py b/gptqmodel/models/definitions/qwen2_vl.py
index 000ec0dcb..b5ffc2706 100644
--- a/gptqmodel/models/definitions/qwen2_vl.py
+++ b/gptqmodel/models/definitions/qwen2_vl.py
@@ -1,7 +1,9 @@
+import os.path
+import shutil
 from typing import Dict, Optional
 from PIL import Image
 
-from transformers import AutoModelForVision2Seq, AutoProcessor
+from transformers import AutoModelForVision2Seq, AutoProcessor, AutoTokenizer
 
 from ..base import BaseGPTQModel
 from ...utils.calibration import batched
@@ -82,7 +84,20 @@ def prepare_dataset(
             calibration_dataset,
             batch_size: int = 1,
             tokenizer=None, ):
-        processor = AutoProcessor.from_pretrained(self.model_id_or_path)
+        import tempfile
+        import json
+
+        if tokenizer is None:
+            tokenizer = AutoTokenizer.from_pretrained(self.model_id_or_path)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            chat_template_file = os.path.join(self.model_id_or_path, "chat_template.json")
+            if os.path.exists(chat_template_file):
+                shutil.copyfile(chat_template_file, os.path.join(tmp_dir, "chat_template.json"))
+            tokenizer.save_pretrained(tmp_dir)
+            with open(os.path.join(tmp_dir, "preprocessor_config.json"), "w") as f:
+                f.write(json.dumps(self.quant_override_files["preprocessor_config.json"]))
+            processor = AutoProcessor.from_pretrained(tmp_dir)
         calib_data = []
         for batch in batched(calibration_dataset, batch_size, process_func=self.preprocess_dataset):
             text = processor.apply_chat_template(
diff --git a/tests/models/ovis/image_to_test_dataset.py b/tests/models/ovis/image_to_test_dataset.py
index 22645c4eb..bc0eccecb 100644
--- a/tests/models/ovis/image_to_test_dataset.py
+++ b/tests/models/ovis/image_to_test_dataset.py
@@ -47,6 +47,6 @@ def get_calib_dataset(model):
         return prepare_dataset(format_ovis_dataset, n_sample=20)
 
     if isinstance(model, Qwen2VLGPTQ):
-        return prepare_dataset(format_qwen2_vl_dataset, n_sample=20)
+        return prepare_dataset(format_qwen2_vl_dataset, n_sample=1)
 
     raise NotImplementedError(f"Unsupported MODEL: {model.__class__}")