From c79982d177e81e3482432c93385f061e894e817e Mon Sep 17 00:00:00 2001
From: "Chendi.Xue"
Date: Mon, 18 Nov 2024 09:06:03 -0600
Subject: [PATCH] [BUGFIX] Fix FP8 failing issue on habana_main
 [PatchedVLLMKVCache fwd error] (#502)

Fix an incompatible-argument failure in FP8 mode:

```
ERROR 11-11 04:29:13 engine.py:143]   File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1556, in _wrapped_call_impl
ERROR 11-11 04:29:13 engine.py:143]     return self._call_impl(*args, **kwargs)
ERROR 11-11 04:29:13 engine.py:143]   File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1606, in _call_impl
ERROR 11-11 04:29:13 engine.py:143]     result = forward_call(*args, **kwargs)
ERROR 11-11 04:29:13 engine.py:143] TypeError: PatchedVLLMKVCache.forward() missing 2 required positional arguments: 'block_indices' and 'block_offset'
```

FIX https://github.com/HabanaAI/vllm-fork/issues/453

See also:
https://github.com/HabanaAI/vllm-fork/blob/habana_main/README_GAUDI.md#troubleshooting-tweaking-hpu-graphs

---
 requirements-hpu.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-hpu.txt b/requirements-hpu.txt
index 29e0185060ae7..5e3cdef0e54e4 100644
--- a/requirements-hpu.txt
+++ b/requirements-hpu.txt
@@ -9,4 +9,5 @@ tabulate
 setuptools>=61
 setuptools-scm>=8
 vllm-hpu-extension @ git+https://github.com/HabanaAI/vllm-hpu-extension.git@3a60b49
+neural-compressor @ git+https://github.com/intel/neural-compressor.git@b196432
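
Note for reviewers: below is a minimal, self-contained sketch of the failure mode, not the actual neural-compressor implementation. Only the `forward()` argument names (`block_indices`, `block_offset`) come from the traceback above; the class name and body are illustrative. Per the traceback, the installed `PatchedVLLMKVCache.forward()` requires the two block arguments, so a call site built against a mismatched version omits them and raises the `TypeError`.

```python
import torch


# Illustrative stand-in for neural-compressor's PatchedVLLMKVCache;
# the real module quantizes the input to FP8 and writes it into the
# KV cache at the given block positions.
class PatchedVLLMKVCacheSketch(torch.nn.Module):
    def forward(self, input, cache, block_indices, block_offset):
        # Returning the cache unchanged is enough to demonstrate the
        # signature contract; the real forward performs the cache write.
        return cache


if __name__ == "__main__":
    module = PatchedVLLMKVCacheSketch()
    key = torch.zeros(2, 4)
    kv_cache = torch.zeros(8, 4)

    # Out-of-sync caller: omits the two block arguments and reproduces
    # the TypeError from the log above.
    try:
        module(key, kv_cache)
    except TypeError as err:
        print(err)  # ... missing 2 required positional arguments ...

    # In-sync caller: passes block indices/offsets, as vLLM's HPU cache
    # write does once both packages are pinned to matching commits.
    module(key, kv_cache, torch.tensor([0, 1]), torch.tensor([0, 0]))
    print("matched signatures: ok")
```

Pinning `neural-compressor` to a specific commit, like the existing `vllm-hpu-extension` pin above it, keeps both sides of this call signature in lockstep.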