From de54109ce89fa2ea4e3cd9b79706db313d71d6b8 Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 30 Oct 2024 12:46:47 +0100 Subject: [PATCH 1/2] Update hpu_model_runner.py --- vllm/worker/hpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index 559ed33548dea..b1227f0b07d9c 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -282,7 +282,7 @@ class HpuModelAdapter(): def __init__(self, model, block_size, dtype, enforce_eager): self.model = model self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA', - '0').lower() in ['1', 'true'] + '1').lower() in ['1', 'true'] self.block_size = block_size self.dtype = dtype if not is_fake_hpu() and not htorch.utils.internal.is_lazy( From 9dc2395956b634b966e08764b381fee15c30bb82 Mon Sep 17 00:00:00 2001 From: Konrad Zawora Date: Wed, 30 Oct 2024 12:52:27 +0100 Subject: [PATCH 2/2] Update hpu_model_runner.py --- vllm/worker/hpu_model_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index b1227f0b07d9c..a92d952c308fd 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -282,7 +282,8 @@ class HpuModelAdapter(): def __init__(self, model, block_size, dtype, enforce_eager): self.model = model self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA', - '1').lower() in ['1', 'true'] + '1').lower() in ['1', 'true'] \ + and not is_fake_hpu() self.block_size = block_size self.dtype = dtype if not is_fake_hpu() and not htorch.utils.internal.is_lazy(