From 94858b5af49068330c6bdfeaa89b838244182f63 Mon Sep 17 00:00:00 2001
From: Michal Adamczyk
Date: Wed, 30 Oct 2024 14:21:05 +0100
Subject: [PATCH] Fix default value for FSDPA (#448)

---
 vllm/attention/backends/hpu_attn.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py
index 2e987b039c220..8f16081e2e2b5 100644
--- a/vllm/attention/backends/hpu_attn.py
+++ b/vllm/attention/backends/hpu_attn.py
@@ -16,6 +16,7 @@
 from vllm.attention.ops.hpu_paged_attn import (HPUPagedAttention,
                                                HPUPagedAttentionMetadata)
 from vllm.logger import init_logger
+from vllm.utils import is_fake_hpu
 
 logger = init_logger(__name__)
 
@@ -120,9 +121,10 @@ def __init__(
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
 
-        self.prefill_usefusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
-                                              '0').lower() in ['1', 'true']
-        if self.prefill_usefusedsdpa:
+        self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
+                                               '1').lower() in ['1', 'true'] \
+                                               and not is_fake_hpu()
+        if self.prefill_use_fusedsdpa:
             assert alibi_slopes is None, \
                 'Prefill with FusedSDPA not supported with alibi slopes!'
 
@@ -188,7 +190,7 @@ def forward(
             kv_shape = (batch_size, seq_len_kv, self.num_kv_heads,
                         self.head_size)
         if attn_metadata is None or attn_metadata.block_list is None:
-            if not self.prefill_usefusedsdpa:
+            if not self.prefill_use_fusedsdpa:
                 # TODO: move this outside of model
                 assert attn_metadata.attn_bias is not None, \
                     'attn_bias must be set before calling model.forward'
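
Note (reviewer sketch, not part of the patch): the behavioral change is that VLLM_PROMPT_USE_FUSEDSDPA now defaults to enabled ('1') instead of disabled, and FusedSDPA prefill is additionally forced off when running on a fake/emulated HPU. Below is a minimal, self-contained Python sketch of the new flag resolution; is_fake_hpu() is stubbed out here as a hypothetical placeholder for vllm.utils.is_fake_hpu.

    import os


    def is_fake_hpu() -> bool:
        # Hypothetical placeholder for vllm.utils.is_fake_hpu; the real helper
        # detects an emulated (fake) HPU device.
        return False


    def prefill_use_fusedsdpa() -> bool:
        # Same expression as in the patch: default flipped from '0' to '1',
        # gated on not running against a fake HPU.
        return (os.getenv('VLLM_PROMPT_USE_FUSEDSDPA', '1').lower()
                in ['1', 'true']) and not is_fake_hpu()


    if __name__ == '__main__':
        # With no environment override, prefill FusedSDPA is now on by default;
        # setting VLLM_PROMPT_USE_FUSEDSDPA=0 still disables it.
        print(prefill_use_fusedsdpa())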