From 94858b5af49068330c6bdfeaa89b838244182f63 Mon Sep 17 00:00:00 2001
From: Michal Adamczyk
Date: Wed, 30 Oct 2024 14:21:05 +0100
Subject: [PATCH] Fix default value for FSDPA (#448)

---
 vllm/attention/backends/hpu_attn.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py
index 2e987b039c220..8f16081e2e2b5 100644
--- a/vllm/attention/backends/hpu_attn.py
+++ b/vllm/attention/backends/hpu_attn.py
@@ -16,6 +16,7 @@
 from vllm.attention.ops.hpu_paged_attn import (HPUPagedAttention,
                                                HPUPagedAttentionMetadata)
 from vllm.logger import init_logger
+from vllm.utils import is_fake_hpu
 
 logger = init_logger(__name__)
 
@@ -120,9 +121,10 @@ def __init__(
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
 
-        self.prefill_usefusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
-                                              '0').lower() in ['1', 'true']
-        if self.prefill_usefusedsdpa:
+        self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
+                                               '1').lower() in ['1', 'true'] \
+                                               and not is_fake_hpu()
+        if self.prefill_use_fusedsdpa:
             assert alibi_slopes is None, \
                 'Prefill with FusedSDPA not supported with alibi slopes!'
 
@@ -188,7 +190,7 @@ def forward(
             kv_shape = (batch_size, seq_len_kv, self.num_kv_heads,
                         self.head_size)
         if attn_metadata is None or attn_metadata.block_list is None:
-            if not self.prefill_usefusedsdpa:
+            if not self.prefill_use_fusedsdpa:
                 # TODO: move this outside of model
                 assert attn_metadata.attn_bias is not None, \
                     'attn_bias must be set before calling model.forward'
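
Note (reviewer sketch, not part of the patch): the behavioral change is that VLLM_PROMPT_USE_FUSEDSDPA now defaults to enabled ('1') instead of disabled, and FusedSDPA prefill is additionally forced off when running on a fake/emulated HPU. Below is a minimal, self-contained Python sketch of the new flag resolution; is_fake_hpu() is stubbed out here as a hypothetical placeholder for vllm.utils.is_fake_hpu.

    import os


    def is_fake_hpu() -> bool:
        # Hypothetical placeholder for vllm.utils.is_fake_hpu; the real helper
        # detects an emulated (fake) HPU device.
        return False


    def prefill_use_fusedsdpa() -> bool:
        # Same expression as in the patch: default flipped from '0' to '1',
        # gated on not running against a fake HPU.
        return (os.getenv('VLLM_PROMPT_USE_FUSEDSDPA', '1').lower()
                in ['1', 'true']) and not is_fake_hpu()


    if __name__ == '__main__':
        # With no environment override, prefill FusedSDPA is now on by default;
        # setting VLLM_PROMPT_USE_FUSEDSDPA=0 still disables it.
        print(prefill_use_fusedsdpa())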