Fixed ALiBi (HabanaAI#254)
Fixed ALiBi and the [MPT-7B](https://www.databricks.com/blog/mpt-7b) model.
Accuracy results compared to CPU (collected using the
[EleutherAI lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)):

| Tasks          | CPU    | HPU    |
| -------------- | ------ | ------ |
| arc_challenge  | 0.4224 | 0.4189 |
| arc_easy       | 0.6974 | 0.6999 |
| hellaswag      | 0.7603 | 0.7626 |
| lambada_openai | 0.7306 | 0.7326 |
| mmlu           | 0.293  | 0.2925 |
| winogrande     | 0.6851 | 0.6811 |
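
For reference, a rough sketch of how numbers like these can be collected through the harness's Python API. This is a minimal sketch, assuming lm-evaluation-harness v0.4+ (which exposes `lm_eval.simple_evaluate` and a `vllm` model backend); the checkpoint path `mosaicml/mpt-7b` and the other arguments are illustrative, not taken from this commit:

```python
# Hedged sketch: evaluate MPT-7B on the tasks above with lm-evaluation-harness.
# Assumes lm-evaluation-harness >= 0.4; model path and args are illustrative.
import lm_eval

results = lm_eval.simple_evaluate(
    model="vllm",                   # or "hf" for a CPU/reference run
    model_args="pretrained=mosaicml/mpt-7b,dtype=bfloat16",
    tasks=["arc_challenge", "arc_easy", "hellaswag",
           "lambada_openai", "mmlu", "winogrande"],
    batch_size=8,
)
print(results["results"])           # per-task accuracy dict
```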
itaraban authored and zhouyu5 committed Sep 20, 2024
1 parent a5904b6 commit 4729d85
Showing 1 changed file with 7 additions and 12 deletions.
19 changes: 7 additions & 12 deletions vllm/attention/backends/habana_attn.py
@@ -108,17 +108,10 @@ def __init__(
         self.v_cache = VLLMKVCache()
         self.num_kv_heads = num_heads if num_kv_heads is None else num_kv_heads
         self.sliding_window = sliding_window
-        self.position_bias = None
         self.alibi_slopes = alibi_slopes
         if alibi_slopes is not None:
-            # FIXME(kzawora): Need a general method to set max_seq_len on
-            # per-model basis.
             alibi_slopes_tensor = torch.tensor(alibi_slopes,
                                                dtype=torch.bfloat16)
-            self.position_bias = _make_alibi_bias(alibi_slopes_tensor,
-                                                  num_kv_heads,
-                                                  alibi_slopes_tensor.dtype,
-                                                  max_seq_len)
             self.alibi_slopes = alibi_slopes_tensor
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
@@ -190,11 +183,13 @@ def forward(
                 assert attn_metadata.attn_bias is not None, \
                     'attn_bias must be set before calling model.forward!'
                 attn_bias = attn_metadata.attn_bias
-                if self.alibi_slopes is not None and \
-                        self.position_bias is not None:
-                    attn_bias.add_(self.position_bias[:, :,
-                                                      -attn_bias.size(2):,
-                                                      -attn_bias.size(3):])
+                if self.alibi_slopes is not None:
+                    position_bias = _make_alibi_bias(self.alibi_slopes,
+                                                     self.num_kv_heads,
+                                                     attn_bias.dtype,
+                                                     attn_bias.shape[-1])
+                    attn_bias = attn_bias.tile((1, self.num_kv_heads, 1, 1))
+                    attn_bias.add_(position_bias)
             else:
                 attn_bias = None

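For context, `_make_alibi_bias` (defined elsewhere in this file, not shown in the diff) builds the standard ALiBi position bias: a per-head slope multiplied by the relative distance between query and key positions. With this commit the bias is built per forward call at the actual `attn_bias` size instead of being precomputed at `max_seq_len` in `__init__`. Below is a minimal standalone sketch of the idea, with illustrative names and shapes rather than the fork's exact implementation:

```python
import torch


def alibi_bias_sketch(slopes: torch.Tensor, seq_len: int) -> torch.Tensor:
    """Illustrative ALiBi bias of shape (1, num_heads, seq_len, seq_len):
    zero on the diagonal, increasingly negative for more distant keys."""
    pos = torch.arange(seq_len)
    rel = pos[None, :] - pos[:, None]                # (L, L), key_pos - query_pos
    bias = slopes[:, None, None] * rel[None, :, :]   # (num_heads, L, L)
    return bias.unsqueeze(0)


# Example: 4 heads with the usual geometric slopes, 8-token sequence.
slopes = torch.tensor([0.5, 0.25, 0.125, 0.0625], dtype=torch.bfloat16)
bias = alibi_bias_sketch(slopes, seq_len=8)

# In forward(), a bias like this is built at the actual attn_bias size
# (attn_bias.shape[-1]) and added onto the causal mask, which is why the
# commit tiles attn_bias across self.num_kv_heads before the add_().
```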