From 4729d8502e4895084c191396b6d78ad2b2d176a9 Mon Sep 17 00:00:00 2001 From: Ilia Taraban Date: Thu, 12 Sep 2024 11:51:05 +0200 Subject: [PATCH] Fixed ALiBi (#254) Fixed ALiBi and the [MPT-7B](https://www.databricks.com/blog/mpt-7b) model. Accuracy results compared to CPU (collected using [EleutherAI](https://github.com/EleutherAI/lm-evaluation-harness)): | Tasks | CPU | HPU | | -------------- | ------ | ------ | | arc_challenge | 0.4224 | 0.4189 | | arc_easy | 0.6974 | 0.6999 | | hellaswag | 0.7603 | 0.7626 | | lambada_openai | 0.7306 | 0.7326 | | mmlu | 0.293 | 0.2925 | | winogrande | 0.6851 | 0.6811 | --- vllm/attention/backends/habana_attn.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/vllm/attention/backends/habana_attn.py b/vllm/attention/backends/habana_attn.py index 20b0f2bc7630b..56b71a431aca7 100644 --- a/vllm/attention/backends/habana_attn.py +++ b/vllm/attention/backends/habana_attn.py @@ -108,17 +108,10 @@ def __init__( self.v_cache = VLLMKVCache() self.num_kv_heads = num_heads if num_kv_heads is None else num_kv_heads self.sliding_window = sliding_window - self.position_bias = None self.alibi_slopes = alibi_slopes if alibi_slopes is not None: - # FIXME(kzawora): Need a general method to set max_seq_len on - # per-model basis. alibi_slopes_tensor = torch.tensor(alibi_slopes, dtype=torch.bfloat16) - self.position_bias = _make_alibi_bias(alibi_slopes_tensor, - num_kv_heads, - alibi_slopes_tensor.dtype, - max_seq_len) self.alibi_slopes = alibi_slopes_tensor assert self.num_heads % self.num_kv_heads == 0 self.num_queries_per_kv = self.num_heads // self.num_kv_heads @@ -190,11 +183,13 @@ def forward( assert attn_metadata.attn_bias is not None, \ 'attn_bias must be set before calling model.forward!' 
attn_bias = attn_metadata.attn_bias - if self.alibi_slopes is not None and \ - self.position_bias is not None: - attn_bias.add_(self.position_bias[:, :, - -attn_bias.size(2):, - -attn_bias.size(3):]) + if self.alibi_slopes is not None: + position_bias = _make_alibi_bias(self.alibi_slopes, + self.num_kv_heads, + attn_bias.dtype, + attn_bias.shape[-1]) + attn_bias = attn_bias.tile((1, self.num_kv_heads, 1, 1)) + attn_bias.add_(position_bias) else: attn_bias = None