Fixed ALiBi (HabanaAI#254)
Fixed ALiBi and the [MPT-7B](https://www.databricks.com/blog/mpt-7b) model.
Accuracy results compared to CPU (collected using the
[EleutherAI lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)):

| Tasks          | CPU    | HPU    |
| -------------- | ------ | ------ |
| arc_challenge  | 0.4224 | 0.4189 |
| arc_easy       | 0.6974 | 0.6999 |
| hellaswag      | 0.7603 | 0.7626 |
| lambada_openai | 0.7306 | 0.7326 |
| mmlu           | 0.293  | 0.2925 |
| winogrande     | 0.6851 | 0.6811 |
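
For reference, a rough sketch of how numbers like these can be collected through the harness's Python API. This is a minimal sketch, assuming lm-evaluation-harness v0.4+ (which exposes `lm_eval.simple_evaluate` and a `vllm` model backend); the checkpoint path `mosaicml/mpt-7b` and the other arguments are illustrative, not taken from this commit:

```python
# Hedged sketch: evaluate MPT-7B on the tasks above with lm-evaluation-harness.
# Assumes lm-evaluation-harness >= 0.4; model path and args are illustrative.
import lm_eval

results = lm_eval.simple_evaluate(
    model="vllm",                   # or "hf" for a CPU/reference run
    model_args="pretrained=mosaicml/mpt-7b,dtype=bfloat16",
    tasks=["arc_challenge", "arc_easy", "hellaswag",
           "lambada_openai", "mmlu", "winogrande"],
    batch_size=8,
)
print(results["results"])           # per-task accuracy dict
```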
itaraban authored and zhouyu5 committed Sep 20, 2024
1 parent a5904b6 commit 4729d85
Showing 1 changed file with 7 additions and 12 deletions.
19 changes: 7 additions & 12 deletions vllm/attention/backends/habana_attn.py
@@ -108,17 +108,10 @@ def __init__(
         self.v_cache = VLLMKVCache()
         self.num_kv_heads = num_heads if num_kv_heads is None else num_kv_heads
         self.sliding_window = sliding_window
-        self.position_bias = None
         self.alibi_slopes = alibi_slopes
         if alibi_slopes is not None:
-            # FIXME(kzawora): Need a general method to set max_seq_len on
-            # per-model basis.
             alibi_slopes_tensor = torch.tensor(alibi_slopes,
                                                dtype=torch.bfloat16)
-            self.position_bias = _make_alibi_bias(alibi_slopes_tensor,
-                                                  num_kv_heads,
-                                                  alibi_slopes_tensor.dtype,
-                                                  max_seq_len)
             self.alibi_slopes = alibi_slopes_tensor
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
@@ -190,11 +183,13 @@ def forward(
                 assert attn_metadata.attn_bias is not None, \
                     'attn_bias must be set before calling model.forward!'
                 attn_bias = attn_metadata.attn_bias
-                if self.alibi_slopes is not None and \
-                        self.position_bias is not None:
-                    attn_bias.add_(self.position_bias[:, :,
-                                                      -attn_bias.size(2):,
-                                                      -attn_bias.size(3):])
+                if self.alibi_slopes is not None:
+                    position_bias = _make_alibi_bias(self.alibi_slopes,
+                                                     self.num_kv_heads,
+                                                     attn_bias.dtype,
+                                                     attn_bias.shape[-1])
+                    attn_bias = attn_bias.tile((1, self.num_kv_heads, 1, 1))
+                    attn_bias.add_(position_bias)
             else:
                 attn_bias = None

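For context, `_make_alibi_bias` (defined elsewhere in this file, not shown in the diff) builds the standard ALiBi position bias: a per-head slope multiplied by the relative distance between query and key positions. With this commit the bias is built per forward call at the actual `attn_bias` size instead of being precomputed at `max_seq_len` in `__init__`. Below is a minimal standalone sketch of the idea, with illustrative names and shapes rather than the fork's exact implementation:

```python
import torch


def alibi_bias_sketch(slopes: torch.Tensor, seq_len: int) -> torch.Tensor:
    """Illustrative ALiBi bias of shape (1, num_heads, seq_len, seq_len):
    zero on the diagonal, increasingly negative for more distant keys."""
    pos = torch.arange(seq_len)
    rel = pos[None, :] - pos[:, None]                # (L, L), key_pos - query_pos
    bias = slopes[:, None, None] * rel[None, :, :]   # (num_heads, L, L)
    return bias.unsqueeze(0)


# Example: 4 heads with the usual geometric slopes, 8-token sequence.
slopes = torch.tensor([0.5, 0.25, 0.125, 0.0625], dtype=torch.bfloat16)
bias = alibi_bias_sketch(slopes, seq_len=8)

# In forward(), a bias like this is built at the actual attn_bias size
# (attn_bias.shape[-1]) and added onto the causal mask, which is why the
# commit tiles attn_bias across self.num_kv_heads before the add_().
```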