From 8fb04d123031f0cf1aff047d40f5928eac324a31 Mon Sep 17 00:00:00 2001
From: Tomasz Zielinski
Date: Fri, 29 Nov 2024 17:58:43 +0200
Subject: [PATCH] copy output from prepare_cos_sin

prepare_cos_sin now returns the (cos, sin) tensors it registers as
buffers. On HPU, the llama forward pass calls it on one layer only and
registers the returned tensors on the other layers' rotary embeddings,
instead of calling prepare_cos_sin on every layer.

Only layers in [start_layer, end_layer) are visited, the same range the
replaced loop iterated over. An empty range is a no-op, as before.

---
 vllm/model_executor/layers/rotary_embedding.py |  2 ++
 vllm/model_executor/models/llama.py            | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py
index 6389a876c9e76..61c629adaeab6 100644
--- a/vllm/model_executor/layers/rotary_embedding.py
+++ b/vllm/model_executor/layers/rotary_embedding.py
@@ -123,6 +123,8 @@ def prepare_cos_sin(self,
         self.register_buffer("cos", cos, persistent=False)
         self.register_buffer("sin", sin, persistent=False)
 
+        return cos, sin
+
     def _compute_inv_freq(self, base: Union[int, float]) -> torch.Tensor:
         """Compute the inverse frequency."""
         # NOTE(woosuk): To exactly match the HF implementation, we need to
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 7e504ff11474a..eae560337fb9f 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -339,8 +339,17 @@ def forward(
             residual = intermediate_tensors["residual"]
 
         if is_hpu:
-            for i in range(self.start_layer, self.end_layer):
-                self.layers[i].self_attn.rotary_emb.prepare_cos_sin(positions)
+            # Stay within [start_layer, end_layer); do not index from layer 0.
+            ropes = [
+                self.layers[i].self_attn.rotary_emb
+                for i in range(self.start_layer, self.end_layer)
+            ]
+            if ropes:
+                cos, sin = ropes[0].prepare_cos_sin(positions)
+                for rope in ropes[1:]:
+                    rope.register_buffer("cos", cos, persistent=False)
+                    rope.register_buffer("sin", sin, persistent=False)
+
             import habana_frameworks.torch as htorch
             htorch.core.mark_step()
 