diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index cb2364fae03ab..3ec52c5f99f9f 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -352,7 +352,8 @@ def _update_metadata(self, attn_metadata, batch_size, seq_len, device, def forward(self, *args, **kwargs): kwargs = kwargs.copy() selected_token_indices = kwargs.pop('selected_token_indices') - warmup_mode = kwargs.pop('warmup_mode') if 'warmup_mode' in kwargs else None + warmup_mode = kwargs.pop( + 'warmup_mode') if 'warmup_mode' in kwargs else None input_ids = kwargs['input_ids'] kwargs['attn_metadata'] = self._update_metadata( kwargs['attn_metadata'], input_ids.size(0), input_ids.size(1),