diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index be4918c840d9b..03c5e62c8f11e 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -2241,9 +2241,12 @@ def try_revert_dummy_output_tokens(): else: raise RuntimeError( "seq_group_metadata_list is uninitialized") - # Cache the original output token ids for i, seq_group_metadata in enumerate( seq_group_metadata_list): + # Skip empty steps + seq_group_metadata.state.current_step += ( + num_steps - 2) + # Cache the original output token ids cache_orig_output_tokens_len.append({}) for j, data in seq_group_metadata.seq_data.items(): cache_orig_output_tokens_len[i][j] = \