From efe0268b51de321aae4fcb509146383c801ca7b3 Mon Sep 17 00:00:00 2001
From: Jan Kaniecki <jkaniecki@habana.ai>
Date: Wed, 20 Nov 2024 13:29:20 +0100
Subject: [PATCH] Skip empty steps in multi-step scheduling (#526)

This change skips empty steps in the multi-step scenario. We are
currently wasting host time on launching n-2 empty steps; this PR
removes those launches. The gain will become visible after device-time
optimizations, as we are currently limited by HPU computation inside
multi-step execution.
---
 vllm/worker/hpu_model_runner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py
index be4918c840d9b..03c5e62c8f11e 100755
--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -2241,9 +2241,12 @@ def try_revert_dummy_output_tokens():
                         else:
                             raise RuntimeError(
                                 "seq_group_metadata_list is uninitialized")
-                        # Cache the original output token ids
                         for i, seq_group_metadata in enumerate(
                                 seq_group_metadata_list):
+                            # Skip empty steps
+                            seq_group_metadata.state.current_step += (
+                                num_steps - 2)
+                            # Cache the original output token ids
                             cache_orig_output_tokens_len.append({})
                             for j, data in seq_group_metadata.seq_data.items():
                                 cache_orig_output_tokens_len[i][j] = \