From 0272ab68937ead7ba0b5342d3452f37b9776ff73 Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Tue, 10 Dec 2024 14:22:13 +0200
Subject: [PATCH 1/3] Default use_padding_aware_scheduling to True on HPU

---
 vllm/engine/arg_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 5a64741f3b709..c3dd364382741 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -119,7 +119,8 @@ class EngineArgs:
     enable_prefix_caching: Optional[bool] = None
     disable_sliding_window: bool = False
     use_v2_block_manager: bool = True
-    use_padding_aware_scheduling: bool = False
+    use_padding_aware_scheduling: bool = False if \
+        not current_platform.is_hpu() else True
     swap_space: float = 4  # GiB
     cpu_offload_gb: float = 0  # GiB
     gpu_memory_utilization: float = 0.90

From 57c16cc87c2ec2851e54318a732225bbed92bfb5 Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Tue, 10 Dec 2024 14:23:29 +0200
Subject: [PATCH 2/3] Update padding-aware scheduling help text for the HPU default

---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index c3dd364382741..e5fc82128bb41 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -455,7 +455,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             action='store_true',
             help=('Use padding-aware scheduling. If True, the scheduler '
                   'will consider padded tokens in prefill. '
-                  'By default this is set to False. '))
+                  'By default this is set to False on non-HPU devices. '))
         parser.add_argument(
             '--num-lookahead-slots',
             type=int,

From 72cc90972cacf4af8ea69366a6043e62b6636acf Mon Sep 17 00:00:00 2001
From: Konrad Zawora <kzawora@habana.ai>
Date: Tue, 10 Dec 2024 14:28:24 +0200
Subject: [PATCH 3/3] Simplify use_padding_aware_scheduling default to satisfy ruff

---
 vllm/engine/arg_utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index e5fc82128bb41..8fd96aad25357 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -119,8 +119,7 @@ class EngineArgs:
     enable_prefix_caching: Optional[bool] = None
     disable_sliding_window: bool = False
     use_v2_block_manager: bool = True
-    use_padding_aware_scheduling: bool = False if \
-        not current_platform.is_hpu() else True
+    use_padding_aware_scheduling: bool = current_platform.is_hpu()
     swap_space: float = 4  # GiB
     cpu_offload_gb: float = 0  # GiB
     gpu_memory_utilization: float = 0.90