HabanaAI · madamczykhabana · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 10, 2024
@@ -119,7 +119,7 @@ class EngineArgs:
     enable_prefix_caching: Optional[bool] = None
     disable_sliding_window: bool = False
     use_v2_block_manager: bool = True
-    use_padding_aware_scheduling: bool = False
+    use_padding_aware_scheduling: bool = current_platform.is_hpu()
     swap_space: float = 4  # GiB
     cpu_offload_gb: float = 0  # GiB
     gpu_memory_utilization: float = 0.90
@@ -454,7 +454,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             action='store_true',
             help=('Use padding-aware scheduling. If True, the scheduler '
                   'will consider padded tokens in prefill. '
-                  'By default this is set to False. '))
+                  'By default this is set to False on non-HPU devices. '))
         parser.add_argument(
             '--num-lookahead-slots',
             type=int,