Commit a4e689a

Use contiguous pa by default (#519)

madamczykhabana authored Nov 18, 2024
1 parent dac5d80, commit a4e689a
Showing 1 changed file with 1 addition and 1 deletion.

vllm/worker/hpu_model_runner.py (2 changes: 1 addition & 1 deletion)
@@ -642,7 +642,7 @@ def __init__(
         self._setup_buckets()
         self._set_gc_threshold()
         self.use_contiguous_pa = os.environ.get('VLLM_CONTIGUOUS_PA',
-                                                'false').lower() == 'true'
+                                                'true').lower() == 'true'
         # For multi-step scheduling
         self.cached_step_outputs: List[torch.Tensor] = []
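The diff flips only the fallback string passed to `os.environ.get`, so contiguous PA is now enabled when `VLLM_CONTIGUOUS_PA` is unset, while an explicit setting still wins. A minimal sketch of that parsing pattern, with a hypothetical helper name (`use_contiguous_pa`) and a plain dict standing in for `os.environ`:

```python
def use_contiguous_pa(env: dict) -> bool:
    # Mirrors the pattern in hpu_model_runner.py: the flag is true only
    # when the variable (or, after this commit, its default) is the
    # case-insensitive string "true".
    return env.get('VLLM_CONTIGUOUS_PA', 'true').lower() == 'true'

# Unset -> enabled (the new default after this commit).
print(use_contiguous_pa({}))                               # True
# An explicit 'false' (any casing) still disables the feature.
print(use_contiguous_pa({'VLLM_CONTIGUOUS_PA': 'FALSE'}))  # False
```

Note that any value other than a case-insensitive `"true"` (e.g. `"1"` or `"yes"`) disables the feature under this pattern; only the default changed, not the parsing.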
