From a4e689a54592aae3a51052791b2f581315ca324d Mon Sep 17 00:00:00 2001
From: Michal Adamczyk
Date: Mon, 18 Nov 2024 10:26:53 +0100
Subject: [PATCH] Use contiguous pa by default (#519)

---
 vllm/worker/hpu_model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py
index 97ad0a6893dd4..93ff84f64f89c 100755
--- a/vllm/worker/hpu_model_runner.py
+++ b/vllm/worker/hpu_model_runner.py
@@ -642,7 +642,7 @@ def __init__(
         self._setup_buckets()
         self._set_gc_threshold()
         self.use_contiguous_pa = os.environ.get('VLLM_CONTIGUOUS_PA',
-                                                'false').lower() == 'true'
+                                                'true').lower() == 'true'
         # For multi-step scheduling
         self.cached_step_outputs: List[torch.Tensor] = []
 