From 4f7dca9dfd23dcc909e8fe7efa09136ca1706762 Mon Sep 17 00:00:00 2001 From: Karol Damaszke Date: Wed, 30 Oct 2024 17:37:12 +0200 Subject: [PATCH] format.sh --- vllm/model_executor/layers/sampler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 53d445c2ac671..1b6bc2b1848c1 100755 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -268,7 +268,8 @@ def forward( if do_top_p_top_k and flashinfer_top_k_top_p_sampling is None: # If we have a scalar p and k, we can use the optimized version. if self._scalar_p_and_k.any(): - logits = self._apply_top_k_top_p_opt(logits, self._top_p_scalar.item(), + logits = self._apply_top_k_top_p_opt(logits, + self._top_p_scalar.item(), self._top_k_scalar.item()) else: logits = _apply_top_k_top_p(logits, sampling_tensors.top_ps,