From 92049756cc698c9c1cdf99fff387b8fb610c39ff Mon Sep 17 00:00:00 2001
From: Konrad Zawora
Date: Fri, 6 Dec 2024 17:47:53 +0200
Subject: [PATCH] fix (hopefully) all linter errors

---
 vllm/config.py                                      | 2 +-
 vllm/executor/ray_hpu_executor.py                   | 3 ++-
 vllm/lora/layers.py                                 | 3 ---
 vllm/model_executor/layers/quantization/__init__.py | 2 +-
 vllm/platforms/hpu.py                               | 3 ++-
 vllm/utils.py                                       | 2 --
 6 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 901b360b2864d..d41f8752704ad 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1130,7 +1130,7 @@ class SchedulerConfig:
     chunked_prefill_enabled: bool = field(init=False)
 
     # Maximum number of prefill sequences to be
-    # processed in a single iteration. Used only with padding-aware 
+    # processed in a single iteration. Used only with padding-aware
     # scheduling.
     max_num_prefill_seqs: Optional[int] = None
 
diff --git a/vllm/executor/ray_hpu_executor.py b/vllm/executor/ray_hpu_executor.py
index 8a6e193c13da7..1c6825d8d365b 100644
--- a/vllm/executor/ray_hpu_executor.py
+++ b/vllm/executor/ray_hpu_executor.py
@@ -2,7 +2,8 @@
 import os
 from collections import defaultdict
 from itertools import islice, repeat
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type
+from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple,
+                    Type)
 
 import msgspec
 
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index 13c15adc78e99..bec904c30a660 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -33,9 +33,6 @@
     VocabParallelEmbedding)
 from vllm.platforms import current_platform
 
-if current_platform.is_hpu():
-    from vllm_hpu_extension.punica_hpu import GaudiPunicaWrapper
-
 if TYPE_CHECKING:
     pass
 
diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py
index cc9e45cc1d232..6f2b9f3be322c 100644
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -50,13 +50,13 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
     from .gptq_marlin import GPTQMarlinConfig
     from .gptq_marlin_24 import GPTQMarlin24Config
     from .hqq_marlin import HQQMarlinConfig
+    from .inc import INCConfig
     from .ipex_quant import IPEXConfig
     from .marlin import MarlinConfig
     from .modelopt import ModelOptFp8Config
     from .neuron_quant import NeuronQuantConfig
     from .qqq import QQQConfig
     from .tpu_int8 import Int8TpuConfig
-    from .inc import INCConfig
 
     method_to_config: Dict[str, Type[QuantizationConfig]] = {
         "aqlm": AQLMConfig,
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index 2d77f3e0c2fb5..2b1a1dad95753 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -32,7 +32,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         parallel_config = vllm_config.parallel_config
         if parallel_config.worker_cls == "auto":
             if scheduler_config.is_multi_step:
-                parallel_config.worker_cls = "vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker"
+                parallel_config.worker_cls = \
+                    "vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker"
             elif vllm_config.speculative_config:
                 parallel_config.worker_cls = \
                     "vllm.spec_decode.spec_decode_worker.create_spec_worker"
diff --git a/vllm/utils.py b/vllm/utils.py
index c426e58a49e55..320b14653854b 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -773,8 +773,6 @@ def is_pin_memory_available() -> bool:
         return False
     elif current_platform.is_cpu() or current_platform.is_openvino():
         return False
-    elif (current_platform.is_hpu()):
-        return False
     return True
 