
Commit

fix (hopefully) all linter errors
kzawora-intel committed Dec 6, 2024
1 parent 48ab12b commit 9204975
Showing 6 changed files with 6 additions and 9 deletions.
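
The six hunks below are mechanical lint fixes: a whitespace-only comment change in vllm/config.py, two over-long lines wrapped, an import moved into alphabetical order, and two HPU-specific code blocks removed. As a hedged sketch (not part of the commit), the two line-wrapping styles used here look like this in Python, assuming the usual 80-column line-length rule:

# Sketch only: wrap a long import list in parentheses so it can break after a
# comma and keep every line under the length limit.
from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple,
                    Type)

# Sketch only: split a long assignment with a backslash continuation so the
# string literal moves to its own, shorter line.
worker_cls = \
    "vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker"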
2 changes: 1 addition & 1 deletion vllm/config.py
@@ -1130,7 +1130,7 @@ class SchedulerConfig:
     chunked_prefill_enabled: bool = field(init=False)

     # Maximum number of prefill sequences to be
-    # processed in a single iteration. Used only with padding-aware
+    # processed in a single iteration. Used only with padding-aware
     # scheduling.
     max_num_prefill_seqs: Optional[int] = None

3 changes: 2 additions & 1 deletion vllm/executor/ray_hpu_executor.py
@@ -2,7 +2,8 @@
 import os
 from collections import defaultdict
 from itertools import islice, repeat
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type
+from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple,
+                    Type)

 import msgspec

3 changes: 0 additions & 3 deletions vllm/lora/layers.py
@@ -33,9 +33,6 @@
     VocabParallelEmbedding)
 from vllm.platforms import current_platform

-if current_platform.is_hpu():
-    from vllm_hpu_extension.punica_hpu import GaudiPunicaWrapper
-
 if TYPE_CHECKING:
     pass

2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/__init__.py
@@ -50,13 +50,13 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
     from .gptq_marlin import GPTQMarlinConfig
     from .gptq_marlin_24 import GPTQMarlin24Config
     from .hqq_marlin import HQQMarlinConfig
+    from .inc import INCConfig
     from .ipex_quant import IPEXConfig
     from .marlin import MarlinConfig
     from .modelopt import ModelOptFp8Config
     from .neuron_quant import NeuronQuantConfig
     from .qqq import QQQConfig
     from .tpu_int8 import Int8TpuConfig
-    from .inc import INCConfig

     method_to_config: Dict[str, Type[QuantizationConfig]] = {
         "aqlm": AQLMConfig,
3 changes: 2 additions & 1 deletion vllm/platforms/hpu.py
@@ -32,7 +32,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         parallel_config = vllm_config.parallel_config
         if parallel_config.worker_cls == "auto":
             if scheduler_config.is_multi_step:
-                parallel_config.worker_cls = "vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker"
+                parallel_config.worker_cls = \
+                    "vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker"
             elif vllm_config.speculative_config:
                 parallel_config.worker_cls = \
                     "vllm.spec_decode.spec_decode_worker.create_spec_worker"
2 changes: 0 additions & 2 deletions vllm/utils.py
@@ -773,8 +773,6 @@ def is_pin_memory_available() -> bool:
         return False
     elif current_platform.is_cpu() or current_platform.is_openvino():
         return False
-    elif (current_platform.is_hpu()):
-        return False
     return True


