diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index ee83187fff797..314cd98212e9c 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -1,7 +1,5 @@
 from typing import TYPE_CHECKING, Optional
 
-import torch
-
 from .interface import Platform, PlatformEnum, _Backend
 
 if TYPE_CHECKING:
@@ -24,10 +22,6 @@ def get_default_attn_backend(cls, selected_backend: _Backend) -> _Backend:
     def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
         return True
 
-    @staticmethod
-    def inference_mode():
-        return torch.no_grad()
-
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None: