From a6a32ac9d74e14046d22c83d8a4ca670fce9c415 Mon Sep 17 00:00:00 2001
From: Rafal Litka
Date: Tue, 17 Dec 2024 10:29:07 +0200
Subject: [PATCH 1/2] switch no_grad to inference_mode

---
 vllm/platforms/hpu.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index ee83187fff797..c791ec9318fa4 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -24,10 +24,6 @@ def get_default_attn_backend(cls, selected_backend: _Backend) -> _Backend:
     def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
         return True
 
-    @staticmethod
-    def inference_mode():
-        return torch.no_grad()
-
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:

From 58a9f90c0b3ceb71701afc2b22d7f571d6a01763 Mon Sep 17 00:00:00 2001
From: Rafal Litka
Date: Tue, 17 Dec 2024 11:07:41 +0200
Subject: [PATCH 2/2] remove unused torch import

---
 vllm/platforms/hpu.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index c791ec9318fa4..314cd98212e9c 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -1,7 +1,5 @@
 from typing import TYPE_CHECKING, Optional
 
-import torch
-
 from .interface import Platform, PlatformEnum, _Backend
 
 if TYPE_CHECKING:
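
Context for reviewers: with the `@staticmethod` override deleted, `HpuPlatform` falls back to the default `inference_mode()` it inherits from the `Platform` base class in `vllm/platforms/interface.py`, which wraps `torch.inference_mode()`. The sketch below illustrates that inherited behavior; it is a paraphrase under that assumption, not the verbatim base-class code.

```python
import torch


class Platform:
    # Sketch of the base-class default that HpuPlatform now inherits.
    # Backends whose hardware stack cannot run under inference mode keep
    # an override returning torch.no_grad() instead; this series removes
    # exactly such an override for HPU.
    @classmethod
    def inference_mode(cls):
        # torch.inference_mode() disables autograd like torch.no_grad(),
        # and additionally skips view/version-counter bookkeeping, so
        # tensors created inside it are cheaper to produce but cannot be
        # fed back into autograd later.
        return torch.inference_mode()
```

Both are used the same way as context managers (`with cls.inference_mode(): ...`), so the switch is transparent to callers; the difference only matters if inference outputs later re-enter autograd, which the serving forward pass does not do.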