diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index b03e6aca48c0e..b3274b6d95115 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -59,7 +59,7 @@ def device_loading_context(module: torch.nn.Module,
 
     # Store original device states and move parameters to GPU if they're on CPU
     for name, p in module.named_parameters():
-        if p.device.type == "cpu":
+        if p.device.type == "cpu" and target_device.type != 'hpu':
             original_device_states[name] = p.device
             p.data = p.data.to(target_device)
         # Parameters already on target device are not touched
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index feddce69ac5b4..170cfff94f90d 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -1,18 +1,11 @@
-from typing import Optional
-
 import torch
 
-from .interface import DeviceCapability, Platform, PlatformEnum
+from .interface import Platform, PlatformEnum
 
 
 class HpuPlatform(Platform):
     _enum = PlatformEnum.HPU
 
-    @staticmethod
-    def get_device_capability(
-            device_id: int = 0) -> Optional[DeviceCapability]:
-        raise RuntimeError("HPU does not have device capability.")
-
     @staticmethod
     def inference_mode():
         return torch.no_grad()
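
For reference, a minimal sketch of the pattern the first hunk modifies. This is not the full vLLM implementation (which includes additional restore handling); it only illustrates the guarded behavior: CPU-resident parameters are moved to the target device on entry and restored on exit, and the move is now skipped when the target is HPU, presumably so placement is left to the HPU loading path.

```python
from contextlib import contextmanager

import torch


@contextmanager
def device_loading_context(module: torch.nn.Module,
                           target_device: torch.device):
    """Simplified sketch: temporarily move CPU parameters to the target
    device while loading, skipping the move when targeting HPU."""
    original_device_states: dict[str, torch.device] = {}

    # Store original device states and move parameters that live on CPU,
    # unless the target is HPU (the guard added by this diff).
    for name, p in module.named_parameters():
        if p.device.type == "cpu" and target_device.type != "hpu":
            original_device_states[name] = p.device
            p.data = p.data.to(target_device)
        # Parameters already on the target device are not touched.

    try:
        yield module
    finally:
        # Restore any parameters that were temporarily moved.
        for name, p in module.named_parameters():
            if name in original_device_states:
                p.data = p.data.to(original_device_states[name])
```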