diff --git a/vllm/worker/habana_worker.py b/vllm/worker/habana_worker.py index f2678c5e405dc..a9c391db99d04 100644 --- a/vllm/worker/habana_worker.py +++ b/vllm/worker/habana_worker.py @@ -9,6 +9,7 @@ import habana_frameworks.torch as htorch # noqa:F401 import torch import torch.distributed +from vllm_hpu_extension import HabanaMemoryProfiler from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig, ModelConfig, ObservabilityConfig, ParallelConfig, @@ -21,8 +22,8 @@ from vllm.model_executor import set_random_seed from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.sequence import ExecuteModelRequest -from vllm.utils import (HabanaMemoryProfiler, format_bytes, hpu_backend_string, - hpu_device_string, is_fake_hpu) +from vllm.utils import (format_bytes, hpu_backend_string, hpu_device_string, + is_fake_hpu) from vllm.worker.cache_engine import CacheEngine from vllm.worker.habana_model_runner import HabanaModelRunner from vllm.worker.model_runner_base import ModelRunnerBase