diff --git a/.github/workflows/cpu-test.yml b/.github/workflows/cpu-test.yml
index 529af9fc7b1ec..64bdd36a6e3eb 100644
--- a/.github/workflows/cpu-test.yml
+++ b/.github/workflows/cpu-test.yml
@@ -31,4 +31,4 @@ jobs:
         VLLM_TARGET_DEVICE=hpu python setup.py develop
     - name: cpu-test
       run: |
-        VLLM_SKIP_WARMUP=true VLLM_PROMPT_SEQ_BUCKET_MAX=128 python examples/offline_inference_fakehpu.py --fake_hpu
+        VLLM_SKIP_WARMUP=true VLLM_PROMPT_SEQ_BUCKET_MAX=128 VLLM_USE_FAKE_HPU=1 python examples/offline_inference_fakehpu.py --fake_hpu
diff --git a/vllm/config.py b/vllm/config.py
index 6acb70ad047b2..839a00ef0a4ca 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -12,7 +12,7 @@
 from vllm.tracing import is_otel_installed
 from vllm.transformers_utils.config import get_config, get_hf_text_config
 from vllm.utils import (cuda_device_count_stateless, get_cpu_memory, is_cpu,
-                        is_hip, is_hpu, is_neuron, is_openvino, is_tpu, is_xpu,
+                        is_hip, is_hpu, is_fake_hpu, is_neuron, is_openvino, is_tpu, is_xpu,
                         print_warning_once)
 
 if TYPE_CHECKING:
@@ -858,6 +858,8 @@ def __init__(self, device: str = "auto") -> None:
             # Automated device type detection
             if is_neuron():
                 self.device_type = "neuron"
+            elif is_fake_hpu():
+                self.device_type = "cpu"
             elif is_hpu():
                 self.device_type = "hpu"
             elif is_openvino():
diff --git a/vllm/utils.py b/vllm/utils.py
index ae0fe26010f06..facdb30ec8e93 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -219,6 +219,7 @@ def is_fake_hpu() -> bool:
 
 
 @lru_cache(maxsize=None)
 def _is_habana_frameworks_installed() -> bool:
+    if os.environ.get('VLLM_USE_FAKE_HPU', '0') != '0': return False
     from importlib import util
     return util.find_spec('habana_frameworks') is not None
@@ -997,7 +998,9 @@ def cuda_device_count_stateless() -> int:
 
 
 def get_device() -> str:
-    if is_hpu():
+    if is_fake_hpu():
+        return "cpu"
+    elif is_hpu():
         return "hpu"
     return "cuda"
 
@@ -1143,7 +1146,6 @@ def _return_false():
 
 
 def _migrate_to_cpu():
     import habana_frameworks.torch as htorch
-    htorch.core.mark_step = _do_nothing
     htorch.utils.internal.is_lazy = _return_false
     torch.hpu.synchronize = _do_nothing
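
Taken together, the vllm/utils.py hunks route device selection to CPU whenever fake-HPU mode is requested. Below is a minimal, self-contained sketch of that selection order; it is an illustration, not the vllm implementation. It assumes is_fake_hpu() (whose body is not part of this diff) keys off the same VLLM_USE_FAKE_HPU variable that the patched _is_habana_frameworks_installed() checks, and the is_hpu() stub is a hypothetical stand-in for the real helper.

# Sketch only -- not the actual vllm code. Assumes is_fake_hpu() reads
# the same VLLM_USE_FAKE_HPU variable checked in the diff above.
import os
from functools import lru_cache


@lru_cache(maxsize=None)
def is_fake_hpu() -> bool:
    # Assumption: any value other than '0' enables fake-HPU mode,
    # matching the env-var check added to _is_habana_frameworks_installed().
    return os.environ.get('VLLM_USE_FAKE_HPU', '0') != '0'


def is_hpu() -> bool:
    # Hypothetical stand-in for vllm.utils.is_hpu: treat an HPU stack as
    # present only if habana_frameworks is importable.
    from importlib import util
    return util.find_spec('habana_frameworks') is not None


def get_device() -> str:
    # Mirrors the patched get_device(): the fake-HPU check must come
    # first, so a CPU-only host short-circuits before probing for a
    # real HPU stack.
    if is_fake_hpu():
        return "cpu"
    elif is_hpu():
        return "hpu"
    return "cuda"


# e.g. VLLM_USE_FAKE_HPU=1 python sketch.py  ->  prints "cpu"
if __name__ == "__main__":
    print(get_device())

This ordering is why the CI job above only needs VLLM_USE_FAKE_HPU=1 in the environment: the gate wins before any habana_frameworks import is attempted, so the workflow can run on a plain CPU runner.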