Skip to content

Commit

Permalink
Run with HPU graphs even when warmup was skipped
Browse files Browse the repository at this point in the history
  • Loading branch information
madamczykhabana committed Sep 23, 2024
1 parent 084db0f commit c80c18a
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion vllm/worker/habana_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,8 @@ def _set_gc_threshold(self) -> None:
self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
.create_input_mapper(self.model_config)

self.skip_warmup = os.environ.get('VLLM_SKIP_WARMUP', 'false').lower() == 'true'

def load_model(self) -> None:
import habana_frameworks.torch.core as htcore
if self.model_config.quantization == 'inc':
Expand Down Expand Up @@ -647,6 +649,8 @@ def load_model(self) -> None:
def _use_graphs(self, batch_size, seq_len, is_prompt):
# Decide whether a batch with this (batch_size, seq_len, is_prompt) shape
# should be executed with HPU graphs. Returns a bool.
# The checks are ordered: enforce_eager wins over everything else.
if self.enforce_eager:
return False
# self.skip_warmup is read from the VLLM_SKIP_WARMUP environment variable.
# When warmup was skipped, graphs are used for every shape instead of
# consulting self.graphed_buckets.
if self.skip_warmup:
return True
# Otherwise graphs are used only for shapes present in self.graphed_buckets.
# NOTE(review): graphed_buckets is presumably populated during warmup — confirm.
return (batch_size, seq_len, is_prompt) in self.graphed_buckets

def _is_valid_bucket(self, bucket):
Expand Down Expand Up @@ -1501,7 +1505,7 @@ def warmup_model(self, kv_caches: List[torch.Tensor]) -> None:
self.warmup_scenario(int(bs), int(seq_len), is_prompt, kv_caches,
True)
raise AssertionError("Finished profiling")
if os.environ.get('VLLM_SKIP_WARMUP', 'false').lower() == 'true':
if self.skip_warmup:
logger.info("Skipping warmup...")
return
self.profiler.start('internal', 'warmup')
Expand Down

0 comments on commit c80c18a

Please sign in to comment.