Skip to content

Commit

Permalink
Refine INC shutdown code (#335)
Browse files Browse the repository at this point in the history
This PR removes the debug printouts in the INC shutdown method and covers the
case where the application exits before the model is properly initialized.
  • Loading branch information
kzawora-intel authored Sep 25, 2024
1 parent 9111a80 commit 8c6dcae
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
3 changes: 0 additions & 3 deletions vllm/executor/habana_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,6 @@ def check_health(self) -> None:
def shutdown(self) -> None:
self.driver_worker.shutdown_inc()

def __del__(self):
self.shutdown()


class HabanaExecutorAsync(HabanaExecutor, ExecutorAsyncBase):

Expand Down
16 changes: 11 additions & 5 deletions vllm/worker/habana_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,7 @@ def __init__(
# Lazy initialization
self.lora_manager: LRUCacheWorkerLoRAManager = None
self.model: torch.nn.Module = None
self.inc_initialized_successfully = False

# Profiler stats
self.profiler_counter_helper = HabanaProfilerCounterHelper()
Expand Down Expand Up @@ -632,6 +633,7 @@ def load_model(self) -> None:
self.model = convert(self.model, config)
htcore.hpu_initialize(self.model,
mark_only_scales_as_const=True)
self.inc_initialized_successfully = True
logger.info("Preparing model with INC took %s",
m_inc.get_summary_string())
elif not is_fake_hpu():
Expand Down Expand Up @@ -1938,14 +1940,18 @@ def execute_model(
return [output]

def shutdown_inc(self):
print('inc shutdown')
if (model_config := getattr(self, "model_config", None)) and \
getattr(model_config, "quantization", None) == 'inc':
print('inc shutdown start')
can_finalize_inc = False
from contextlib import suppress
with suppress(AttributeError):
can_finalize_inc = (self.model_config.quantization == 'inc') and \
(self.model.model is not None) and \
self.inc_initialized_successfully and \
not getattr(self, "_is_inc_finalized", False)
if can_finalize_inc:
from neural_compressor.torch.quantization import (
finalize_calibration)
finalize_calibration(self.model.model)
print('inc shutdown')
self._is_inc_finalized = True

def __del__(self):
self.shutdown_inc()
3 changes: 0 additions & 3 deletions vllm/worker/habana_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,6 @@ def list_prompt_adapters(self) -> Set[int]:
def shutdown_inc(self):
self.model_runner.shutdown_inc()

def __del__(self):
self.shutdown_inc()

@property
def max_model_len(self) -> int:
return self.model_config.max_model_len
Expand Down

0 comments on commit 8c6dcae

Please sign in to comment.