Skip to content

Commit

Permalink
fix destructors flow and remove finish_measurements
Browse files Browse the repository at this point in the history
  • Loading branch information
nirda7 committed Oct 10, 2024
1 parent b70c1a5 commit b2e342c
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 20 deletions.
3 changes: 0 additions & 3 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,9 +1288,6 @@ def _advance_to_next_step(
seq = seq_group.seqs[0]
seq.append_token_id(sample.output_token, sample.logprobs)

def finish_measurements(self) -> None:
    # Delegate measurement finalization to the executor, which forwards it
    # down to the worker / model runner (ends the INC calibration pass there).
    self.model_executor.finish_measurements()

def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
"""Performs one decoding iteration and returns newly generated results.
Expand Down
3 changes: 0 additions & 3 deletions vllm/entrypoints/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,6 @@ def set_tokenizer(self, tokenizer: AnyTokenizer) -> None:
else:
tokenizer_group.tokenizer = get_cached_tokenizer(tokenizer)

def finish_measurements(self) -> None:
    # Forward to the underlying engine; the call chain ultimately finalizes
    # the neural-compressor (INC) calibration on the model.
    self.llm_engine.finish_measurements()

@overload # LEGACY: single (prompt + optional token ids)
def generate(
self,
Expand Down
8 changes: 4 additions & 4 deletions vllm/executor/hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class HPUExecutor(ExecutorBase):
def _init_executor(self) -> None:
    """Initialize the worker and load the model."""
    self._init_worker()
    # One-shot guard consumed by shutdown(): INC finalization on the driver
    # worker runs at most once. Set only after the worker exists so a failed
    # init never arms the flag.
    self.shutdown_inc = True

def _get_worker_kwargs(
self,
Expand Down Expand Up @@ -90,9 +91,6 @@ def initialize_cache(self, num_gpu_blocks: int, num_cpu_blocks) -> None:
msg = f"init_cache_engine took {cache_init_m.get_summary_string()}"
logger.info(msg)

def finish_measurements(self) -> None:
    # Thin delegation: the driver worker owns the model runner that holds
    # the INC calibration state.
    self.driver_worker.finish_measurements()

def execute_model(
self,
execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
Expand Down Expand Up @@ -197,7 +195,9 @@ def stop_profile(self) -> None:
self.driver_worker.stop_profile()

def shutdown(self) -> None:
    """Finalize INC on the driver worker, at most once.

    The one-shot flag armed in _init_executor makes repeated calls
    (explicit shutdown plus destructor-driven teardown) safe no-ops.
    """
    if not self.shutdown_inc:
        return
    self.driver_worker.shutdown_inc()
    self.shutdown_inc = False


class HPUExecutorAsync(HPUExecutor, ExecutorAsyncBase):
Expand Down
5 changes: 5 additions & 0 deletions vllm/executor/ray_hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ def _init_executor(self) -> None:
self.output_decoder = msgspec.msgpack.Decoder(
Optional[List[SamplerOutput]])

self.shutdown_inc = True

def shutdown(self) -> None:
if self.shutdown_inc:
self._run_workers("shutdown_inc")
self.shutdown_inc = False
if hasattr(self, "forward_dag") and self.forward_dag is not None:
self.forward_dag.teardown()
import ray
Expand Down
7 changes: 0 additions & 7 deletions vllm/worker/hpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1779,10 +1779,6 @@ def prepare_model_input(
is_prompt=is_prompt,
virtual_engine=virtual_engine)

def finish_measurements(self):
    # Finalize the neural-compressor (INC) calibration pass on the wrapped
    # model. Imported lazily so neural_compressor is only required when
    # measurement mode is actually in use.
    from neural_compressor.torch.quantization import finalize_calibration
    finalize_calibration(self.model.model)

def _check_config(self, batch_size, seq_len, is_prompt, warmup_mode):
cfg = (batch_size, seq_len, is_prompt)
seen = cfg in self.seen_configs
Expand Down Expand Up @@ -1993,6 +1989,3 @@ def shutdown_inc(self):
finalize_calibration)
finalize_calibration(self.model.model)
self._is_inc_finalized = True

def __del__(self):
    # Best-effort cleanup: finalize INC when the runner is garbage-collected.
    # NOTE(review): __del__ invocation order during interpreter shutdown is
    # not guaranteed — callers should prefer an explicit shutdown path over
    # relying on this destructor.
    self.shutdown_inc()
3 changes: 0 additions & 3 deletions vllm/worker/hpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,6 @@ def _warm_up_model(self) -> None:
# the model initialization and profiling.
set_random_seed(self.model_config.seed)

def finish_measurements(self) -> None:
    # Delegate to the model runner, which owns the INC calibration state
    # and performs the actual finalization.
    self.model_runner.finish_measurements()

@property
def do_metadata_broadcast(self) -> bool:
    """Whether per-step metadata broadcast is required.

    True exactly when the tensor-parallel degree exceeds one, i.e. there
    are non-driver workers to keep in sync.
    """
    tp_degree = self.parallel_config.tensor_parallel_size
    return tp_degree > 1
Expand Down

0 comments on commit b2e342c

Please sign in to comment.