Skip to content

Commit

Permalink
fix mypy errors
Browse files Browse the repository at this point in the history
  • Loading branch information
kzawora-intel committed Dec 6, 2024
1 parent d8f395e commit 48ab12b
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 22 deletions.
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ class SchedulerConfig:
# Maximum number of prefill sequences to be
# processed in a single iteration. Used only with padding-aware
# scheduling.
max_num_prefill_seqs: Optional[int] = None,
max_num_prefill_seqs: Optional[int] = None

# If True, scheduler will consider padded
# tokens in prefill.
Expand Down
13 changes: 0 additions & 13 deletions vllm/model_executor/layers/spec_decode_base_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,6 @@ def __init__(self, strict_mode: bool = False):
self.num_emitted_tokens: Optional[torch.Tensor] = None
self.num_draft_tokens: int = 0

def init_tensors(self,
                 device: Union[int, str],
                 device_type: str = 'cuda') -> None:
    """Allocate the accepted/emitted token counters on the target device.

    Must be called exactly once: asserts the counters are still unset.
    An integer ``device`` is interpreted as an index on ``device_type``
    (e.g. ``3`` -> ``"cuda:3"``); a string is used as-is.
    """
    assert self.num_accepted_tokens is None
    # Normalize an integer device index into a "<type>:<index>" string.
    if isinstance(device, int):
        device = f"{device_type}:{device}"
    # Both counters start at zero as long-typed scalar tensors.
    for attr in ("num_accepted_tokens", "num_emitted_tokens"):
        setattr(self, attr, torch.tensor(0, dtype=torch.long, device=device))

def init_tensors(self,
device: Union[int, str],
device_type: Union[torch.device, str] = 'cuda') -> None:
Expand Down
11 changes: 3 additions & 8 deletions vllm/spec_decode/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ def __init__(self,
self._rejsample_metrics_collect_interval_s = collect_interval_s
self._last_metrics_collect_time = self._timer()

def init_tensors(self, rank: int, device: torch.device) -> None:
    """Record this worker's rank and create a copy stream on *device*.

    HPU devices get a Habana stream (imported lazily so the package is
    only required on HPU); every other device type gets a CUDA stream.
    """
    self._rank = rank
    if device.type != 'hpu':
        self._copy_stream = torch.cuda.Stream()
    else:
        # Lazy import: habana_frameworks is only present on HPU hosts.
        import habana_frameworks.torch as htorch
        self._copy_stream = htorch.hpu.Stream()

def init_tensors(self,
rank: int,
device_type: Union[torch.device, str] = 'cuda') -> None:
Expand All @@ -94,6 +86,9 @@ def init_tensors(self,
device_type = device_type.type
if device_type == 'cuda':
self._copy_stream = torch.cuda.Stream()
elif device_type == 'hpu':
import habana_frameworks.torch as htorch
self._copy_stream = htorch.hpu.Stream()

def maybe_collect_rejsample_metrics(
self, k: int) -> Optional[SpecDecodeWorkerMetrics]:
Expand Down

0 comments on commit 48ab12b

Please sign in to comment.