Skip to content

Commit

Permalink
fix mypy errors
Browse files Browse the repository at this point in the history
  • Loading branch information
kzawora-intel committed Dec 6, 2024
1 parent d8f395e commit 48ab12b
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 22 deletions.
2 changes: 1 addition & 1 deletion vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ class SchedulerConfig:
# Maximum number of prefill sequences to be
# processed in a single iteration. Used only with padding-aware
# scheduling.
max_num_prefill_seqs: Optional[int] = None,
max_num_prefill_seqs: Optional[int] = None

# If True, scheduler will consider padded
# tokens in prefill.
Expand Down
13 changes: 0 additions & 13 deletions vllm/model_executor/layers/spec_decode_base_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,6 @@ def __init__(self, strict_mode: bool = False):
self.num_emitted_tokens: Optional[torch.Tensor] = None
self.num_draft_tokens: int = 0

def init_tensors(self,
                 device: Union[int, str],
                 device_type: str = 'cuda') -> None:
    """Allocate the accepted/emitted token counters on the target device.

    Must be called exactly once: asserts the counters are still unset.
    An integer ``device`` is interpreted as an index on ``device_type``
    (e.g. ``3`` -> ``"cuda:3"``); a string is used as-is.
    """
    assert self.num_accepted_tokens is None
    # Normalize an integer device index into a "<type>:<index>" string.
    if isinstance(device, int):
        device = f"{device_type}:{device}"
    # Both counters start at zero as long-typed scalar tensors.
    for attr in ("num_accepted_tokens", "num_emitted_tokens"):
        setattr(self, attr, torch.tensor(0, dtype=torch.long, device=device))

def init_tensors(self,
device: Union[int, str],
device_type: Union[torch.device, str] = 'cuda') -> None:
Expand Down
11 changes: 3 additions & 8 deletions vllm/spec_decode/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ def __init__(self,
self._rejsample_metrics_collect_interval_s = collect_interval_s
self._last_metrics_collect_time = self._timer()

def init_tensors(self, rank: int, device: torch.device) -> None:
    """Record this worker's rank and create a copy stream on *device*.

    HPU devices get a Habana stream (imported lazily so the package is
    only required on HPU); every other device type gets a CUDA stream.
    """
    self._rank = rank
    if device.type != 'hpu':
        self._copy_stream = torch.cuda.Stream()
    else:
        # Lazy import: habana_frameworks is only present on HPU hosts.
        import habana_frameworks.torch as htorch
        self._copy_stream = htorch.hpu.Stream()

def init_tensors(self,
rank: int,
device_type: Union[torch.device, str] = 'cuda') -> None:
Expand All @@ -94,6 +86,9 @@ def init_tensors(self,
device_type = device_type.type
if device_type == 'cuda':
self._copy_stream = torch.cuda.Stream()
elif device_type == 'hpu':
import habana_frameworks.torch as htorch
self._copy_stream = htorch.hpu.Stream()

def maybe_collect_rejsample_metrics(
self, k: int) -> Optional[SpecDecodeWorkerMetrics]:
Expand Down

0 comments on commit 48ab12b

Please sign in to comment.