diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index def57fd0965ef..b51f6c1a88f0d 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -296,11 +296,11 @@ def _set_block_mapping(self, metadata, batch_size, device, dtype): attn_bias = (torch.zeros_like(mask, dtype=dtype).masked_fill_( mask, -math.inf)) - if not is_fake_hpu() and htorch.utils.internal.is_lazy(): + if not is_fake_hpu(): block_mapping = torch.nn.functional.one_hot(metadata.block_groups, num_classes=batch_size) else: - # Unfortunately one_hot on CPU/torch.compile mode/eager mode + # Unfortunately one_hot on CPU # doesn't handle out of bounds classes so we need to convert # all negative values to 0 (block_mapping) or bs (block_groups) block_groups = metadata.block_groups.to(torch.long)