Multimodality fix for llava #641

Open · wants to merge 7 commits into base: habana_main
35 changes: 29 additions & 6 deletions vllm/worker/hpu_model_runner.py
@@ -43,7 +43,8 @@
 from vllm.model_executor.models import supports_multimodal
 from vllm.model_executor.sampling_metadata import SequenceGroupToSample
 from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
-                             MultiModalKwargs, MultiModalRegistry)
+                             MultiModalKwargs, MultiModalPlaceholderMap,
+                             MultiModalRegistry)
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import (CompletionSequenceGroupOutput, IntermediateTensors,
                            Logprob, SequenceData, SequenceGroupMetadata,
@@ -804,6 +805,9 @@
         query_lens: List[int] = []
         prefix_block_tables: List[List[int]] = []
         multi_modal_kwargs_list: List[MultiModalKwargs] = []
+        multi_modal_placeholder_maps: Dict[
+            str, MultiModalPlaceholderMap] = collections.defaultdict(
+                MultiModalPlaceholderMap)
 
         if len(seq_group_metadata_list) == 0:
             return PreparePromptMetadata.empty()
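For context on the new bookkeeping: using collections.defaultdict(MultiModalPlaceholderMap) means the first placeholder seen for a given modality ("image", "audio", ...) transparently creates an empty map that later sequence groups extend into. A minimal, self-contained sketch of that pattern follows; the _ToyPlaceholderMap class is only illustrative, not vLLM's MultiModalPlaceholderMap.

import collections
from dataclasses import dataclass, field
from typing import List

@dataclass
class _ToyPlaceholderMap:
    # Illustrative stand-in: per-modality source/destination ranges plus extend().
    src_ranges: List[range] = field(default_factory=list)
    dest_ranges: List[range] = field(default_factory=list)

    def extend(self, other: "_ToyPlaceholderMap") -> None:
        # Accumulate ranges contributed by another sequence group.
        self.src_ranges.extend(other.src_ranges)
        self.dest_ranges.extend(other.dest_ranges)

# First access for a new modality auto-creates an empty map to extend into.
maps = collections.defaultdict(_ToyPlaceholderMap)
maps["image"].extend(_ToyPlaceholderMap(src_ranges=[range(0, 576)],
                                        dest_ranges=[range(5, 581)]))
print(maps["image"].src_ranges)  # [range(0, 576)]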
@@ -861,11 +865,25 @@
             # is always the first token in the sequence.
             input_positions.append(list(range(context_len, seq_len)))
 
-            mm_data = seq_group_metadata.multi_modal_data
-            if mm_data:
-                mm_kwargs = self.multi_modal_input_mapper(mm_data)
+            if seq_group_metadata.multi_modal_data:
+                positions = input_positions[0]
+                mm_data, placeholder_maps = MultiModalPlaceholderMap \
+                    .from_seq_group(seq_group_metadata, range(positions[0], positions[0] + len(positions)))
+
+                if self.mm_registry.has_processor(self.model_config):
+                    mm_kwargs = mm_data
+                else:
+                    mm_kwargs = self.multi_modal_input_mapper(
+                        mm_data,
+                        seq_group_metadata.mm_processor_kwargs,
+                    )
+
                 multi_modal_kwargs_list.append(mm_kwargs)
 
[Check failure — GitHub Actions / ruff (3.12): vllm/worker/hpu_model_runner.py:881:81: E501 Line too long (107 > 80)]
 
+                for modality, placeholder_map in placeholder_maps.items():
+                    multi_modal_placeholder_maps[modality].extend(
+                        placeholder_map)
+
             if seq_group_metadata.block_tables is None:
                 # During memory profiling, the block tables are not initialized
                 # yet. In this case, we just use a dummy slot mapping.
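For context: MultiModalPlaceholderMap.from_seq_group is given the range of token positions covered by this prefill and, roughly speaking, keeps the parts of each modality's placeholder ranges that fall inside that window. The helper below is an illustrative approximation of that intersection step, not the library function.

from typing import Dict, List

def intersect_placeholders(placeholders: Dict[str, List[range]],
                           positions: range) -> Dict[str, List[range]]:
    # Keep only the portions of each modality's placeholder ranges that
    # overlap the token positions handled by this prefill chunk.
    out: Dict[str, List[range]] = {}
    for modality, ranges in placeholders.items():
        kept = [range(max(r.start, positions.start),
                      min(r.stop, positions.stop))
                for r in ranges
                if max(r.start, positions.start) < min(r.stop, positions.stop)]
        if kept:
            out[modality] = kept
    return out

# A llava-style prompt with an image placeholder at positions 5..580,
# prefilled in a 512-token chunk, keeps only range(5, 512).
print(intersect_placeholders({"image": [range(5, 581)]}, range(0, 512)))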
@@ -968,6 +986,12 @@
                                            dtype=torch.long,
                                            device='cpu')
 
+        placeholder_index_maps = {
+            modality: placeholder_map.index_map()
+            for modality, placeholder_map in
+            multi_modal_placeholder_maps.items()
+        }
+
         # Note: num_prefill_tokens is calculated using the length of
         # input_tokens after padding.
         num_prefill_tokens = input_tokens_tensor.numel()
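For context: index_map() flattens the accumulated ranges into explicit index lists that the attention backend can use to place multimodal embeddings at the right prompt positions. The toy version below only mirrors the rough shape of that output (paired source/destination indices); the names and exact structure of vLLM's index map are assumptions here.

from typing import List, NamedTuple

class ToyIndexMap(NamedTuple):
    srcs: List[int]   # indices into the multimodal feature tensor (assumed layout)
    dests: List[int]  # positions in the flattened prompt token stream

def toy_index_map(src_ranges: List[range],
                  dest_ranges: List[range]) -> ToyIndexMap:
    # Flatten each list of ranges into explicit integer indices.
    return ToyIndexMap(srcs=[i for r in src_ranges for i in r],
                       dests=[i for r in dest_ranges for i in r])

print(toy_index_map([range(0, 3)], [range(5, 8)]))
# ToyIndexMap(srcs=[0, 1, 2], dests=[5, 6, 7])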
@@ -1001,8 +1025,7 @@
             num_prefill_tokens=num_prefill_tokens,
             num_decode_tokens=0,
             slot_mapping=slot_mapping,
-            multi_modal_placeholder_index_maps=
-            None  # FIXME(kzawora): mutli-modality will not work here
+            multi_modal_placeholder_index_maps=placeholder_index_maps
         )
         multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
         for t in multi_modal_kwargs:
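For context: MultiModalKwargs.batch combines the per-sequence kwargs collected above into batched tensor inputs for the model. A deliberately simplified stand-in for that step (real batching also handles nested and ragged inputs, which this toy does not):

from typing import Dict, List
import torch

def toy_batch(kwargs_list: List[Dict[str, torch.Tensor]]
              ) -> Dict[str, torch.Tensor]:
    # Stack per-sequence tensors key-wise; assumes every entry has the
    # same keys and shapes, which the real batching code does not require.
    return {key: torch.stack([kw[key] for kw in kwargs_list])
            for key in kwargs_list[0]}

per_seq = [{"pixel_values": torch.zeros(3, 336, 336)} for _ in range(2)]
print(toy_batch(per_seq)["pixel_values"].shape)  # torch.Size([2, 3, 336, 336])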