diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index f805c5e69bc1c..38d95ab44bb90 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -125,7 +125,10 @@ def __init__( # Ping the tokenizer to ensure liveness if it runs in a # different process. self.tokenizer.ping() - self.detokenizer = Detokenizer(self.model_config.tokenizer) + self.detokenizer = Detokenizer( + tokenizer_name=self.model_config.tokenizer, + tokenizer_mode=self.model_config.tokenizer_mode, + trust_remote_code=self.model_config.trust_remote_code) self.generation_config_fields = _load_generation_config_dict( model_config) diff --git a/vllm/v1/tokenizer/detokenizer.py b/vllm/v1/tokenizer/detokenizer.py index e485fcc3522d9..bf1be5d54140a 100644 --- a/vllm/v1/tokenizer/detokenizer.py +++ b/vllm/v1/tokenizer/detokenizer.py @@ -42,13 +42,17 @@ class DetokenizerOutputs(msgspec.Struct): class Detokenizer: - def __init__(self, tokenizer_name: str): + def __init__(self, tokenizer_name: str, tokenizer_mode: str, + trust_remote_code: bool): # FIXME(woosuk): Currently, the detokenizer is just a hacky prototype. # For example, it does not terminate properly. We need to improve this. 
self.push_port = get_open_port() self.pull_port = get_open_port() - self.detokenizer = DetokenizerProc(tokenizer_name, self.push_port, - self.pull_port) + self.detokenizer = DetokenizerProc(tokenizer_name=tokenizer_name, + tokenizer_mode=tokenizer_mode, + trust_remote_code=trust_remote_code, + pull_port=self.push_port, + push_port=self.pull_port) self.detokenizer.start() self.zmq_context = zmq.Context() @@ -82,11 +86,15 @@ class DetokenizerProc(multiprocessing.Process): def __init__( self, tokenizer_name: str, + tokenizer_mode: str, + trust_remote_code: bool, pull_port: int, push_port: int, ): super().__init__() self.tokenizer_name = tokenizer_name + self.tokenizer_mode = tokenizer_mode + self.trust_remote_code = trust_remote_code # NOTE: The pull_port of the detokenizer should be the same as the # push_port of the engine. Vice versa. self.pull_port = pull_port @@ -97,7 +105,10 @@ def run(self): # not picklable. self.msgpack_encoder = msgpack.Encoder() self.msgpack_decoder = msgpack.Decoder(DetokenizerInputs) - self.tokenizer = get_tokenizer(self.tokenizer_name) + self.tokenizer = get_tokenizer( + tokenizer_name=self.tokenizer_name, + tokenizer_mode=self.tokenizer_mode, + trust_remote_code=self.trust_remote_code) # req_id -> RequestState self.request_states: Dict[str, RequestState] = {}