You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Traceback (most recent call last):
File "/data/anaconda3/envs/haolu2/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/data/anaconda3/envs/haolu2/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/entrypoints/openai/api_server.py", line 156, in <module>
engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 344, in from_engine_args
engine = cls(parallel_config.worker_use_ray,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 310, in __init__
self.engine = self._init_engine(*args, **kwargs)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 415, in _init_engine
return engine_class(*args, **kwargs)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/llm_engine.py", line 106, in __init__
self.model_executor = executor_class(model_config, cache_config,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/executor/gpu_executor.py", line 37, in __init__
self._init_worker()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/executor/gpu_executor.py", line 66, in _init_worker
self.driver_worker.load_model()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/worker/worker.py", line 106, in load_model
self.model_runner.load_model()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/worker/model_runner.py", line 95, in load_model
self.model = get_model(
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/model_executor/model_loader.py", line 92, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/model_executor/models/qwen2.py", line 416, in load_weights
param = params_dict[name]
KeyError: 'model.layers.0.self_attn.k_proj.weight'
The text was updated successfully, but these errors were encountered:
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model /data/haolu/ckpt/Qwen1.5-32B-Chat_2bit_quip --trust-remote-code --port 8001 --host localhost --dtype half --max-model-len 8000
Traceback (most recent call last):
File "/data/anaconda3/envs/haolu2/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/data/anaconda3/envs/haolu2/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/entrypoints/openai/api_server.py", line 156, in <module>
engine = AsyncLLMEngine.from_engine_args(engine_args)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 344, in from_engine_args
engine = cls(parallel_config.worker_use_ray,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 310, in __init__
self.engine = self._init_engine(*args, **kwargs)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/async_llm_engine.py", line 415, in _init_engine
return engine_class(*args, **kwargs)
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/engine/llm_engine.py", line 106, in __init__
self.model_executor = executor_class(model_config, cache_config,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/executor/gpu_executor.py", line 37, in __init__
self._init_worker()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/executor/gpu_executor.py", line 66, in _init_worker
self.driver_worker.load_model()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/worker/worker.py", line 106, in load_model
self.model_runner.load_model()
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/worker/model_runner.py", line 95, in load_model
self.model = get_model(
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/model_executor/model_loader.py", line 92, in get_model
model.load_weights(model_config.model, model_config.download_dir,
File "/data/haolu/workspace/vllm-gptq-gptq_hf/vllm/model_executor/models/qwen2.py", line 416, in load_weights
param = params_dict[name]
KeyError: 'model.layers.0.self_attn.k_proj.weight'
The text was updated successfully, but these errors were encountered: