Commit f0a30a0 (parent: 9d6cff3)

[Bugfix] Fix qwen-moe packed_modules_mapping (vllm-project#26634)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

3 files changed: 19 additions, 11 deletions

vllm/model_executor/models/interfaces.py

Lines changed: 1 addition & 1 deletion

@@ -325,7 +325,7 @@ class SupportsLoRA(Protocol):
     # are empty by default.
     embedding_modules: ClassVar[dict[str, str]] = {}
     embedding_padding_modules: ClassVar[list[str]] = []
-    packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}
+    packed_modules_mapping: dict[str, list[str]] = {}


     # We can't use runtime_checkable with ClassVar for issubclass checks
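
Why the ClassVar is dropped: the two MoE models below now update packed_modules_mapping from __init__, so the mapping varies with the model configuration rather than being a class constant; dropping ClassVar also permits rebinding the attribute through an instance, which mypy forbids for a ClassVar. A minimal sketch of the attribute semantics this change relies on; the class names and the has_dense_mlp flag are illustrative, not vLLM code:

class SupportsLoRADemo:
    # Class-level default, shared until a subclass overrides it.
    packed_modules_mapping: dict[str, list[str]] = {}


class DemoModel(SupportsLoRADemo):
    # Each model class defines its own dict, shadowing the base default.
    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}

    def __init__(self, has_dense_mlp: bool) -> None:
        if has_dense_mlp:
            # Item assignment through `self` resolves to the class-level
            # dict and mutates it in place, so the entry persists for
            # every later instance of DemoModel as well.
            self.packed_modules_mapping["gate_up_proj"] = [
                "gate_proj",
                "up_proj",
            ]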

vllm/model_executor/models/qwen2_moe.py

Lines changed: 11 additions & 5 deletions

@@ -534,11 +534,7 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
@@ -547,6 +543,16 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Only perform the following mapping when Qwen2MoeMLP exists
+        if (
+            getattr(config, "mlp_only_layers", [])
+            or config.shared_expert_intermediate_size > 0
+        ):
+            self.packed_modules_mapping["gate_up_proj"] = [
+                "gate_proj",
+                "up_proj",
+            ]
+
         self.model = Qwen2MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
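
The guard matters because gate_proj and up_proj only exist as separate weights when the model builds a Qwen2MoeMLP, i.e. when some decoder layers are dense (mlp_only_layers) or a shared expert is configured; a pure-MoE checkpoint must not advertise a fused gate_up_proj. A self-contained sketch of that condition, with a hypothetical helper name and a stand-in config class:

from dataclasses import dataclass, field


@dataclass
class DemoMoeConfig:
    # Stand-ins for the HF config fields the commit reads.
    mlp_only_layers: list[int] = field(default_factory=list)
    shared_expert_intermediate_size: int = 0


def uses_dense_mlp(config: DemoMoeConfig) -> bool:
    # Mirrors the commit's condition: True when Qwen2MoeMLP is built.
    return bool(getattr(config, "mlp_only_layers", [])) or (
        config.shared_expert_intermediate_size > 0
    )


# Pure-MoE checkpoint: gate_up_proj stays out of the packed mapping.
assert not uses_dense_mlp(DemoMoeConfig())
# Shared-expert checkpoint: the fused projection exists, so map it.
assert uses_dense_mlp(DemoMoeConfig(shared_expert_intermediate_size=5632))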

vllm/model_executor/models/qwen3_moe.py

Lines changed: 7 additions & 5 deletions

@@ -634,11 +634,7 @@ class Qwen3MoeForCausalLM(
             "q_proj",
             "k_proj",
             "v_proj",
-        ],
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
+        ]
     }

     fall_back_to_pt_during_load = False
@@ -649,6 +645,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         quant_config = vllm_config.quant_config
         self.config = config
         self.quant_config = quant_config
+        # Only perform the following mapping when Qwen3MoeMLP exists
+        if getattr(config, "mlp_only_layers", []):
+            self.packed_modules_mapping["gate_up_proj"] = [
+                "gate_proj",
+                "up_proj",
+            ]
         self.model = Qwen3MoeModel(
             vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
         )
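
For context on why a stale entry is harmful: vLLM's LoRA machinery consults packed_modules_mapping to translate per-checkpoint target names (e.g. gate_proj) into the fused module names the model actually creates (e.g. gate_up_proj), so advertising a fused module that is never instantiated can misroute adapter weights. A hypothetical, simplified translator illustrating the lookup direction; this is not vLLM's actual loader code:

def resolve_lora_targets(
    targets: list[str], packed: dict[str, list[str]]
) -> set[str]:
    # Map each target to the fused module that packs it, if any;
    # otherwise keep the name unchanged.
    resolved: set[str] = set()
    for name in targets:
        fused = next((k for k, subs in packed.items() if name in subs), name)
        resolved.add(fused)
    return resolved


# For a pure-MoE Qwen3 checkpoint the mapping now packs only the attention
# projections, so expert gate/up targets pass through untranslated.
packed = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
assert resolve_lora_targets(["q_proj", "gate_proj"], packed) == {
    "qkv_proj",
    "gate_proj",
}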
