@@ -982,6 +982,7 @@ def maybe_roundup_hidden_size(
     act_dtype: torch.dtype,
     quant_config: QuantizationConfig | None,
     moe_parallel_config: FusedMoEParallelConfig,
+    is_lora_enabled: bool,
 ) -> int:
     """
     Given layer hidden size and MoE configurations, round up hidden_size
@@ -992,6 +993,9 @@ def maybe_roundup_hidden_size(
         act_dtype: Data type of the layer activations.
         quant_config: Fused MoE quantization configuration.
         moe_parallel_config: Fused MoE parallelization strategy configuration.
+        is_lora_enabled: True if the engine is enabled with LoRA. This
+            is used in the case of mxfp4 quantization in selecting the
+            Mxfp4Backend.
 
     Return:
         Rounded up hidden_size if rounding up is required based on the configs.
@@ -1015,7 +1019,7 @@ def maybe_roundup_hidden_size(
            get_mxfp4_backend,
        )

-        current_mxfp4_backend = get_mxfp4_backend()
+        current_mxfp4_backend = get_mxfp4_backend(is_lora_enabled)
        if (
            current_mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16
            or current_mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS
@@ -1139,7 +1143,11 @@ def __init__(
 
         # Round up hidden size if needed.
         hidden_size = maybe_roundup_hidden_size(
-            hidden_size, moe_in_dtype, quant_config, self.moe_parallel_config
+            hidden_size,
+            moe_in_dtype,
+            quant_config,
+            self.moe_parallel_config,
+            is_lora_enabled=self.vllm_config.lora_config is not None,
         )
 
         # For smuggling this layer into the fused moe custom op
@@ -1270,8 +1278,9 @@ def __init__(
             max_num_tokens=envs.VLLM_MOE_DP_CHUNK_SIZE,
             has_bias=has_bias,
             is_act_and_mul=is_act_and_mul,
+            is_lora_enabled=vllm_config.lora_config is not None,
         )
-        self.moe_config = moe
+        self.moe_config: FusedMoEConfig = moe
         self.moe_quant_config: FusedMoEQuantConfig | None = None
         self.quant_config = quant_config
0 commit comments