vllm/model_executor/layers/fused_moe (1 file changed, +8 -2 lines)

```diff
@@ -135,15 +135,21 @@ def maybe_make_prepare_finalize(
         block_shape=quant_config.block_shape,
     )
 
+    in_dtype = (
+        quant_config.quant_dtype
+        if quant_config.quant_dtype is not None
+        else moe.in_dtype
+    )
+
     all_to_all_args = dict(
         max_num_tokens=moe.max_num_tokens,
         num_experts=moe.num_experts,
         num_experts_per_token=moe.experts_per_token,
         expert_padding=1,  # TODO: tests use 1 or 16
         hidden_dim=moe.hidden_dim,
         hidden_dim_scale=hidden_dim_scale,
-        in_dtype=moe.in_dtype,
-        out_dtype=moe.in_dtype,  # or quant type?
+        in_dtype=in_dtype,
+        out_dtype=in_dtype,
         scale_dtype=torch.float32,
         max_private_tokens=None,  # For tuning
     )
```
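For context, here is a minimal standalone sketch of the dtype-fallback pattern this change introduces. `QuantConfig` and `MoEConfig` below are hypothetical stand-ins for vLLM's actual quantization and MoE config objects, reduced to the two fields the logic reads; only the selection expression itself mirrors the diff.

```python
from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class QuantConfig:
    # Hypothetical stand-in: the dtype activations are quantized to before
    # dispatch, or None when the model runs unquantized.
    quant_dtype: Optional[torch.dtype] = None


@dataclass
class MoEConfig:
    # Hypothetical stand-in: the activation dtype entering the MoE layer.
    in_dtype: torch.dtype = torch.bfloat16


def resolve_all_to_all_dtype(quant_config: QuantConfig, moe: MoEConfig) -> torch.dtype:
    """Prefer the quantized dtype for the all-to-all buffers when one is set,
    so dispatched activations travel in their smaller quantized form; fall
    back to the layer's activation dtype otherwise."""
    return (
        quant_config.quant_dtype
        if quant_config.quant_dtype is not None
        else moe.in_dtype
    )


# With fp8 quantization enabled, the all-to-all moves fp8 tensors:
assert resolve_all_to_all_dtype(QuantConfig(torch.float8_e4m3fn), MoEConfig()) == torch.float8_e4m3fn
# Without quantization, it falls back to the unquantized activation dtype:
assert resolve_all_to_all_dtype(QuantConfig(), MoEConfig()) == torch.bfloat16
```

Setting `out_dtype` to the same resolved dtype also settles the old `# or quant type?` comment on the removed line: both sides of the all-to-all now agree on the (possibly quantized) wire representation, with dequantization scales carried separately as `scale_dtype=torch.float32`.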