
Commit b7e7ee9

fix setup

Signed-off-by: Bill Nell <bnell@redhat.com>

1 parent: d7ee088

File tree

1 file changed (+8, -2 lines)

vllm/model_executor/layers/fused_moe/all2all_utils.py

@@ -135,15 +135,21 @@ def maybe_make_prepare_finalize(
         block_shape=quant_config.block_shape,
     )
 
+    in_dtype = (
+        quant_config.quant_dtype
+        if quant_config.quant_dtype is not None
+        else moe.in_dtype
+    )
+
     all_to_all_args = dict(
         max_num_tokens=moe.max_num_tokens,
         num_experts=moe.num_experts,
         num_experts_per_token=moe.experts_per_token,
         expert_padding=1,  # TODO: tests use 1 or 16
         hidden_dim=moe.hidden_dim,
         hidden_dim_scale=hidden_dim_scale,
-        in_dtype=moe.in_dtype,
-        out_dtype=moe.in_dtype,  # or quant type?
+        in_dtype=in_dtype,
+        out_dtype=in_dtype,
         scale_dtype=torch.float32,
         max_private_tokens=None,  # For tuning
     )
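
For context, below is a minimal sketch of the dtype fallback this commit introduces. QuantConfig, MoEConfig, and select_all2all_dtype are hypothetical stand-ins written for illustration, not vLLM's actual classes; they model only the two fields the diff touches.

from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class QuantConfig:
    # None means the MoE layer is unquantized.
    quant_dtype: Optional[torch.dtype] = None


@dataclass
class MoEConfig:
    # Activation dtype of the MoE layer.
    in_dtype: torch.dtype = torch.bfloat16


def select_all2all_dtype(quant_config: QuantConfig, moe: MoEConfig) -> torch.dtype:
    # Same fallback as the commit: use the quantized dtype when one is set,
    # otherwise fall back to the layer's activation dtype. The selected dtype
    # is then passed as both in_dtype and out_dtype of the all-to-all args.
    return (
        quant_config.quant_dtype
        if quant_config.quant_dtype is not None
        else moe.in_dtype
    )


# Unquantized MoE: falls back to the activation dtype.
assert select_all2all_dtype(QuantConfig(), MoEConfig()) == torch.bfloat16

# Quantized MoE (e.g. FP8; assumes a PyTorch build with float8 support):
# the all-to-all buffers now use the quant dtype instead of moe.in_dtype.
fp8 = QuantConfig(quant_dtype=torch.float8_e4m3fn)
assert select_all2all_dtype(fp8, MoEConfig()) == torch.float8_e4m3fn

The practical effect of the fix: when the MoE layer is quantized, the all-to-all buffers are typed for the quantized payload rather than the unquantized activation dtype, which also answers the "or quant type?" question left in the removed comment.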
