test/prototype/mx_formats/test_inference_workflow.py (4 changes: 0 additions & 4 deletions)

@@ -177,10 +177,6 @@ def test_inference_workflow_nvfp4(
     # DYNAMIC mode requires SM100+, but WEIGHT_ONLY works on older GPUs
     if quant_type == "dynamic" and not is_sm_at_least_100():
         pytest.skip("CUDA capability >= 10.0 required for DYNAMIC float4 gemm")
-
-    if bias and inpt_dtype == torch.float32:
-        pytest.xfail("Bias is not supported when module weight is in fp32")
-
     if quant_type == "weight_only" and compile:
         pytest.skip("TODO: weight_only quant currently errors w/ compile")
     if quant_type == "weight_only" and use_triton_kernel:
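For reference, a minimal sketch of the configuration the un-xfailed test case now exercises; the quantize_ call and the NVFP4InferenceConfig import path are assumptions based on the files touched here, not code taken from the PR:

```python
import torch
from torchao.quantization import quantize_
# Import path assumed; the config class may live elsewhere in the prototype package.
from torchao.prototype.mx_formats import NVFP4InferenceConfig

# fp32 Linear with bias=True: previously xfailed, now expected to pass.
m = torch.nn.Linear(64, 32, bias=True, dtype=torch.float32, device="cuda")
quantize_(m, NVFP4InferenceConfig())
x = torch.randn(4, 64, dtype=torch.float32, device="cuda")
y = m(x)
```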
torchao/prototype/mx_formats/inference_workflow.py (6 changes: 0 additions & 6 deletions)

@@ -148,12 +148,6 @@ def _nvfp4_inference_linear_transform(
             f"NVFP4 only supports weight shape with last 2 dims divisible by 16, got {weight.shape}"
         )
 
-    if module.bias is not None and weight.dtype == torch.float32:
-        raise RuntimeError(
-            "Bias is not supported when module weight is in fp32 (out_dtype=Float32). "
-            "Please use bfloat16 or float16 weights, or remove the bias from the linear layer."
-        )
-
     per_tensor_scale = None
     if config.use_dynamic_per_tensor_scale:
         tensor_amax = torch.max(torch.abs(weight))
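The guard above can be removed because the addmm dispatch (next file) now adds the bias outside the scaled matmul when the output dtype is float32. A minimal illustration of that split, using plain float32 tensors rather than the real NVFP4 kernels; shapes and names here are not from the PR:

```python
import torch

# Illustrative float32 tensors only.
x = torch.randn(4, 16, dtype=torch.float32)
w = torch.randn(8, 16, dtype=torch.float32)
bias = torch.randn(8, dtype=torch.float32)

# Rather than asking the scaled matmul to fuse the bias (rejected when
# out_dtype is Float32), run the matmul without bias and add it afterwards.
out = x @ w.t()
out = out + bias
```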
torchao/prototype/mx_formats/nvfp4_tensor.py (12 changes: 9 additions & 3 deletions)

@@ -470,11 +470,17 @@ def _addmm_nvfp4_dispatch(
         assert b.per_tensor_scale is None and a.per_tensor_scale is None
         scale_result = None
 
-    # THIS IS A WORKAROUND:
-    # RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling
+    # THIS IS A WORKAROUND FOR TWO ERRORS:
+    #
+    # (1) RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE when calling
     # When we have per-tensor scaling, we need to apply it before bias
     # since bias is not quantized
-    should_add_bias_separately = (scale_result is not None) and (bias is not None)
+    #
+    # (2) RuntimeError: Bias is not supported when out_dtype is set to Float32
+    # This is not supported by _scaled_mm
+    should_add_bias_separately = (
+        scale_result is not None or a._orig_dtype == torch.float32
+    ) and (bias is not None)
     # should_add_bias_separately = bias is not None
 
     result = torch._scaled_mm(
Review thread on the "(2) RuntimeError: Bias is not supported when out_dtype is set to Float32" comment line:

Contributor: Okay, this is what I thought would happen.

Contributor (author): Yeah, but this only happens if per_tensor_scale=None (by default it is not), so users generally won't run into the _scaled_mm error. Either way, this PR fixes that case.

Contributor: Can you just add a note somewhere on the conversion/casting path for these paths?
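A minimal sketch of the decision this hunk implements; the helper name is hypothetical and only mirrors the logic shown in the diff:

```python
import torch

def _should_split_bias(scale_result, bias, orig_dtype):
    # Add the bias outside torch._scaled_mm when either
    #   (1) a per-tensor scale must be applied first (the bias is not quantized), or
    #   (2) the original dtype is float32, since _scaled_mm rejects a bias
    #       together with a Float32 out_dtype.
    return (scale_result is not None or orig_dtype == torch.float32) and (
        bias is not None
    )

bias = torch.zeros(8)
# fp32 path with a bias: the bias is deferred to a separate add after the matmul.
assert _should_split_bias(scale_result=None, bias=bias, orig_dtype=torch.float32)
# bf16 path without a per-tensor scale: the bias can be passed to the matmul directly.
assert not _should_split_bias(scale_result=None, bias=bias, orig_dtype=torch.bfloat16)
```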