
Commit 51a6673

a4lg and MekkCyber authored
Override Transformers defaults by GGUF defaults (#42770)
* Override Transformers defaults by GGUF defaults

  In some models, GGUF uses default or fixed values that differ from this
  library's defaults. To integrate GGUF-based models without additional
  configuration, we need some kind of compatibility layer. This commit adds
  a mapping that provides GGUF-specific default values used to initialize
  parameters in this library.

  Currently, only the fixed "norm_topk_prob" value of Qwen3 MoE (True) is
  defined, because (a) it differs from this library's default value (False)
  and (b) if this parameter is set incorrectly, the model produces almost
  completely garbled output.

  Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>

* Apply suggestions from code review

  Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

---------

Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
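For context, here is a simplified sketch of what norm_topk_prob controls, following the top-k routing pattern used by Qwen-style MoE blocks in transformers (the function name and shapes are illustrative, not the library's actual code):

import torch

def topk_routing(router_logits: torch.Tensor, top_k: int, norm_topk_prob: bool):
    # Softmax over all experts, then keep only the top_k weights per token.
    routing_weights = torch.softmax(router_logits, dim=-1)
    routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
    if norm_topk_prob:
        # Renormalize so the selected experts' weights sum to 1 per token.
        # Loading a checkpoint trained with this enabled while the flag is
        # False skews every expert mixture, matching the garbled output the
        # commit message describes.
        routing_weights = routing_weights / routing_weights.sum(dim=-1, keepdim=True)
    return routing_weights, selected_experts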
1 parent f54647c commit 51a6673

File tree

3 files changed: +20 -0 lines changed


src/transformers/integrations/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,7 @@
     "finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
     "fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
     "ggml": [
+        "GGUF_CONFIG_DEFAULTS_MAPPING",
         "GGUF_CONFIG_MAPPING",
         "GGUF_TOKENIZER_MAPPING",
         "_gguf_parse_value",
@@ -201,6 +202,7 @@
     from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
     from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
     from .ggml import (
+        GGUF_CONFIG_DEFAULTS_MAPPING,
         GGUF_CONFIG_MAPPING,
         GGUF_TOKENIZER_MAPPING,
         _gguf_parse_value,
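With the re-export above, the new table is importable next to the existing GGUF mappings; a quick illustrative check (usage sketch, not part of the diff):

from transformers.integrations import GGUF_CONFIG_DEFAULTS_MAPPING

# Inspect the GGUF-specific config defaults for one architecture.
print(GGUF_CONFIG_DEFAULTS_MAPPING.get("qwen3_moe"))  # {'norm_topk_prob': True}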

src/transformers/integrations/ggml.py

Lines changed: 10 additions & 0 deletions
@@ -313,6 +313,16 @@
     },
 }
 
+# We only need to set here the parameters that default to different values between transformers and llama.cpp.
+GGUF_CONFIG_DEFAULTS_MAPPING = {
+    "qwen3_moe": {
+        # NOTE: Qwen3MoeConfig defaults to False but llama.cpp needs this to be True.
+        # See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
+        # (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
+        "norm_topk_prob": True,
+    },
+}
+
 
 def _gguf_parse_value(_value, data_type):
     if not isinstance(data_type, list):
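The mapping is keyed by transformers model type, with one dict of config overrides per architecture, and it should hold only keys whose llama.cpp default differs from the transformers default. A hedged sketch of how a second architecture could be registered (the commented-out entry is hypothetical, not part of this commit):

GGUF_CONFIG_DEFAULTS_MAPPING = {
    "qwen3_moe": {
        "norm_topk_prob": True,
    },
    # Hypothetical future entry; "other_moe_arch" and its override are
    # placeholders, not real transformers model types:
    # "other_moe_arch": {
    #     "norm_topk_prob": True,
    # },
}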

src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,7 @@
 from tqdm.auto import tqdm
 
 from .integrations import (
+    GGUF_CONFIG_DEFAULTS_MAPPING,
     GGUF_CONFIG_MAPPING,
     GGUF_TOKENIZER_MAPPING,
     _gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
 
+    # Set GGUF-specific default values
+    config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
+        updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
+    )
+    for key, value in config_defaults.items():
+        parsed_parameters["config"].setdefault(key, value)
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
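A minimal, self-contained sketch of the precedence this hunk implements: defaults are applied with dict.setdefault, so a value actually parsed from the GGUF metadata always wins over the GGUF-specific default (the mapping contents come from the diff; the helper name apply_gguf_defaults is illustrative):

GGUF_CONFIG_DEFAULTS_MAPPING = {"qwen3_moe": {"norm_topk_prob": True}}

def apply_gguf_defaults(config: dict, architecture: str) -> dict:
    # setdefault only fills keys that are still missing, so values read
    # from the GGUF file itself are never overwritten.
    for key, value in GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture, {}).items():
        config.setdefault(key, value)
    return config

print(apply_gguf_defaults({}, "qwen3_moe"))
# -> {'norm_topk_prob': True}   (the default fills the gap)
print(apply_gguf_defaults({"norm_topk_prob": False}, "qwen3_moe"))
# -> {'norm_topk_prob': False}  (the GGUF-provided value is kept)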
