
Commit 51a6673

a4lg and MekkCyber authored
Override Transformers defaults by GGUF defaults (#42770)
* Override Transformers defaults by GGUF defaults

  In some models, GGUF uses default or fixed values that differ from this
  library's defaults. To integrate GGUF-based models without additional
  configuration, we need some kind of compatibility layer. This commit adds
  a mapping that provides GGUF-specific default values used to initialize
  parameters in this library.

  Currently, only the fixed "norm_topk_prob" value of Qwen3 MoE (True) is
  defined, because (a) it differs from this library's default value (False)
  and (b) if this parameter is set incorrectly, the model produces almost
  completely garbled output.

  Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>

* Apply suggestions from code review

  Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>

---------

Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>
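For context, here is a simplified sketch of what norm_topk_prob controls, following the top-k routing pattern used by Qwen-style MoE blocks in transformers (the function name and shapes are illustrative, not the library's actual code):

import torch

def topk_routing(router_logits: torch.Tensor, top_k: int, norm_topk_prob: bool):
    # Softmax over all experts, then keep only the top_k weights per token.
    routing_weights = torch.softmax(router_logits, dim=-1)
    routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
    if norm_topk_prob:
        # Renormalize so the selected experts' weights sum to 1 per token.
        # Loading a checkpoint trained with this enabled while the flag is
        # False skews every expert mixture, matching the garbled output the
        # commit message describes.
        routing_weights = routing_weights / routing_weights.sum(dim=-1, keepdim=True)
    return routing_weights, selected_experts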
1 parent f54647c commit 51a6673

File tree

3 files changed: +20 -0 lines changed


src/transformers/integrations/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,7 @@
     "finegrained_fp8": ["FP8Linear", "replace_with_fp8_linear"],
     "fsdp": ["is_fsdp_enabled", "is_fsdp_managed_module"],
     "ggml": [
+        "GGUF_CONFIG_DEFAULTS_MAPPING",
         "GGUF_CONFIG_MAPPING",
         "GGUF_TOKENIZER_MAPPING",
         "_gguf_parse_value",
@@ -201,6 +202,7 @@
     from .finegrained_fp8 import FP8Linear, replace_with_fp8_linear
     from .fsdp import is_fsdp_enabled, is_fsdp_managed_module
     from .ggml import (
+        GGUF_CONFIG_DEFAULTS_MAPPING,
         GGUF_CONFIG_MAPPING,
         GGUF_TOKENIZER_MAPPING,
         _gguf_parse_value,
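With the re-export above, the new table is importable next to the existing GGUF mappings; a quick illustrative check (usage sketch, not part of the diff):

from transformers.integrations import GGUF_CONFIG_DEFAULTS_MAPPING

# Inspect the GGUF-specific config defaults for one architecture.
print(GGUF_CONFIG_DEFAULTS_MAPPING.get("qwen3_moe"))  # {'norm_topk_prob': True}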

src/transformers/integrations/ggml.py

Lines changed: 10 additions & 0 deletions
@@ -313,6 +313,16 @@
     },
 }
 
+# We only need to set here the parameters that default to different values between transformers and llama.cpp.
+GGUF_CONFIG_DEFAULTS_MAPPING = {
+    "qwen3_moe": {
+        # NOTE: Qwen3MoeConfig defaults to False but llama.cpp needs this to be True.
+        # See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
+        # (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
+        "norm_topk_prob": True,
+    },
+}
+
 
 def _gguf_parse_value(_value, data_type):
     if not isinstance(data_type, list):
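The mapping is keyed by transformers model type, with one dict of config overrides per architecture, and it should hold only keys whose llama.cpp default differs from the transformers default. A hedged sketch of how a second architecture could be registered (the commented-out entry is hypothetical, not part of this commit):

GGUF_CONFIG_DEFAULTS_MAPPING = {
    "qwen3_moe": {
        "norm_topk_prob": True,
    },
    # Hypothetical future entry; "other_moe_arch" and its override are
    # placeholders, not real transformers model types:
    # "other_moe_arch": {
    #     "norm_topk_prob": True,
    # },
}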

src/transformers/modeling_gguf_pytorch_utils.py

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,7 @@
 from tqdm.auto import tqdm
 
 from .integrations import (
+    GGUF_CONFIG_DEFAULTS_MAPPING,
     GGUF_CONFIG_MAPPING,
     GGUF_TOKENIZER_MAPPING,
     _gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
 
+    # Set GGUF-specific default values
+    config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
+        updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
+    )
+    for key, value in config_defaults.items():
+        parsed_parameters["config"].setdefault(key, value)
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
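A minimal, self-contained sketch of the precedence this hunk implements: defaults are applied with dict.setdefault, so a value actually parsed from the GGUF metadata always wins over the GGUF-specific default (the mapping contents come from the diff; the helper name apply_gguf_defaults is illustrative):

GGUF_CONFIG_DEFAULTS_MAPPING = {"qwen3_moe": {"norm_topk_prob": True}}

def apply_gguf_defaults(config: dict, architecture: str) -> dict:
    # setdefault only fills keys that are still missing, so values read
    # from the GGUF file itself are never overwritten.
    for key, value in GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture, {}).items():
        config.setdefault(key, value)
    return config

print(apply_gguf_defaults({}, "qwen3_moe"))
# -> {'norm_topk_prob': True}   (the default fills the gap)
print(apply_gguf_defaults({"norm_topk_prob": False}, "qwen3_moe"))
# -> {'norm_topk_prob': False}  (the GGUF-provided value is kept)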
