Skip to content

Commit 19521f9

Browse files
committed
rm redundant comment, resolve complication
1 parent 3412f50 commit 19521f9

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

paddlenlp/transformers/tokenizer_utils_base.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,7 +1389,7 @@ def __init__(self, **kwargs):
13891389
self.model_input_names = kwargs.pop("model_input_names", self.model_input_names)
13901390

13911391
# By default, cleaning tokenization spaces for both fast and slow tokenizers
1392-
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", True)
1392+
self.clean_up_tokenization_spaces = kwargs.pop("clean_up_tokenization_spaces", False)
13931393

13941394
# By default, do not split special tokens for both fast and slow tokenizers
13951395
self.split_special_tokens = kwargs.pop("split_special_tokens", False)
@@ -1531,8 +1531,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
15311531
"added_tokens_file": ADDED_TOKENS_FILE,
15321532
"special_tokens_map_file": SPECIAL_TOKENS_MAP_FILE,
15331533
"tokenizer_config_file": TOKENIZER_CONFIG_FILE,
1534-
"chat_template_file": CHAT_TEMPLATE_CONFIG_NAME, # what's this
1535-
# "tokenizer_file": FULL_TOKENIZER_FILE,
1534+
"chat_template_file": CHAT_TEMPLATE_CONFIG_NAME,
15361535
}
15371536

15381537
vocab_files_target = {**cls.resource_files_names, **additional_files_names}

0 commit comments

Comments
 (0)