This project will be merged into elasticsearch-sudachi.
A synonym token filter plugin for Elasticsearch
The usage below assumes that Sudachi is used as the tokenizer.
- Pass `chikkar_synonym` for the type of chikkar plugin.
- Set `system_dict` to specify the system synonym dictionary path. The path should be relative to the ES config folder, as ES can only access files under the config folder.
- Set `user_dict_list` to a list of user synonym dictionary paths. The paths should be relative to the ES config folder, as ES can only access files under the config folder.
```json
{
  "settings": {
    "index": {
      "analysis": {
        "tokenizer": {
          "sudachi_tokenizer": {
            "type": "sudachi_tokenizer",
            "split_mode": "C",
            "discard_punctuation": true,
            "resources_path": "/usr/share/elasticsearch/config"
          }
        },
        "filter": {
          "chikkar_filter": {
            "type": "chikkar_synonym",
            "system_dict": "hr/hr_synonym_core.txt",
            "user_dict_list": ["hr/user_dict_1.txt", "hr/user_dict_2.txt", "hr/user_dict_3.txt"]
          }
        },
        "analyzer": {
          "sudachi_analyzer_no_synonym": {
            "filter": ["sudachi_normalizedform"],
            "tokenizer": "sudachi_tokenizer",
            "type": "custom"
          },
          "sudachi_analyzer_chikkar_synonym": {
            "filter": ["sudachi_normalizedform", "chikkar_filter"],
            "tokenizer": "sudachi_tokenizer",
            "type": "custom"
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "sudachi_analyzer_no_synonym",
        "search_analyzer": "sudachi_analyzer_chikkar_synonym",
        "term_vector": "with_positions_offsets"
      }
    }
  }
}
```