Skip to content

Commit f63fcef

Browse files
committed
Stop using cached component in _analyze API
Stop calling the tokenizer/tokenFilters/charFilter methods of IndexService. Add some getAnalysisProvider methods. Change the SynonymTokenFilterFactory constructor. Closes elastic#19827
1 parent 563bf01 commit f63fcef

File tree

4 files changed

+116
-27
lines changed

4 files changed

+116
-27
lines changed

core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -467,17 +467,21 @@ private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request
467467
// Need to set anonymous "name" of char_filter
468468
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
469469
} else {
470+
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory;
470471
if (analysisService == null) {
471-
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
472+
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
472473
if (charFilterFactoryFactory == null) {
473474
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
474475
}
475476
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
476477
} else {
477-
charFilterFactories[i] = analysisService.charFilter(charFilter.name);
478-
if (charFilterFactories[i] == null) {
478+
charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name, analysisService.getIndexSettings());
479+
if (charFilterFactoryFactory == null) {
479480
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
480481
}
482+
charFilterFactories[i] = charFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, charFilter.name,
483+
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
484+
AnalysisRegistry.INDEX_ANALYSIS_CHAR_FILTER + "." + charFilter.name));
481485
}
482486
}
483487
if (charFilterFactories[i] == null) {
@@ -509,18 +513,21 @@ private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest reque
509513
// Need to set anonymous "name" of tokenfilter
510514
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
511515
} else {
516+
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory;
512517
if (analysisService == null) {
513-
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
514-
518+
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
515519
if (tokenFilterFactoryFactory == null) {
516520
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
517521
}
518522
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
519523
} else {
520-
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
521-
if (tokenFilterFactories[i] == null) {
524+
tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name, analysisService.getIndexSettings());
525+
if (tokenFilterFactoryFactory == null) {
522526
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
523527
}
528+
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenFilter.name,
529+
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
530+
AnalysisRegistry.INDEX_ANALYSIS_FILTER + "." + tokenFilter.name));
524531
}
525532
}
526533
if (tokenFilterFactories[i] == null) {
@@ -550,17 +557,21 @@ private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, An
550557
// Need to set anonymous "name" of tokenizer
551558
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
552559
} else {
560+
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
553561
if (analysisService == null) {
554-
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
562+
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
555563
if (tokenizerFactoryFactory == null) {
556564
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
557565
}
558566
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
559567
} else {
560-
tokenizerFactory = analysisService.tokenizer(tokenizer.name);
561-
if (tokenizerFactory == null) {
568+
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, analysisService.getIndexSettings());
569+
if (tokenizerFactoryFactory == null) {
562570
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
563571
}
572+
tokenizerFactory = tokenizerFactoryFactory.get(analysisService.getIndexSettings(), environment, tokenizer.name,
573+
AnalysisRegistry.getSettingsFromIndexSettings(analysisService.getIndexSettings(),
574+
AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizer.name));
564575
}
565576
}
566577
return tokenizerFactory;

core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
* This class exists per node and allows to create per-index {@link AnalysisService} via {@link #build(IndexSettings)}
5050
*/
5151
public final class AnalysisRegistry implements Closeable {
52+
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
53+
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
54+
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
5255
private final PrebuiltAnalysis prebuiltAnalysis = new PrebuiltAnalysis();
5356
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();
5457

@@ -70,6 +73,14 @@ public AnalysisRegistry(Environment environment,
7073
this.analyzers = unmodifiableMap(analyzers);
7174
}
7275

76+
public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, String groupName) {
77+
Settings settings = indexSettings.getSettings().getAsSettings(groupName);
78+
if (settings.isEmpty()) {
79+
settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, indexSettings.getIndexVersionCreated()).build();
80+
}
81+
return settings;
82+
}
83+
7384
/**
7485
* Returns a registered {@link TokenizerFactory} provider by name or <code>null</code> if the tokenizer was not registered
7586
*/
@@ -122,9 +133,9 @@ public void close() throws IOException {
122133
* Creates an index-level {@link AnalysisService} from this registry using the given index settings
123134
*/
124135
public AnalysisService build(IndexSettings indexSettings) throws IOException {
125-
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
126-
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
127-
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
136+
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
137+
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
138+
final Map<String, Settings> tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER);
128139
final Map<String, Settings> analyzersSettings = indexSettings.getSettings().getGroups("index.analysis.analyzer");
129140

130141
final Map<String, CharFilterFactory> charFilterFactories = buildMapping(false, "charfilter", indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
@@ -136,14 +147,54 @@ public AnalysisService build(IndexSettings indexSettings) throws IOException {
136147
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
137148
* hide internal data-structures as much as possible.
138149
*/
139-
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, tokenizerFactories, name, settings)));
150+
tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
140151
final Map<String, TokenFilterFactory> tokenFilterFactories = buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories);
141152
final Map<String, AnalyzerProvider<?>> analyzierFactories = buildMapping(true, "analyzer", indexSettings, analyzersSettings,
142153
analyzers, prebuiltAnalysis.analyzerProviderFactories);
143154
return new AnalysisService(indexSettings, analyzierFactories, tokenizerFactories, charFilterFactories, tokenFilterFactories);
144155
}
145156

146157

158+
public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
159+
final Map<String, Settings> tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer");
160+
if (tokenizerSettings.containsKey(tokenizer)) {
161+
Settings currentSettings = tokenizerSettings.get(tokenizer);
162+
return getAnalysisProvider("tokenizer", tokenizers, tokenizer, currentSettings.get("type"));
163+
} else {
164+
return prebuiltAnalysis.tokenizerFactories.get(tokenizer);
165+
}
166+
}
167+
168+
public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
169+
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.filter");
170+
if (tokenFilterSettings.containsKey(tokenFilter)) {
171+
Settings currentSettings = tokenFilterSettings.get(tokenFilter);
172+
String typeName = currentSettings.get("type");
173+
/*
174+
* synonym is different than everything else since it needs access to the tokenizer factories for this index.
175+
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
176+
* hide internal data-structures as much as possible.
177+
*/
178+
if ("synonym".equals(typeName)) {
179+
return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
180+
} else {
181+
return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName);
182+
}
183+
} else {
184+
return prebuiltAnalysis.tokenFilterFactories.get(tokenFilter);
185+
}
186+
}
187+
188+
public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
189+
final Map<String, Settings> tokenFilterSettings = indexSettings.getSettings().getGroups("index.analysis.char_filter");
190+
if (tokenFilterSettings.containsKey(charFilter)) {
191+
Settings currentSettings = tokenFilterSettings.get(charFilter);
192+
return getAnalysisProvider("charfilter", charFilters, charFilter, currentSettings.get("type"));
193+
} else {
194+
return prebuiltAnalysis.charFilterFactories.get(charFilter);
195+
}
196+
}
197+
147198
private static <T> AnalysisModule.AnalysisProvider<T> requriesAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
148199
return new AnalysisModule.AnalysisProvider<T>() {
149200
@Override
@@ -185,13 +236,7 @@ private <T> Map<String, T> buildMapping(boolean analyzer, String toBuild, IndexS
185236
}
186237
factories.put(name, factory);
187238
} else {
188-
if (typeName == null) {
189-
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
190-
}
191-
AnalysisModule.AnalysisProvider<T> type = providerMap.get(typeName);
192-
if (type == null) {
193-
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
194-
}
239+
AnalysisProvider<T> type = getAnalysisProvider(toBuild, providerMap, name, typeName);
195240
final T factory = type.get(settings, environment, name, currentSettings);
196241
factories.put(name, factory);
197242
}
@@ -232,6 +277,17 @@ private <T> Map<String, T> buildMapping(boolean analyzer, String toBuild, IndexS
232277
return factories;
233278
}
234279

280+
private <T> AnalysisProvider<T> getAnalysisProvider(String toBuild, Map<String, AnalysisProvider<T>> providerMap, String name, String typeName) {
281+
if (typeName == null) {
282+
throw new IllegalArgumentException(toBuild + " [" + name + "] must specify either an analyzer type, or a tokenizer");
283+
}
284+
AnalysisProvider<T> type = providerMap.get(typeName);
285+
if (type == null) {
286+
throw new IllegalArgumentException("Unknown " + toBuild + " type [" + typeName + "] for [" + name + "]");
287+
}
288+
return type;
289+
}
290+
235291
private static class PrebuiltAnalysis implements Closeable {
236292

237293
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;

core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,18 @@
3232
import org.elasticsearch.common.settings.Settings;
3333
import org.elasticsearch.env.Environment;
3434
import org.elasticsearch.index.IndexSettings;
35+
import org.elasticsearch.indices.analysis.AnalysisModule;
3536

3637
import java.io.IOException;
3738
import java.io.Reader;
3839
import java.util.List;
39-
import java.util.Map;
4040

4141
public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
4242

4343
private final SynonymMap synonymMap;
4444
private final boolean ignoreCase;
4545

46-
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, Map<String, TokenizerFactory> tokenizerFactories,
46+
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
4747
String name, Settings settings) throws IOException {
4848
super(indexSettings, name, settings);
4949

@@ -65,11 +65,13 @@ public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, M
6565
boolean expand = settings.getAsBoolean("expand", true);
6666

6767
String tokenizerName = settings.get("tokenizer", "whitespace");
68-
final TokenizerFactory tokenizerFactory = tokenizerFactories.get(tokenizerName);
69-
if (tokenizerFactory == null) {
68+
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
69+
analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
70+
if (tokenizerFactoryFactory == null) {
7071
throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
7172
}
72-
73+
final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
74+
AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
7375
Analyzer analyzer = new Analyzer() {
7476
@Override
7577
protected TokenStreamComponents createComponents(String fieldName) {

core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
2424
import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
2525
import org.elasticsearch.cluster.metadata.IndexMetaData;
26+
import org.elasticsearch.common.UUIDs;
2627
import org.elasticsearch.common.settings.Settings;
2728
import org.elasticsearch.env.Environment;
2829
import org.elasticsearch.index.IndexSettings;
@@ -51,12 +52,21 @@ public void setUp() throws Exception {
5152

5253
Settings indexSettings = Settings.builder()
5354
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
55+
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
5456
.put("index.analysis.filter.wordDelimiter.type", "word_delimiter")
5557
.put("index.analysis.filter.wordDelimiter.split_on_numerics", false)
5658
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
5759
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
5860
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
59-
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter").build();
61+
.putArray("index.analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter")
62+
.put("index.analysis.tokenizer.trigram.type", "ngram")
63+
.put("index.analysis.tokenizer.trigram.min_gram", 3)
64+
.put("index.analysis.tokenizer.trigram.max_gram", 3)
65+
.put("index.analysis.filter.synonym.type", "synonym")
66+
.putArray("index.analysis.filter.synonym.synonyms", "kimchy => shay")
67+
.put("index.analysis.filter.synonym.tokenizer", "trigram")
68+
.put("index.analysis.filter.synonym.min_gram", 3)
69+
.put("index.analysis.filter.synonym.max_gram", 3).build();
6070
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
6171
environment = new Environment(settings);
6272
registry = new AnalysisModule(environment, emptyList()).getAnalysisRegistry();
@@ -168,6 +178,16 @@ public void testWithAnalysisService() throws IOException {
168178
assertEquals("brown", tokens.get(2).getTerm());
169179
assertEquals("fox", tokens.get(3).getTerm());
170180
assertEquals("dog", tokens.get(4).getTerm());
181+
182+
request.analyzer(null);
183+
request.tokenizer("trigram");
184+
request.addTokenFilter("synonym");
185+
request.text("kimchy");
186+
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
187+
tokens = analyze.getTokens();
188+
assertEquals(2, tokens.size());
189+
assertEquals("sha", tokens.get(0).getTerm());
190+
assertEquals("hay", tokens.get(1).getTerm());
171191
}
172192

173193
public void testGetIndexAnalyserWithoutAnalysisService() throws IOException {

0 commit comments

Comments
 (0)