Commit 563bf01

Merge pull request elastic#19920 from cbuescher/remove-SuggestUtil

Remove SuggestUtil helper class

2 parents: acc50d5 + d115213

15 files changed: +192 -254 lines
core/src/main/java/org/elasticsearch/search/suggest/DirectSpellcheckerSettings.java
35 additions, 0 deletions

@@ -21,8 +21,13 @@
 import org.apache.lucene.search.spell.DirectSpellChecker;
 import org.apache.lucene.search.spell.StringDistance;
 import org.apache.lucene.search.spell.SuggestMode;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
+import java.util.Comparator;
+
 public class DirectSpellcheckerSettings {
 
     // NB: If this changes, make sure to change the default in TermBuilderSuggester
@@ -49,6 +54,9 @@ public class DirectSpellcheckerSettings {
     private int minWordLength = DEFAULT_MIN_WORD_LENGTH;
     private float minDocFreq = DEFAULT_MIN_DOC_FREQ;
 
+    private static final Comparator<SuggestWord> LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
+    private static final Comparator<SuggestWord> SCORE_COMPARATOR = SuggestWordQueue.DEFAULT_COMPARATOR;
+
     public SuggestMode suggestMode() {
         return suggestMode;
     }
@@ -129,6 +137,33 @@ public void minDocFreq(float minDocFreq) {
         this.minDocFreq = minDocFreq;
     }
 
+    public DirectSpellChecker createDirectSpellChecker() {
+
+        DirectSpellChecker directSpellChecker = new DirectSpellChecker();
+        directSpellChecker.setAccuracy(accuracy());
+        Comparator<SuggestWord> comparator;
+        switch (sort()) {
+            case SCORE:
+                comparator = SCORE_COMPARATOR;
+                break;
+            case FREQUENCY:
+                comparator = LUCENE_FREQUENCY;
+                break;
+            default:
+                throw new IllegalArgumentException("Illegal suggest sort: " + sort());
+        }
+        directSpellChecker.setComparator(comparator);
+        directSpellChecker.setDistance(stringDistance());
+        directSpellChecker.setMaxEdits(maxEdits());
+        directSpellChecker.setMaxInspections(maxInspections());
+        directSpellChecker.setMaxQueryFrequency(maxTermFreq());
+        directSpellChecker.setMinPrefix(prefixLength());
+        directSpellChecker.setMinQueryLength(minWordLength());
+        directSpellChecker.setThresholdFrequency(minDocFreq());
+        directSpellChecker.setLowerCaseTerms(false);
+        return directSpellChecker;
+    }
+
     @Override
     public String toString() {
         return "[" +

core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
0 additions, 162 deletions (this file was deleted)

core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionBuilder.java
5 additions, 7 deletions

@@ -37,7 +37,6 @@
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.query.QueryParseContext;
 import org.elasticsearch.index.query.QueryShardContext;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.SuggestionBuilder;
 import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
 import org.elasticsearch.search.suggest.completion.context.ContextMapping;
@@ -48,7 +47,6 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -90,10 +88,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
         TLP_PARSER.declareField((parser, completionSuggestionContext, context) ->
             completionSuggestionContext.regexOptions = RegexOptions.parse(parser, context),
             RegexOptions.REGEX_OPTIONS, ObjectParser.ValueType.OBJECT);
-        TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, SuggestUtils.Fields.FIELD);
-        TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, SuggestUtils.Fields.ANALYZER);
-        TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SuggestUtils.Fields.SIZE);
-        TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SuggestUtils.Fields.SHARD_SIZE);
+        TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::field, FIELDNAME_FIELD);
+        TLP_PARSER.declareString(CompletionSuggestionBuilder.InnerBuilder::analyzer, ANALYZER_FIELD);
+        TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::size, SIZE_FIELD);
+        TLP_PARSER.declareInt(CompletionSuggestionBuilder.InnerBuilder::shardSize, SHARDSIZE_FIELD);
         TLP_PARSER.declareField((p, v, c) -> {
             // Copy the current structure. We will parse, once the mapping is provided
             XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
@@ -353,7 +351,7 @@ static CompletionSuggestionBuilder innerFromXContent(QueryParseContext parseCont
         // now we should have field name, check and copy fields over to the suggestion builder we return
         if (field == null) {
             throw new ElasticsearchParseException(
-                "the required field option [" + SuggestUtils.Fields.FIELD.getPreferredName() + "] is missing");
+                "the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing");
         }
         return new CompletionSuggestionBuilder(field, builder);
     }
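Note: the parser now references ParseField constants (FIELDNAME_FIELD, ANALYZER_FIELD, SIZE_FIELD, SHARDSIZE_FIELD) instead of SuggestUtils.Fields. Those constants are not declared in this hunk; the lines below are only a hypothetical sketch of what such declarations typically look like (the names match the diff, but the string values and their location, presumably the shared SuggestionBuilder, are assumptions):

    // Hypothetical declarations, for illustration only:
    protected static final ParseField FIELDNAME_FIELD = new ParseField("field");
    protected static final ParseField ANALYZER_FIELD = new ParseField("analyzer");
    protected static final ParseField SIZE_FIELD = new ParseField("size");
    protected static final ParseField SHARDSIZE_FIELD = new ParseField("shard_size");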

core/src/main/java/org/elasticsearch/search/suggest/phrase/Correction.java
1 addition, 2 deletions

@@ -20,7 +20,6 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.elasticsearch.search.suggest.SuggestUtils;
 import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
 
 import java.util.Arrays;
@@ -73,7 +72,7 @@ public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag
             len += toJoin[i].length;
         }
         result.grow(len);
-        return SuggestUtils.join(separator, result, toJoin);
+        return WordScorer.join(separator, result, toJoin);
     }
 
     /** Lower scores sorts first; if scores are equal,
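Note: the join helper formerly on SuggestUtils is now referenced from WordScorer. Its job is to concatenate terms into a reused BytesRefBuilder with a separator between them; the snippet below is an illustrative standalone sketch of that behaviour (assumed semantics, not the actual WordScorer implementation, and it assumes at least one term is passed):

    // Illustrative sketch of a separator join over BytesRefs.
    static BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef... toJoin) {
        result.clear();
        for (int i = 0; i < toJoin.length - 1; i++) {
            result.append(toJoin[i]);
            result.append(separator);
        }
        result.append(toJoin[toJoin.length - 1]);
        return result.get();
    }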

core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
67 additions, 4 deletions

@@ -19,6 +19,10 @@
 package org.elasticsearch.search.suggest.phrase;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
@@ -29,8 +33,10 @@
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
-import org.elasticsearch.search.suggest.SuggestUtils;
+import org.apache.lucene.util.IOUtils;
+import org.elasticsearch.common.io.FastCharArrayReader;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -44,7 +50,7 @@
 import static java.lang.Math.max;
 import static java.lang.Math.round;
 
-final class DirectCandidateGenerator extends CandidateGenerator {
+public final class DirectCandidateGenerator extends CandidateGenerator {
 
     private final DirectSpellChecker spellchecker;
     private final String field;
@@ -140,7 +146,7 @@ protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, f
             return term;
         }
         final BytesRefBuilder result = byteSpare;
-        SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
+        analyze(preFilter, term, field, new TokenConsumer() {
 
             @Override
             public void nextToken() throws IOException {
@@ -156,7 +162,7 @@ protected void postFilter(final Candidate candidate, final CharsRefBuilder spare
             candidates.add(candidate);
         } else {
             final BytesRefBuilder result = byteSpare;
-            SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
+            analyze(postFilter, candidate.term, field, new TokenConsumer() {
                 @Override
                 public void nextToken() throws IOException {
                     this.fillBytesRef(result);
@@ -189,6 +195,27 @@ protected long thresholdFrequency(long termFrequency, long dictionarySize) {
 
     }
 
+    public abstract static class TokenConsumer {
+        protected CharTermAttribute charTermAttr;
+        protected PositionIncrementAttribute posIncAttr;
+        protected OffsetAttribute offsetAttr;
+
+        public void reset(TokenStream stream) {
+            charTermAttr = stream.addAttribute(CharTermAttribute.class);
+            posIncAttr = stream.addAttribute(PositionIncrementAttribute.class);
+            offsetAttr = stream.addAttribute(OffsetAttribute.class);
+        }
+
+        protected BytesRef fillBytesRef(BytesRefBuilder spare) {
+            spare.copyChars(charTermAttr);
+            return spare.get();
+        }
+
+        public abstract void nextToken() throws IOException;
+
+        public void end() {}
+    }
+
     public static class CandidateSet {
         public Candidate[] candidates;
         public final Candidate originalTerm;
@@ -283,4 +310,40 @@ public Candidate createCandidate(BytesRef term, long frequency, double channelSc
         return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
     }
 
+    public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
+            throws IOException {
+        spare.copyUTF8Bytes(toAnalyze);
+        CharsRef charsRef = spare.get();
+        try (TokenStream ts = analyzer.tokenStream(
+                field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
+            return analyze(ts, consumer);
+        }
+    }
+
+    /** NOTE: this method closes the TokenStream, even on exception, which is awkward
+     *  because really the caller who called {@link Analyzer#tokenStream} should close it,
+     *  but when trying that there are recursion issues when we try to use the same
+     *  TokenStream twice in the same recursion... */
+    public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
+        int numTokens = 0;
+        boolean success = false;
+        try {
+            stream.reset();
+            consumer.reset(stream);
+            while (stream.incrementToken()) {
+                consumer.nextToken();
+                numTokens++;
+            }
+            consumer.end();
+            success = true;
+        } finally {
+            if (success) {
+                stream.close();
+            } else {
+                IOUtils.closeWhileHandlingException(stream);
+            }
+        }
+        return numTokens;
+    }
+
 }
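Note: the analyze(...) helpers and the TokenConsumer callback now live on DirectCandidateGenerator instead of SuggestUtils. A minimal caller sketch, not part of this commit; the analyzer choice, field name and input term below are assumptions for illustration:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefBuilder;
    import org.apache.lucene.util.CharsRefBuilder;
    import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;

    public class AnalyzeSketch {
        public static void main(String[] args) throws IOException {
            Analyzer analyzer = new StandardAnalyzer();   // assumed analyzer
            BytesRef term = new BytesRef("Foo Bar");      // assumed input term
            List<BytesRef> tokens = new ArrayList<>();

            // Runs the analyzer over the term and visits every token produced.
            DirectCandidateGenerator.analyze(analyzer, term, "my_field",
                new DirectCandidateGenerator.TokenConsumer() {
                    @Override
                    public void nextToken() throws IOException {
                        // fillBytesRef copies the current CharTermAttribute into the builder
                        tokens.add(BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
                    }
                }, new CharsRefBuilder());

            tokens.forEach(t -> System.out.println(t.utf8ToString()));
        }
    }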
