 package org.elasticsearch.search.suggest.phrase;

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
-import org.elasticsearch.search.suggest.SuggestUtils;
+import org.apache.lucene.util.IOUtils;
+import org.elasticsearch.common.io.FastCharArrayReader;

 import java.io.IOException;
 import java.util.ArrayList;
 import static java.lang.Math.max;
 import static java.lang.Math.round;

-final class DirectCandidateGenerator extends CandidateGenerator {
+public final class DirectCandidateGenerator extends CandidateGenerator {

     private final DirectSpellChecker spellchecker;
     private final String field;
@@ -140,7 +146,7 @@ protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, f
             return term;
         }
         final BytesRefBuilder result = byteSpare;
-        SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
+        analyze(preFilter, term, field, new TokenConsumer() {

             @Override
             public void nextToken() throws IOException {
@@ -156,7 +162,7 @@ protected void postFilter(final Candidate candidate, final CharsRefBuilder spare
             candidates.add(candidate);
         } else {
             final BytesRefBuilder result = byteSpare;
-            SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
+            analyze(postFilter, candidate.term, field, new TokenConsumer() {
                 @Override
                 public void nextToken() throws IOException {
                     this.fillBytesRef(result);
@@ -189,6 +195,27 @@ protected long thresholdFrequency(long termFrequency, long dictionarySize) {

     }

+    public abstract static class TokenConsumer {
+        protected CharTermAttribute charTermAttr;
+        protected PositionIncrementAttribute posIncAttr;
+        protected OffsetAttribute offsetAttr;
+
+        public void reset(TokenStream stream) {
+            charTermAttr = stream.addAttribute(CharTermAttribute.class);
+            posIncAttr = stream.addAttribute(PositionIncrementAttribute.class);
+            offsetAttr = stream.addAttribute(OffsetAttribute.class);
+        }
+
+        protected BytesRef fillBytesRef(BytesRefBuilder spare) {
+            spare.copyChars(charTermAttr);
+            return spare.get();
+        }
+
+        public abstract void nextToken() throws IOException;
+
+        public void end() {}
+    }
+
     public static class CandidateSet {
         public Candidate[] candidates;
         public final Candidate originalTerm;
@@ -283,4 +310,40 @@ public Candidate createCandidate(BytesRef term, long frequency, double channelSc
         return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
     }

+    public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
+            throws IOException {
+        spare.copyUTF8Bytes(toAnalyze);
+        CharsRef charsRef = spare.get();
+        try (TokenStream ts = analyzer.tokenStream(
+                field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
+            return analyze(ts, consumer);
+        }
+    }
+
+    /** NOTE: this method closes the TokenStream, even on exception, which is awkward
+     *  because really the caller who called {@link Analyzer#tokenStream} should close it,
+     *  but when trying that there are recursion issues when we try to use the same
+     *  TokenStream twice in the same recursion... */
+    public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
+        int numTokens = 0;
+        boolean success = false;
+        try {
+            stream.reset();
+            consumer.reset(stream);
+            while (stream.incrementToken()) {
+                consumer.nextToken();
+                numTokens++;
+            }
+            consumer.end();
+            success = true;
+        } finally {
+            if (success) {
+                stream.close();
+            } else {
+                IOUtils.closeWhileHandlingException(stream);
+            }
+        }
+        return numTokens;
+    }
+
 }
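
For reference, a minimal sketch of how the newly public analyze(...) helper and the TokenConsumer hook could be driven from outside the suggester. The StandardAnalyzer, the "title" field name, and the AnalyzeSketch wrapper are illustrative assumptions, not part of this commit.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;

public class AnalyzeSketch {
    // Collects the analyzed tokens of a string, using the static analyze(...) helper above.
    public static List<BytesRef> tokensOf(String text) throws IOException {
        final List<BytesRef> tokens = new ArrayList<>();
        final BytesRefBuilder byteSpare = new BytesRefBuilder();
        try (Analyzer analyzer = new StandardAnalyzer()) {        // analyzer choice is an assumption
            DirectCandidateGenerator.analyze(analyzer, new BytesRef(text), "title",  // "title" is illustrative
                    new DirectCandidateGenerator.TokenConsumer() {
                        @Override
                        public void nextToken() throws IOException {
                            // fillBytesRef copies the current term into the shared builder,
                            // so deep-copy it before the next token overwrites it.
                            tokens.add(BytesRef.deepCopyOf(fillBytesRef(byteSpare)));
                        }
                    }, new CharsRefBuilder());
        }
        return tokens;
    }
}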