
Commit c65d5a7

reuse the non-analyzed token stream for string types
so that a heavyweight token stream isn't created for each value
1 parent fc35fd8 commit c65d5a7

File tree

1 file changed: +87 -1 lines changed


src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java

Lines changed: 87 additions & 1 deletion

@@ -20,6 +20,9 @@
 package org.elasticsearch.index.mapper.core;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;

@@ -294,7 +297,7 @@ protected Field parseCreateField(ParseContext context) throws IOException {
             context.ignoredValue(names.indexName(), value);
             return null;
         }
-        Field field = new Field(names.indexName(), value, fieldType);
+        Field field = new StringField(names.indexName(), value, fieldType);
         field.setBoost(boost);
         return field;
     }

@@ -364,4 +367,87 @@ protected void doXContentBody(XContentBuilder builder) throws IOException {
             builder.field("ignore_above", ignoreAbove);
         }
     }
+
+    /**
+     * Extension of {@link Field} supporting reuse of a cached TokenStream for not-tokenized values.
+     */
+    static class StringField extends Field {
+
+        public StringField(String name, String value, FieldType fieldType) {
+            super(name, value, fieldType);
+        }
+
+        @Override
+        public TokenStream tokenStream(Analyzer analyzer) throws IOException {
+            if (!fieldType().indexed()) {
+                return null;
+            }
+            // Only use the cached TokenStream if the value is indexed and not-tokenized
+            if (fieldType().tokenized()) {
+                return super.tokenStream(analyzer);
+            }
+            return NOT_ANALYZED_TOKENSTREAM.get().setValue((String) fieldsData);
+        }
+    }
+
+    private static final ThreadLocal<StringTokenStream> NOT_ANALYZED_TOKENSTREAM = new ThreadLocal<StringTokenStream>() {
+        @Override
+        protected StringTokenStream initialValue() {
+            return new StringTokenStream();
+        }
+    };
+
+
+    // Copied from Field.java
+    static final class StringTokenStream extends TokenStream {
+        private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+        private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+        private boolean used = false;
+        private String value = null;
+
+        /**
+         * Creates a new TokenStream that returns a String as single token.
+         * <p>Warning: Does not initialize the value, you must call
+         * {@link #setValue(String)} afterwards!
+         */
+        StringTokenStream() {
+        }
+
+        /**
+         * Sets the string value.
+         */
+        StringTokenStream setValue(String value) {
+            this.value = value;
+            return this;
+        }
+
+        @Override
+        public boolean incrementToken() {
+            if (used) {
+                return false;
+            }
+            clearAttributes();
+            termAttribute.append(value);
+            offsetAttribute.setOffset(0, value.length());
+            used = true;
+            return true;
+        }
+
+        @Override
+        public void end() {
+            final int finalOffset = value.length();
+            offsetAttribute.setOffset(finalOffset, finalOffset);
+            value = null;
+        }
+
+        @Override
+        public void reset() {
+            used = false;
+        }
+
+        @Override
+        public void close() {
+            value = null;
+        }
+    }
 }
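For context, the saving comes from not rebuilding a Lucene TokenStream (and its attribute machinery) for every not_analyzed string value: each indexing thread keeps one reusable single-token stream and only swaps the value in. Below is a minimal, self-contained sketch of that per-thread reuse pattern; ReuseSketch and SingleValueStream are hypothetical stand-ins for illustration only, not part of Lucene or Elasticsearch.

// Illustrative sketch: mirrors the ThreadLocal reuse in the diff above,
// using a plain class instead of Lucene's TokenStream.
public class ReuseSketch {

    // Hypothetical stand-in for StringTokenStream: holds one value at a time.
    static final class SingleValueStream {
        private String value;
        private boolean used;

        SingleValueStream setValue(String value) {  // analogous to StringTokenStream#setValue
            this.value = value;
            this.used = false;
            return this;
        }

        String next() {                             // analogous to incrementToken()
            if (used) {
                return null;
            }
            used = true;
            return value;
        }
    }

    // One instance per thread, created lazily and reused for every field value,
    // mirroring NOT_ANALYZED_TOKENSTREAM above.
    private static final ThreadLocal<SingleValueStream> CACHED =
            ThreadLocal.withInitial(SingleValueStream::new);

    public static void main(String[] args) {
        // Repeated lookups on the same thread return the same object;
        // only the value changes, so no per-field allocation happens.
        SingleValueStream first = CACHED.get().setValue("user_1234");
        SingleValueStream second = CACHED.get().setValue("user_5678");
        System.out.println(first == second);  // true: same per-thread instance
        System.out.println(second.next());    // user_5678
    }
}

The tokenStream(Analyzer) override in the diff applies the same idea, but falls back to the regular per-field stream whenever the field is tokenized, since an analyzed value can produce more than one token and cannot use the cached single-token stream.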
