Skip to content

Commit f72d5c1

Browse files
committed
Expose fragmenter option for plain / normal highlighter.
Closes elastic#2465
1 parent c2f8ee1 commit f72d5c1

File tree

5 files changed

+109
-6
lines changed

5 files changed

+109
-6
lines changed

src/main/java/org/elasticsearch/search/highlight/HighlightBuilder.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class HighlightBuilder implements ToXContent {
5050

5151
private String highlighterType;
5252

53+
private String fragmenter;
5354

5455
/**
5556
* Adds a field to be highlighted with default fragment size of 100 characters, and
@@ -188,6 +189,15 @@ public HighlightBuilder highlighterType(String highlighterType) {
188189
return this;
189190
}
190191

192+
/**
193+
* Sets the fragmenter used to break up text that is eligible for highlighting ("simple" or "span").
194+
* This option is only applicable when using the plain / normal highlighter.
195+
*/
196+
public HighlightBuilder fragmenter(String fragmenter) {
197+
this.fragmenter = fragmenter;
198+
return this;
199+
}
200+
191201
@Override
192202
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
193203
builder.startObject("highlight");
@@ -212,6 +222,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
212222
if (highlighterType != null) {
213223
builder.field("type", highlighterType);
214224
}
225+
if (fragmenter != null) {
226+
builder.field("fragmenter", fragmenter);
227+
}
215228
if (fields != null) {
216229
builder.startObject("fields");
217230
for (Field field : fields) {
@@ -231,6 +244,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
231244
if (field.highlighterType != null) {
232245
builder.field("type", field.highlighterType);
233246
}
247+
if (field.fragmenter != null) {
248+
builder.field("fragmenter", field.fragmenter);
249+
}
234250

235251
builder.endObject();
236252
}
@@ -248,6 +264,7 @@ public static class Field {
248264
int numOfFragments = -1;
249265
Boolean requireFieldMatch;
250266
String highlighterType;
267+
String fragmenter;
251268

252269
public Field(String name) {
253270
this.name = name;
@@ -281,5 +298,10 @@ public Field highlighterType(String highlighterType) {
281298
this.highlighterType = highlighterType;
282299
return this;
283300
}
301+
302+
public Field fragmenter(String fragmenter) {
303+
this.fragmenter = fragmenter;
304+
return this;
305+
}
284306
}
285307
}

src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.search.highlight.Formatter;
3131
import org.apache.lucene.search.vectorhighlight.*;
3232
import org.elasticsearch.ElasticSearchException;
33+
import org.elasticsearch.ElasticSearchIllegalArgumentException;
3334
import org.elasticsearch.common.component.AbstractComponent;
3435
import org.elasticsearch.common.inject.Inject;
3536
import org.elasticsearch.common.io.FastStringReader;
@@ -131,13 +132,13 @@ public void hitExecute(SearchContext context, HitContext hitContext) throws Elas
131132
useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions();
132133
} else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) {
133134
if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) {
134-
throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
135+
throw new ElasticSearchIllegalArgumentException("the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
135136
}
136137
useFastVectorHighlighter = true;
137138
} else if (field.highlighterType().equals("highlighter") || field.highlighterType().equals("plain")) {
138139
useFastVectorHighlighter = false;
139140
} else {
140-
throw new FetchPhaseExecutionException(context, "unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
141+
throw new ElasticSearchIllegalArgumentException("unknown highlighter type [" + field.highlighterType() + "] for the field [" + field.field() + "]");
141142
}
142143
if (!useFastVectorHighlighter) {
143144
MapperHighlightEntry entry = cache.mappers.get(mapper);
@@ -151,8 +152,14 @@ public void hitExecute(SearchContext context, HitContext hitContext) throws Elas
151152
Fragmenter fragmenter;
152153
if (field.numberOfFragments() == 0) {
153154
fragmenter = new NullFragmenter();
154-
} else {
155+
} else if (field.fragmenter() == null) {
156+
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
157+
} else if ("simple".equals(field.fragmenter())) {
158+
fragmenter = new SimpleFragmenter(field.fragmentCharSize());
159+
} else if ("span".equals(field.fragmenter())) {
155160
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
161+
} else {
162+
throw new ElasticSearchIllegalArgumentException("unknown fragmenter option [" + field.fragmenter() + "] for the field [" + field.field() + "]");
156163
}
157164
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);
158165

src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
7777
int globalBoundaryMaxScan = SimpleBoundaryScanner2.DEFAULT_MAX_SCAN;
7878
char[] globalBoundaryChars = SimpleBoundaryScanner2.DEFAULT_BOUNDARY_CHARS;
7979
String globalHighlighterType = null;
80+
String globalFragmenter = null;
8081

8182
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
8283
if (token == XContentParser.Token.FIELD_NAME) {
@@ -120,6 +121,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
120121
globalBoundaryChars = parser.text().toCharArray();
121122
} else if ("type".equals(topLevelFieldName)) {
122123
globalHighlighterType = parser.text();
124+
} else if ("fragmenter".equals(topLevelFieldName)) {
125+
globalFragmenter = parser.text();
123126
}
124127
} else if (token == XContentParser.Token.START_OBJECT) {
125128
if ("fields".equals(topLevelFieldName)) {
@@ -166,6 +169,8 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
166169
field.boundaryChars(parser.text().toCharArray());
167170
} else if ("type".equals(fieldName)) {
168171
field.highlighterType(parser.text());
172+
} else if ("fragmenter".equals(fieldName)) {
173+
field.fragmenter(parser.text());
169174
}
170175
}
171176
}
@@ -214,6 +219,9 @@ public void parse(XContentParser parser, SearchContext context) throws Exception
214219
if (field.highlighterType() == null) {
215220
field.highlighterType(globalHighlighterType);
216221
}
222+
if (field.fragmenter() == null) {
223+
field.fragmenter(globalFragmenter);
224+
}
217225
}
218226

219227
context.highlight(new SearchContextHighlight(fields));

src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ public static class Field {
6060

6161
private String highlighterType;
6262

63+
private String fragmenter;
64+
6365
private int boundaryMaxScan = -1;
6466
private char[] boundaryChars = null;
6567

@@ -151,6 +153,14 @@ public void highlighterType(String type) {
151153
this.highlighterType = type;
152154
}
153155

156+
public String fragmenter() {
157+
return fragmenter;
158+
}
159+
160+
public void fragmenter(String fragmenter) {
161+
this.fragmenter = fragmenter;
162+
}
163+
154164
public int boundaryMaxScan() {
155165
return boundaryMaxScan;
156166
}

src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@
2020
package org.elasticsearch.test.integration.search.highlight;
2121

2222
import org.elasticsearch.ElasticSearchException;
23+
import org.elasticsearch.action.search.SearchPhaseExecutionException;
2324
import org.elasticsearch.action.search.SearchResponse;
2425
import org.elasticsearch.action.search.SearchType;
2526
import org.elasticsearch.client.Client;
2627
import org.elasticsearch.common.settings.ImmutableSettings;
2728
import org.elasticsearch.common.xcontent.XContentBuilder;
2829
import org.elasticsearch.common.xcontent.XContentFactory;
30+
import org.elasticsearch.index.query.MatchQueryBuilder;
2931
import org.elasticsearch.index.query.QueryBuilders;
3032
import org.elasticsearch.indices.IndexMissingException;
33+
import org.elasticsearch.rest.RestStatus;
3134
import org.elasticsearch.search.SearchHit;
3235
import org.elasticsearch.search.builder.SearchSourceBuilder;
3336
import org.elasticsearch.search.highlight.HighlightBuilder;
@@ -49,6 +52,7 @@
4952
import static org.hamcrest.MatcherAssert.assertThat;
5053
import static org.hamcrest.Matchers.equalTo;
5154
import static org.hamcrest.Matchers.instanceOf;
55+
import static org.testng.Assert.fail;
5256

5357
/**
5458
*
@@ -915,8 +919,60 @@ public void testFSHHighlightAllMvFragments() throws Exception {
915919
.addHighlightedField("tags", -1, 0)
916920
.execute().actionGet();
917921

918-
assertThat(2, equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments().length));
919-
assertThat("this is a really long <em>tag</em> i would like to highlight", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string()));
920-
assertThat("here is another one that is very long and has the <em>tag</em> token near the end", equalTo(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string()));
922+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
923+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really long <em>tag</em> i would like to highlight"));
924+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very long and has the <em>tag</em> token near the end"));
921925
}
926+
927+
@Test
928+
public void testPlainHighlightDifferentFragmenter() throws Exception {
929+
try {
930+
client.admin().indices().prepareDelete("test").execute().actionGet();
931+
} catch (Exception e) {
932+
// ignore
933+
}
934+
935+
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder()
936+
.put("number_of_shards", 1).put("number_of_replicas", 0))
937+
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
938+
.startObject("tags").field("type", "string").endObject()
939+
.endObject().endObject().endObject())
940+
.execute().actionGet();
941+
942+
client.prepareIndex("test", "type1", "1")
943+
.setSource(jsonBuilder().startObject().field("tags",
944+
"this is a really long tag i would like to highlight",
945+
"here is another one that is very long tag and has the tag token near the end").endObject())
946+
.setRefresh(true).execute().actionGet();
947+
948+
SearchResponse response = client.prepareSearch("test")
949+
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
950+
.addHighlightedField(new HighlightBuilder.Field("tags")
951+
.fragmentSize(-1).numOfFragments(2).fragmenter("simple"))
952+
.execute().actionGet();
953+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
954+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
955+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
956+
957+
response = client.prepareSearch("test")
958+
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
959+
.addHighlightedField(new HighlightBuilder.Field("tags")
960+
.fragmentSize(-1).numOfFragments(2).fragmenter("span"))
961+
.execute().actionGet();
962+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments().length, equalTo(2));
963+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[0].string(), equalTo("this is a really <em>long</em> <em>tag</em> i would like to highlight"));
964+
assertThat(response.hits().hits()[0].highlightFields().get("tags").fragments()[1].string(), equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
965+
966+
try {
967+
client.prepareSearch("test")
968+
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
969+
.addHighlightedField(new HighlightBuilder.Field("tags")
970+
.fragmentSize(-1).numOfFragments(2).fragmenter("invalid"))
971+
.execute().actionGet();
972+
fail("Shouldn't get here");
973+
} catch (SearchPhaseExecutionException e) {
974+
assertThat(e.shardFailures()[0].status(), equalTo(RestStatus.BAD_REQUEST));
975+
}
976+
}
977+
922978
}

0 commit comments

Comments
 (0)