Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128702.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128702
summary: Fix missing highlighting in `match_all` queries for `semantic_text` fields
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ public Set<NodeFeature> getFeatures() {

// Node features related to the semantic_text highlighter. Each constant wraps the
// feature id string passed to the NodeFeature constructor.
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER = new NodeFeature("semantic_text.highlighter");
private static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT = new NodeFeature("semantic_text.highlighter.default");
// Feature id that the "Highlighting with match_all query" YAML REST tests list under
// `requires.cluster_features`; it is returned from getTestFeatures().
private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter");

@Override
public Set<NodeFeature> getTestFeatures() {
Expand All @@ -57,7 +58,8 @@ public Set<NodeFeature> getTestFeatures() {
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
SEMANTIC_KNN_FILTER_FIX
SEMANTIC_KNN_FILTER_FIX,
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.KnnByteVectorQuery;
import org.apache.lucene.search.KnnFloatVectorQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
Expand Down Expand Up @@ -267,6 +268,8 @@ public void visitLeaf(Query query) {
queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(knnQuery.getTargetCopy()), null));
} else if (query instanceof KnnByteVectorQuery knnQuery) {
queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null));
} else if (query instanceof MatchAllDocsQuery) {
queries.add(new MatchAllDocsQuery());
}
}
});
Expand All @@ -293,6 +296,13 @@ public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
}
return this;
}

/**
 * Leaf callback of the query visitor: when the visited leaf is a
 * {@link MatchAllDocsQuery}, a new {@code MatchAllDocsQuery} instance is appended
 * to {@code queries}. Any other leaf type is ignored by this override.
 *
 * @param query the leaf query currently being visited
 */
@Override
public void visitLeaf(Query query) {
    final boolean isMatchAll = query instanceof MatchAllDocsQuery;
    if (isMatchAll == false) {
        // Nothing to collect for leaves that are not match-all.
        return;
    }
    queries.add(new MatchAllDocsQuery());
}
});
return queries;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,44 @@ setup:
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.title

---
# REST test: a `match_all` query combined with the "semantic" highlighter must
# return highlight fragments for the `body` field, on both the sparse and the
# dense test index.
"Highlighting with match_all query":
# Only run when the cluster advertises the match_all highlighter feature.
- requires:
cluster_features: "semantic_text.match_all_highlighter"
reason: semantic text field supports match_all query with semantic highlighter.

# Sparse index: match_all search asking for up to two semantic fragments of `body`.
# NOTE(review): doc_1 is presumably indexed by the `setup` section outside this view - confirm.
- do:
search:
index: test-sparse-index
body:
query:
match_all: {}
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

# Expect a single hit (doc_1) carrying exactly two fragments, in this order.
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }

# Dense index: same request and the same expectations as for the sparse index.
- do:
search:
index: test-dense-index
body:
query:
match_all: {}
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
Original file line number Diff line number Diff line change
Expand Up @@ -243,4 +243,60 @@ setup:
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }

---
# REST test: index a two-passage document into the sparse and the dense test
# index, then verify that a `match_all` query with the "semantic" highlighter
# returns both passages as highlight fragments.
"Highlighting with match_all query":
# Only run when the cluster advertises the match_all highlighter feature.
- requires:
cluster_features: "semantic_text.match_all_highlighter"
reason: semantic text field supports match_all query with semantic highlighter.

# Index doc_1 into the sparse index; `refresh: true` is set on the index request
# so the following search can see the document.
- do:
index:
index: test-sparse-index
id: doc_1
body:
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true

# Sparse index: match_all search asking for up to two semantic fragments of `body`.
- do:
search:
index: test-sparse-index
body:
query:
match_all: {}
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

# Expect a single hit (doc_1) whose two fragments appear in the indexed order.
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }

# Repeat the same index + search + assertions against the dense index.
- do:
index:
index: test-dense-index
id: doc_1
body:
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true

- do:
search:
index: test-dense-index
body:
query:
match_all: {}
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2

- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }