Skip to content
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
78fa6af
Per component normalizer is removed
mridula-s109 Jun 25, 2025
d4b1ced
Modified LinearRetrieverBuilder to propagate top level normalizer to …
mridula-s109 Jun 25, 2025
83763e9
[CI] Auto commit changes from spotless
Jul 3, 2025
7066b10
Component is modified
mridula-s109 Jul 9, 2025
d8de361
Retriever builder is also modified according to the new changes:
mridula-s109 Jul 9, 2025
36c1a13
[CI] Auto commit changes from spotless
Jul 9, 2025
3b87c15
Spotless check done
mridula-s109 Jul 9, 2025
e3326e2
Code changes made
mridula-s109 Jul 10, 2025
d36ada2
FIX: Cast rewritten builder in LinearRetrieverBuilder
mridula-s109 Jul 11, 2025
330e32b
modified the builder
mridula-s109 Jul 25, 2025
c35a28e
Update retrievers.md
mridula-s109 Jul 25, 2025
b3d7f5f
Update retrievers.md
mridula-s109 Jul 25, 2025
8ac90e6
Update docs/changelog/129693.yaml
mridula-s109 Jul 25, 2025
0309a65
Update docs/changelog/129693.yaml
mridula-s109 Jul 25, 2025
299a222
Update retrievers.md
mridula-s109 Jul 25, 2025
cdef674
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Jul 25, 2025
b6d5109
WIP
mridula-s109 Jul 25, 2025
0872925
[CI] Auto commit changes from spotless
Jul 25, 2025
ff65d27
WIP
mridula-s109 Jul 25, 2025
3d605ee
Resolved errors
mridula-s109 Jul 31, 2025
3ec7110
Fixed the retrievers
mridula-s109 Jul 31, 2025
2a571ec
Reverted it to main
mridula-s109 Jul 31, 2025
7808fc1
reverted
mridula-s109 Jul 31, 2025
a44cf1d
cleaned up
mridula-s109 Jul 31, 2025
89edd4f
cleaned it up
mridula-s109 Jul 31, 2025
08f68ab
Modified and cleaned code
mridula-s109 Aug 1, 2025
c13f614
Compilation and styling clean
mridula-s109 Aug 7, 2025
182afee
Parsing issues resolved
mridula-s109 Aug 7, 2025
2ab7912
Unittestspass but parsing issue equality persists
mridula-s109 Aug 7, 2025
1cf1871
Parsing and builder tests are passing
mridula-s109 Aug 7, 2025
e087362
Add comprehensive normalizer testing and cleanup duplicate files
mridula-s109 Aug 14, 2025
c36772f
Resolve merge conflict in linear-retriever.md documentation
mridula-s109 Aug 14, 2025
a964d08
Apply spotless code formatting
mridula-s109 Aug 14, 2025
47d2086
Final changes
mridula-s109 Aug 14, 2025
9c7539b
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 14, 2025
22e51aa
Removed unnecessary changes from doc
mridula-s109 Aug 14, 2025
b03d586
Cleaned up test
mridula-s109 Aug 14, 2025
a670936
Cleaned up
mridula-s109 Aug 14, 2025
f3659cb
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 14, 2025
f6a8d4c
Reviewed the code
mridula-s109 Aug 14, 2025
a8e1d5f
Cleaned up comments'
mridula-s109 Aug 14, 2025
04f715b
Reverted RetrieverBuilder
mridula-s109 Aug 14, 2025
a8235c4
Cleaned up yaml
mridula-s109 Aug 14, 2025
7625b9d
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 14, 2025
19d8ce5
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 14, 2025
3d6b32b
Fixed samuel comments
mridula-s109 Aug 15, 2025
976d9e8
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 15, 2025
5c0f006
Worked on Michael comments
mridula-s109 Aug 15, 2025
2b2ee94
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 15, 2025
2e569d7
Reverted the retriever example change
mridula-s109 Aug 19, 2025
29c4f24
The test was modified to include equalioty check
mridula-s109 Aug 19, 2025
078515c
cleaned up resolve normalizer
mridula-s109 Aug 19, 2025
34e14f7
optimised the parsing test
mridula-s109 Aug 19, 2025
fdb0f88
cleaned up duplicates
mridula-s109 Aug 19, 2025
f6484e1
Added cluster features
mridula-s109 Aug 19, 2025
d399593
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 19, 2025
45e2f7e
Modified docs
mridula-s109 Aug 20, 2025
d7c6dbd
worked on all the changes
mridula-s109 Aug 20, 2025
adc7ad2
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 20, 2025
acc4614
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 20, 2025
7ae5800
Update 10_linear_retriever.yml
mridula-s109 Aug 20, 2025
e6f9921
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 20, 2025
bd0d4ce
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 20, 2025
2bc12ec
Nitpicks and some other enhancement comments resolved
mridula-s109 Aug 21, 2025
e27722f
Merge branch 'main' into SEARCH-1027-linear-retriever-top-level-optio…
mridula-s109 Aug 21, 2025
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/129693.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129693
summary: Add top level normalizer for linear retriever
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ Combining `query` and `retrievers` is not supported.
`normalizer` {applies_to}`stack: ga 9.1`
: (Optional, String)

The normalizer to use when using the [multi-field query format](../retrievers.md#multi-field-query-format).
The normalizer to use when combining results.
See [normalizers](#linear-retriever-normalizers) for supported values.
Required when `query` is specified.

When used with the [multi-field query format](../retrievers.md#multi-field-query-format) (`query` parameter), normalizes scores per [field grouping](../retrievers.md#multi-field-field-grouping).
When used with `retrievers`, serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer.

::::{warning}
Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches.
Expand Down Expand Up @@ -74,9 +77,9 @@ Each entry in the `retrievers` array specifies the following parameters:
`normalizer`
: (Optional, String)

Specifies how the retrievers score will be normalized before applying the specified `weight`.
Specifies how the retriever's score will be normalized before applying the specified `weight`.
See [normalizers](#linear-retriever-normalizers) for supported values.
Defaults to `none`.
If not specified, uses the top-level `normalizer`, or defaults to `none` if no top-level normalizer is set.

See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers.

Expand All @@ -94,7 +97,7 @@ The `linear` retriever supports the following normalizers:

## Example

This example of a hybrid search weights KNN results five times more heavily than BM25 results in the final ranking.
This example of a hybrid search weights KNN results more heavily than BM25 results in the final ranking (a weight of 5 versus 1.5), with a top-level normalizer applied to all retrievers.

```console
GET my_index/_search
Expand All @@ -105,23 +108,33 @@ GET my_index/_search
{
"retriever": {
"knn": {
...
"field": "title_vector",
"query_vector": [0.1, 0.2, 0.3],
"k": 10,
"num_candidates": 100
}
},
"weight": 5 # KNN query weighted 5x
},
{
"retriever": {
"standard": {
...
"query": {
"match": {
"title": "elasticsearch"
}
}
}
},
"weight": 1.5 # BM25 query weighted 1.5x
}
]
],
"normalizer": "minmax"
}
}
}
```

In this example, neither sub-retriever specifies its own `normalizer`, so the top-level `minmax` normalizer is applied to both the kNN retriever and the standard retriever. The top-level normalizer serves as a default that can be overridden by individual sub-retrievers. When using the multi-field query format, the top-level normalizer is applied to all generated inner retrievers.

See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever).
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ GET /retrievers_example/_search
}
},
"weight": 1.5,
"normalizer": "minmax"
"normalizer": "l2_norm"
}
],
"rank_window_size": 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -835,4 +835,43 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
);
assertThat(numAsyncCalls.get(), equalTo(4));
}

/**
 * Builds a linear retriever with a top-level {@link MinMaxScoreNormalizer} where only the second
 * sub-retriever declares its own normalizer ({@link L2ScoreNormalizer}). Checks that the builder
 * reports the top-level normalizer for the first and third entries, keeps the explicit one for the
 * second, and that running the search returns at least one hit.
 */
public void testMixedNormalizerInheritance() throws IOException {
    // Three small documents so that each of the match queries below has something to hit.
    client().prepareIndex(INDEX)
        .setId("1")
        .setSource("field1", "elasticsearch search", "field2", "database technology", "score", 10)
        .get();
    client().prepareIndex(INDEX)
        .setId("2")
        .setSource("field1", "lucene engine", "field2", "search technology", "score", 5)
        .get();
    client().prepareIndex(INDEX)
        .setId("3")
        .setSource("field1", "information retrieval", "field2", "database search", "score", 15)
        .get();
    refresh(INDEX);

    List<CompoundRetrieverBuilder.RetrieverSource> sources = List.of(
        CompoundRetrieverBuilder.RetrieverSource.from(new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "elasticsearch"))),
        CompoundRetrieverBuilder.RetrieverSource.from(new StandardRetrieverBuilder(QueryBuilders.matchQuery("field2", "technology"))),
        CompoundRetrieverBuilder.RetrieverSource.from(new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "search")))
    );
    float[] equalWeights = new float[] { 1.0f, 1.0f, 1.0f };
    // null entries mean "no per-retriever normalizer was given".
    ScoreNormalizer[] perRetriever = new ScoreNormalizer[] { null, L2ScoreNormalizer.INSTANCE, null };

    LinearRetrieverBuilder retriever = new LinearRetrieverBuilder(
        sources,
        null,
        null,
        MinMaxScoreNormalizer.INSTANCE,
        10,
        equalWeights,
        perRetriever
    );

    ScoreNormalizer[] resolved = retriever.getNormalizers();
    assertThat(resolved[0], equalTo(MinMaxScoreNormalizer.INSTANCE));
    assertThat(resolved[1], equalTo(L2ScoreNormalizer.INSTANCE));
    assertThat(resolved[2], equalTo(MinMaxScoreNormalizer.INSTANCE));

    assertResponse(
        client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(retriever)),
        response -> assertThat(response.getHits().getTotalHits().value() > 0L, is(true))
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import static org.elasticsearch.action.ValidateActions.addValidationError;
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED;
import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_NORMALIZER;
import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT;

/**
Expand Down Expand Up @@ -74,6 +75,16 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder<Linea
private final String query;
private final ScoreNormalizer normalizer;

/**
 * Picks the normalizer to use for one sub-retriever.
 * <p>
 * Precedence: the component's own normalizer if non-null, otherwise the top-level normalizer if
 * non-null, otherwise {@code DEFAULT_NORMALIZER}.
 *
 * @param componentNormalizer normalizer given on the individual retriever entry; may be {@code null}
 * @param topLevelNormalizer  normalizer given on the enclosing linear retriever; may be {@code null}
 * @return the first non-null candidate in the order above, falling back to {@code DEFAULT_NORMALIZER}
 */
private static ScoreNormalizer resolveNormalizer(ScoreNormalizer componentNormalizer, ScoreNormalizer topLevelNormalizer) {
    // An explicit per-component choice always wins.
    if (componentNormalizer != null) {
        return componentNormalizer;
    }
    return topLevelNormalizer != null ? topLevelNormalizer : DEFAULT_NORMALIZER;
}

@SuppressWarnings("unchecked")
static final ConstructingObjectParser<LinearRetrieverBuilder, RetrieverParserContext> PARSER = new ConstructingObjectParser<>(
NAME,
Expand All @@ -92,7 +103,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder<Linea
for (LinearRetrieverComponent component : retrieverComponents) {
innerRetrievers.add(RetrieverSource.from(component.retriever));
weights[index] = component.weight;
normalizers[index] = component.normalizer;
normalizers[index] = resolveNormalizer(component.normalizer, normalizer);
index++;
}
return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers);
Expand All @@ -118,7 +129,7 @@ private static float[] getDefaultWeight(List<RetrieverSource> innerRetrievers) {
private static ScoreNormalizer[] getDefaultNormalizers(List<RetrieverSource> innerRetrievers) {
int size = innerRetrievers != null ? innerRetrievers.size() : 0;
ScoreNormalizer[] normalizers = new ScoreNormalizer[size];
Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE);
Arrays.fill(normalizers, DEFAULT_NORMALIZER);
return normalizers;
}

Expand Down Expand Up @@ -167,7 +178,10 @@ public LinearRetrieverBuilder(
this.query = query;
this.normalizer = normalizer;
this.weights = weights;
this.normalizers = normalizers;
this.normalizers = new ScoreNormalizer[normalizers.length];
for (int i = 0; i < normalizers.length; i++) {
this.normalizers[i] = resolveNormalizer(normalizers[i], normalizer);
}
}

public LinearRetrieverBuilder(
Expand Down Expand Up @@ -221,19 +235,7 @@ public ActionRequestValidationException validate(
),
validationException
);
} else if (innerRetrievers.isEmpty() == false && normalizer != null) {
validationException = addValidationError(
String.format(
Locale.ROOT,
"[%s] [%s] cannot be provided when [%s] is specified",
getName(),
NORMALIZER_FIELD.getPreferredName(),
RETRIEVERS_FIELD.getPreferredName()
),
validationException
);
}

return validationException;
}

Expand Down Expand Up @@ -339,7 +341,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) {
for (var weightedRetriever : r) {
retrievers.add(weightedRetriever.retrieverSource());
weights[index] = weightedRetriever.weight();
normalizers[index] = normalizer;
normalizers[index] = resolveNormalizer(null, normalizer);
index++;
}

Expand All @@ -357,7 +359,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) {
Arrays.fill(weights, DEFAULT_WEIGHT);

ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()];
Arrays.fill(normalizers, normalizer);
Arrays.fill(normalizers, resolveNormalizer(null, normalizer));

// TODO: This is a incomplete solution as it does not address other incomplete copy issues
// (such as dropping the retriever name and min score)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight,
assert retrieverBuilder != null;
this.retriever = retrieverBuilder;
this.weight = weight == null ? DEFAULT_WEIGHT : weight;
this.normalizer = normalizer == null ? DEFAULT_NORMALIZER : normalizer;
this.normalizer = normalizer; // Don't default to identity, allow null for top-level fallback
if (this.weight < 0) {
throw new IllegalArgumentException("[weight] must be non-negative");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.xcontent.NamedXContentRegistry;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentType;
import org.junit.AfterClass;
import org.junit.BeforeClass;

Expand All @@ -27,6 +28,7 @@
import java.util.List;

import static java.util.Collections.emptyList;
import static org.hamcrest.Matchers.instanceOf;

public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase<LinearRetrieverBuilder> {
private static List<NamedXContentRegistry.Entry> xContentRegistryEntries;
Expand Down Expand Up @@ -67,7 +69,9 @@ protected LinearRetrieverBuilder createTestInstance() {
new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null)
);
weights[i] = randomFloat();
normalizers[i] = randomScoreNormalizer();
normalizers[i] = randomFrom(
new ScoreNormalizer[] { null, MinMaxScoreNormalizer.INSTANCE, L2ScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE }
);
}

return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers);
Expand Down Expand Up @@ -108,10 +112,51 @@ protected NamedXContentRegistry xContentRegistry() {
}

private static ScoreNormalizer randomScoreNormalizer() {
if (randomBoolean()) {
return MinMaxScoreNormalizer.INSTANCE;
} else {
return IdentityScoreNormalizer.INSTANCE;
int random = randomInt(2);
return switch (random) {
case 0 -> MinMaxScoreNormalizer.INSTANCE;
case 1 -> L2ScoreNormalizer.INSTANCE;
default -> IdentityScoreNormalizer.INSTANCE;
};
}

/**
 * Parses a {@code linear} retriever whose two sub-retrievers each declare {@code "normalizer": "none"}
 * while the top level declares {@code "normalizer": "minmax"}, then checks that both resolved
 * normalizers are {@link IdentityScoreNormalizer} instances.
 * <p>
 * Despite the method name, this covers the override case: an explicit per-retriever normalizer is
 * expected to take precedence over the top-level one.
 */
public void testTopLevelNormalizer() throws IOException {
String json = """
{
"linear": {
"retrievers": [
{
"retriever": {
"test": {
"value": "test1"
}
},
"weight": 1.0,
"normalizer": "none"
},
{
"retriever": {
"test": {
"value": "test2"
}
},
"weight": 1.0,
"normalizer": "none"
}
],
"normalizer": "minmax"
}
}""";

try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) {
LinearRetrieverBuilder builder = doParseInstance(parser);
// Each sub-retriever explicitly sets 'none', so the top-level 'minmax' must NOT be applied:
// the per-retriever normalizer overrides the top-level default.
ScoreNormalizer[] normalizers = builder.getNormalizers();
assertEquals(2, normalizers.length);
for (ScoreNormalizer normalizer : normalizers) {
assertThat(normalizer, instanceOf(IdentityScoreNormalizer.class));
}
}
}
}
Loading
Loading