Skip to content

Commit 43665f0

Browse files
authored
Store array offsets for keyword fields natively with synthetic source (#113757)
The keyword doc values field gets an extra sorted doc values field that encodes the order in which array values were specified at index time. This also captures duplicate values. It is stored as an offset-to-ordinal array that gets zigzag-vint encoded into a sorted doc values field. For example, for the following string array for a keyword field: ["c", "b", "a", "c"], the sorted set doc values are ["a", "b", "c"] with ordinals 0, 1 and 2, and the offset array will be [2, 1, 0, 2]. Null values are also supported. For example, ["c", "b", null, "c"] results in sorted set doc values ["b", "c"] with ordinals 0 and 1, and the offset array will be [1, 0, -1, 1]. Empty arrays are also supported, by encoding a zigzag-vint array of zero elements. Limitations: currently there is only doc-values-based array support for the keyword field mapper; multi-level leaf arrays are flattened, for example [[b], [c]] -> [b, c]; arrays are always synthesized as one type, so for a keyword field [1, 2] gets synthesized as ["1", "2"]. These limitations can be addressed, but some require more complexity and/or additional storage. With this PR, keyword field arrays will no longer be stored in ignored source; instead, array offsets are kept track of in an adjacent sorted doc values field. This only applies if index.mapping.synthetic_source_keep is set to arrays (the default for logsdb).
1 parent 18df4d0 commit 43665f0

File tree

19 files changed

+1106
-33
lines changed

19 files changed

+1106
-33
lines changed

docs/changelog/113757.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 113757
2+
summary: Store array offsets for keyword fields natively with synthetic source instead of falling back to ignored source.
3+
area: Mapping
4+
type: enhancement
5+
issues: []

rest-api-spec/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
8383
"node_version warning is removed in 9.0"
8484
)
8585
task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported")
86+
task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields")
87+
task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively")
88+
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
89+
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
8690
})

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,7 @@ subobjects auto:
922922
- match: { hits.hits.0._source.foo: 10 }
923923
- match: { hits.hits.0._source.foo\.bar: 100 }
924924
- match: { hits.hits.0._source.regular.span.id: "1" }
925-
- match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] }
925+
- match: { hits.hits.0._source.regular.trace.id: ["a", "b" ] }
926926
- match: { hits.hits.1._source.id: 2 }
927927
- match: { hits.hits.1._source.foo: 20 }
928928
- match: { hits.hits.1._source.foo\.bar: 200 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,7 @@ index param - field ordering:
10241024
index: test
10251025

10261026
- length: { hits.hits.0._source: 4 }
1027-
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
1027+
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": ["30", "20", "10"], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
10281028

10291029

10301030
---

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ private static Version parseUnchecked(String version) {
148148
public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY_BY_DEFAULT = def(9_010_00_0, Version.LUCENE_10_1_0);
149149
public static final IndexVersion TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_011_0_00, Version.LUCENE_10_1_0);
150150
public static final IndexVersion TIME_SERIES_ID_DOC_VALUES_SPARSE_INDEX = def(9_012_0_00, Version.LUCENE_10_1_0);
151+
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD = def(9_013_0_00, Version.LUCENE_10_1_0);
151152
/*
152153
* STOP! READ THIS FIRST! No, really,
153154
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ private void internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers,
154154

155155
executeIndexTimeScripts(context);
156156

157+
context.processArrayOffsets(context);
157158
for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
158159
metadataMapper.postParse(context);
159160
}
@@ -519,6 +520,7 @@ private static void throwOnCopyToOnObject(Mapper mapper, List<String> copyToFiel
519520

520521
private static void parseObject(final DocumentParserContext context, String currentFieldName) throws IOException {
521522
assert currentFieldName != null;
523+
context.setImmediateXContentParent(context.parser().currentToken());
522524
Mapper objectMapper = context.getMapper(currentFieldName);
523525
if (objectMapper != null) {
524526
doParseObject(context, currentFieldName, objectMapper);
@@ -611,6 +613,12 @@ private static void throwOnCreateDynamicNestedViaCopyTo(Mapper dynamicObjectMapp
611613
}
612614

613615
private static void parseArray(DocumentParserContext context, String lastFieldName) throws IOException {
616+
// Record previous immediate parent, so that it can be reset after array has been parsed.
617+
// This is for recording array offset with synthetic source. Only if the immediate parent is an array,
618+
// then the offsets can be accounted accurately.
619+
var prev = context.getImmediateXContentParent();
620+
context.setImmediateXContentParent(context.parser().currentToken());
621+
614622
Mapper mapper = getLeafMapper(context, lastFieldName);
615623
if (mapper != null) {
616624
// There is a concrete mapper for this field already. Need to check if the mapper
@@ -624,6 +632,8 @@ private static void parseArray(DocumentParserContext context, String lastFieldNa
624632
} else {
625633
parseArrayDynamic(context, lastFieldName);
626634
}
635+
// Reset previous immediate parent
636+
context.setImmediateXContentParent(prev);
627637
}
628638

629639
private static void parseArrayDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
@@ -688,11 +698,12 @@ private static void parseNonDynamicArray(
688698
final String lastFieldName,
689699
String arrayFieldName
690700
) throws IOException {
701+
boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets();
691702
String fullPath = context.path().pathAsText(arrayFieldName);
692703

693704
// Check if we need to record the array source. This only applies to synthetic source.
694705
boolean canRemoveSingleLeafElement = false;
695-
if (context.canAddIgnoredField()) {
706+
if (context.canAddIgnoredField() && supportStoringArrayOffsets == false) {
696707
Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE;
697708
boolean objectWithFallbackSyntheticSource = false;
698709
if (mapper instanceof ObjectMapper objectMapper) {
@@ -736,6 +747,7 @@ private static void parseNonDynamicArray(
736747

737748
XContentParser parser = context.parser();
738749
XContentParser.Token token;
750+
XContentParser.Token previousToken = parser.currentToken();
739751
int elements = 0;
740752
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
741753
if (token == XContentParser.Token.START_OBJECT) {
@@ -754,6 +766,14 @@ private static void parseNonDynamicArray(
754766
elements++;
755767
parseValue(context, lastFieldName);
756768
}
769+
previousToken = token;
770+
}
771+
if (mapper != null
772+
&& context.canAddIgnoredField()
773+
&& mapper.supportStoringArrayOffsets()
774+
&& previousToken == XContentParser.Token.START_ARRAY
775+
&& context.isImmediateParentAnArray()) {
776+
context.getOffSetContext().maybeRecordEmptyArray(mapper.getOffsetFieldName());
757777
}
758778
if (elements <= 1 && canRemoveSingleLeafElement) {
759779
context.removeLastIgnoredField(fullPath);

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,31 @@ public LuceneDocument doc() {
9191
protected void addDoc(LuceneDocument doc) {
9292
in.addDoc(doc);
9393
}
94+
95+
@Override
96+
public void processArrayOffsets(DocumentParserContext context) throws IOException {
97+
in.processArrayOffsets(context);
98+
}
99+
100+
@Override
101+
public FieldArrayContext getOffSetContext() {
102+
return in.getOffSetContext();
103+
}
104+
105+
@Override
106+
public void setImmediateXContentParent(XContentParser.Token token) {
107+
in.setImmediateXContentParent(token);
108+
}
109+
110+
@Override
111+
public XContentParser.Token getImmediateXContentParent() {
112+
return in.getImmediateXContentParent();
113+
}
114+
115+
@Override
116+
public boolean isImmediateParentAnArray() {
117+
return in.isImmediateParentAnArray();
118+
}
94119
}
95120

96121
/**
@@ -141,6 +166,8 @@ private enum Scope {
141166
private final SeqNoFieldMapper.SequenceIDFields seqID;
142167
private final Set<String> fieldsAppliedFromTemplates;
143168

169+
private FieldArrayContext fieldArrayContext;
170+
144171
/**
145172
* Fields that are copied from values of other fields via copy_to.
146173
* This per-document state is needed since it is possible
@@ -460,6 +487,33 @@ public boolean isCopyToDestinationField(String name) {
460487
return copyToFields.contains(name);
461488
}
462489

490+
public void processArrayOffsets(DocumentParserContext context) throws IOException {
491+
if (fieldArrayContext != null) {
492+
fieldArrayContext.addToLuceneDocument(context);
493+
}
494+
}
495+
496+
public FieldArrayContext getOffSetContext() {
497+
if (fieldArrayContext == null) {
498+
fieldArrayContext = new FieldArrayContext();
499+
}
500+
return fieldArrayContext;
501+
}
502+
503+
private XContentParser.Token lastSetToken;
504+
505+
public void setImmediateXContentParent(XContentParser.Token token) {
506+
this.lastSetToken = token;
507+
}
508+
509+
public XContentParser.Token getImmediateXContentParent() {
510+
return lastSetToken;
511+
}
512+
513+
public boolean isImmediateParentAnArray() {
514+
return lastSetToken == XContentParser.Token.START_ARRAY;
515+
}
516+
463517
/**
464518
* Add a new mapper dynamically created while parsing.
465519
*
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.document.SortedDocValuesField;
13+
import org.apache.lucene.util.BitUtil;
14+
import org.elasticsearch.common.io.stream.BytesStreamOutput;
15+
import org.elasticsearch.common.io.stream.StreamInput;
16+
17+
import java.io.IOException;
18+
import java.util.ArrayList;
19+
import java.util.HashMap;
20+
import java.util.List;
21+
import java.util.Map;
22+
import java.util.TreeMap;
23+
24+
public class FieldArrayContext {
25+
26+
private final Map<String, Offsets> offsetsPerField = new HashMap<>();
27+
28+
void recordOffset(String field, String value) {
29+
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
30+
int nextOffset = arrayOffsets.currentOffset++;
31+
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
32+
offsets.add(nextOffset);
33+
}
34+
35+
void recordNull(String field) {
36+
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
37+
int nextOffset = arrayOffsets.currentOffset++;
38+
arrayOffsets.nullValueOffsets.add(nextOffset);
39+
}
40+
41+
void maybeRecordEmptyArray(String field) {
42+
offsetsPerField.computeIfAbsent(field, k -> new Offsets());
43+
}
44+
45+
void addToLuceneDocument(DocumentParserContext context) throws IOException {
46+
for (var entry : offsetsPerField.entrySet()) {
47+
var fieldName = entry.getKey();
48+
var offset = entry.getValue();
49+
50+
int currentOrd = 0;
51+
// This array allows to retain the original ordering of elements in leaf arrays and retain duplicates.
52+
int[] offsetToOrd = new int[offset.currentOffset];
53+
for (var offsetEntry : offset.valueToOffsets.entrySet()) {
54+
for (var offsetAndLevel : offsetEntry.getValue()) {
55+
offsetToOrd[offsetAndLevel] = currentOrd;
56+
}
57+
currentOrd++;
58+
}
59+
for (var nullOffset : offset.nullValueOffsets) {
60+
offsetToOrd[nullOffset] = -1;
61+
}
62+
63+
try (var streamOutput = new BytesStreamOutput()) {
64+
// Could just use vint for array length, but this allows for decoding my_field: null as -1
65+
streamOutput.writeVInt(BitUtil.zigZagEncode(offsetToOrd.length));
66+
for (int ord : offsetToOrd) {
67+
streamOutput.writeVInt(BitUtil.zigZagEncode(ord));
68+
}
69+
context.doc().add(new SortedDocValuesField(fieldName, streamOutput.bytes().toBytesRef()));
70+
}
71+
}
72+
}
73+
74+
static int[] parseOffsetArray(StreamInput in) throws IOException {
75+
int[] offsetToOrd = new int[BitUtil.zigZagDecode(in.readVInt())];
76+
for (int i = 0; i < offsetToOrd.length; i++) {
77+
offsetToOrd[i] = BitUtil.zigZagDecode(in.readVInt());
78+
}
79+
return offsetToOrd;
80+
}
81+
82+
private static class Offsets {
83+
84+
int currentOffset;
85+
// Need to use TreeMap here, so that we maintain the order in which each value (with offset) stored inserted,
86+
// (which is in the same order the document gets parsed) so we store offsets in right order. This is the same
87+
// order in what the values get stored in SortedSetDocValues.
88+
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
89+
final List<Integer> nullValueOffsets = new ArrayList<>(2);
90+
91+
}
92+
93+
}

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,15 +200,15 @@ public void parse(DocumentParserContext context) throws IOException {
200200
}
201201
}
202202

203-
private void doParseMultiFields(DocumentParserContext context) throws IOException {
203+
protected void doParseMultiFields(DocumentParserContext context) throws IOException {
204204
context.path().add(leafName());
205205
for (FieldMapper mapper : builderParams.multiFields.mappers) {
206206
mapper.parse(context);
207207
}
208208
context.path().remove();
209209
}
210210

211-
private static void throwIndexingWithScriptParam() {
211+
protected static void throwIndexingWithScriptParam() {
212212
throw new IllegalArgumentException("Cannot index data directly into a field with a [script] parameter");
213213
}
214214

0 commit comments

Comments
 (0)