Skip to content

Commit f4d2dd2

Browse files
authored
Merge pull request #35311 from vespa-engine/arnej/wip-simpler-annotations-7
Add SimpleIndexingAnnotations for memory reduction in indexing
2 parents f17bc9a + ee4177b commit f4d2dd2

File tree

24 files changed

+2077
-113
lines changed

24 files changed

+2077
-113
lines changed

config-model-api/abi-spec.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1333,6 +1333,7 @@
13331333
"public double logserverNodeMemory()",
13341334
"public double clusterControllerNodeMemory()",
13351335
"public boolean useLegacyWandQueryParsing()",
1336+
"public boolean useSimpleAnnotations()",
13361337
"public boolean sendProtobufQuerytree()",
13371338
"public boolean forwardAllLogLevels()",
13381339
"public long zookeeperPreAllocSize()",

config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ interface FeatureFlags {
108108
@ModelFeatureFlag(owners = {"arnej"}) default double logserverNodeMemory() { return 0.0; }
109109
@ModelFeatureFlag(owners = {"arnej"}) default double clusterControllerNodeMemory() { return 0.0; }
110110
@ModelFeatureFlag(owners = {"arnej"}) default boolean useLegacyWandQueryParsing() { return true; }
111+
@ModelFeatureFlag(owners = {"arnej"}) default boolean useSimpleAnnotations() { return false; }
111112
@ModelFeatureFlag(owners = {"arnej"}) default boolean sendProtobufQuerytree() { return false; }
112113
@ModelFeatureFlag(owners = {"hmusum"}) default boolean forwardAllLogLevels() { return true; }
113114
@ModelFeatureFlag(owners = {"hmusum"}) default long zookeeperPreAllocSize() { return 65536L; }

config-model/src/main/java/com/yahoo/vespa/model/container/docproc/ContainerDocproc.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,18 @@ public class ContainerDocproc extends ContainerSubsystem<DocprocChains> implemen
2929

3030
public final Options options;
3131
private final Map<Pair<String, String>, String> fieldNameSchemaMap = new HashMap<>();
32+
private final boolean useSimpleAnnotations;
3233

3334
public ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains) {
34-
this(cluster, chains, Options.empty());
35+
this(cluster, chains, Options.empty(), null);
3536
}
3637

37-
private ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains, Options options) {
38-
this(cluster, chains, options, true);
38+
public ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains, DeployState deployState) {
39+
this(cluster, chains, Options.empty(), deployState, true);
40+
}
41+
42+
private ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains, Options options, DeployState deployState) {
43+
this(cluster, chains, options, deployState, true);
3944
}
4045

4146
private void addSource(ContainerCluster<?> cluster, String name, SessionConfig.Type.Enum type) {
@@ -44,10 +49,11 @@ private void addSource(ContainerCluster<?> cluster, String name, SessionConfig.T
4449
cluster.addComponent(mbusClient);
4550
}
4651

47-
public ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains, Options options, boolean addSourceClientProvider) {
52+
public ContainerDocproc(ContainerCluster<?> cluster, DocprocChains chains, Options options, DeployState deployState, boolean addSourceClientProvider) {
4853
super(chains);
4954
assert (options != null) : "Null Options for " + this + " under cluster " + cluster.getName();
5055
this.options = options;
56+
this.useSimpleAnnotations = deployState != null && deployState.featureFlags().useSimpleAnnotations();
5157

5258
if (addSourceClientProvider) {
5359
addSource(cluster, "source", SessionConfig.Type.SOURCE);
@@ -80,6 +86,7 @@ public void getConfig(DocprocConfig.Builder builder) {
8086
if (getMaxQueueTimeMs() != null) {
8187
builder.maxqueuetimems(getMaxQueueTimeMs());
8288
}
89+
builder.simpleAnnotations(useSimpleAnnotations);
8390
}
8491

8592
@Override

config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1360,7 +1360,7 @@ private ContainerDocproc buildDocproc(DeployState deployState, ApplicationContai
13601360
DocprocChains chains = new DomDocprocChainsBuilder(docprocHandlerThreadpool).build(deployState, cluster, docprocElement);
13611361

13621362
ContainerDocproc.Options docprocOptions = DocprocOptionsBuilder.build(docprocElement, deployState.getDeployLogger());
1363-
return new ContainerDocproc(cluster, chains, docprocOptions, !standaloneBuilder);
1363+
return new ContainerDocproc(cluster, chains, docprocOptions, deployState, !standaloneBuilder);
13641364
}
13651365

13661366
private void addIncludes(Element parentElement) {

config-model/src/main/java/com/yahoo/vespa/model/content/Content.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ private void addDocproc(ContainerCluster<?> cluster, DeployState deployState) {
354354
if (cluster.getDocproc() == null) {
355355
DocprocChains chains = new DocprocChains(cluster, "docprocchains",
356356
new ContainerDocproc.Threadpool(deployState, null));
357-
ContainerDocproc containerDocproc = new ContainerDocproc(cluster, chains);
357+
ContainerDocproc containerDocproc = new ContainerDocproc(cluster, chains, deployState);
358358
cluster.setDocproc(containerDocproc);
359359
}
360360
}

configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ public static class FeatureFlags implements ModelContext.FeatureFlags {
200200
private final double logserverNodeMemory;
201201
private final double clusterControllerNodeMemory;
202202
private final boolean useLegacyWandQueryParsing;
203+
private final boolean useSimpleAnnotations;
203204
private final boolean sendProtobufQuerytree;
204205
private final boolean forwardAllLogLevels;
205206
private final long zookeeperPreAllocSize;
@@ -247,6 +248,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
247248
this.logserverNodeMemory = PermanentFlags.LOGSERVER_NODE_MEMORY.bindTo(source).with(appId).with(version).value();
248249
this.clusterControllerNodeMemory = PermanentFlags.CLUSTER_CONTROLLER_NODE_MEMORY.bindTo(source).with(appId).with(version).value();
249250
this.useLegacyWandQueryParsing = Flags.USE_LEGACY_WAND_QUERY_PARSING.bindTo(source).with(appId).with(version).value();
251+
this.useSimpleAnnotations = Flags.USE_SIMPLE_ANNOTATIONS.bindTo(source).with(appId).with(version).value();
250252
this.sendProtobufQuerytree = Flags.SEND_PROTOBUF_QUERYTREE.bindTo(source).with(appId).with(version).value();
251253
this.forwardAllLogLevels = PermanentFlags.FORWARD_ALL_LOG_LEVELS.bindTo(source).with(appId).with(version).value();
252254
this.zookeeperPreAllocSize = Flags.ZOOKEEPER_PRE_ALLOC_SIZE_KIB.bindTo(source).value();
@@ -295,6 +297,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
295297
@Override public double logserverNodeMemory() { return logserverNodeMemory; }
296298
@Override public double clusterControllerNodeMemory() { return clusterControllerNodeMemory; }
297299
@Override public boolean useLegacyWandQueryParsing() { return useLegacyWandQueryParsing; }
300+
@Override public boolean useSimpleAnnotations() { return useSimpleAnnotations; }
298301
@Override public boolean sendProtobufQuerytree() { return sendProtobufQuerytree; }
299302
@Override public boolean forwardAllLogLevels() { return forwardAllLogLevels; }
300303
@Override public long zookeeperPreAllocSize() { return zookeeperPreAllocSize; }

docproc/src/main/java/com/yahoo/docproc/jdisc/DocumentProcessingHandler.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ public DocumentProcessingHandler(ComponentRegistry<DocumentProcessor> documentPr
109109
.setMetric(metric)
110110
.setContainerDocumentConfig(containerDocConfig),
111111
threadPool);
112+
113+
// Set simple annotations flag based on config
114+
com.yahoo.document.annotation.internal.SimpleIndexingAnnotations.setEnabled(
115+
docprocConfig.simpleAnnotations());
116+
112117
docprocServiceRegistry.freeze();
113118
}
114119

docproc/src/main/resources/configdefinitions/config.docproc.docproc.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ maxqueuetimems int default=-1
88
# The number of threads in the DocprocHandler worker thread pool
99
# numthreads no longer has an effect.
1010
numthreads int default=-1
11+
12+
# Enable lightweight annotation representation for StringFieldValue.
13+
# When enabled, uses SimpleIndexingAnnotations (flat arrays) instead of full SpanTree objects.
14+
simpleAnnotations bool default=false

document/abi-spec.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2173,8 +2173,12 @@
21732173
"public java.util.Collection getSpanTrees()",
21742174
"public final java.util.Map getSpanTreeMap()",
21752175
"public com.yahoo.document.annotation.SpanTree getSpanTree(java.lang.String)",
2176+
"public boolean hasSpanTree(java.lang.String)",
21762177
"public com.yahoo.document.annotation.SpanTree setSpanTree(com.yahoo.document.annotation.SpanTree)",
21772178
"public com.yahoo.document.annotation.SpanTree removeSpanTree(java.lang.String)",
2179+
"public com.yahoo.document.annotation.internal.SimpleIndexingAnnotations createSimpleAnnotations()",
2180+
"public com.yahoo.document.annotation.internal.SimpleIndexingAnnotations getSimpleAnnotations()",
2181+
"public void setSimpleAnnotations(com.yahoo.document.annotation.internal.SimpleIndexingAnnotations)",
21782182
"public java.lang.String getString()",
21792183
"public java.lang.Object getWrappedValue()",
21802184
"public void printXml(com.yahoo.document.serialization.XmlStream)",
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
2+
package com.yahoo.document.annotation.internal;
3+
4+
import com.yahoo.document.annotation.Annotation;
5+
import com.yahoo.document.annotation.AnnotationTypes;
6+
import com.yahoo.document.annotation.Span;
7+
import com.yahoo.document.annotation.SpanTree;
8+
import com.yahoo.document.annotation.SpanTrees;
9+
import com.yahoo.document.datatypes.StringFieldValue;
10+
11+
import java.util.logging.Logger;
12+
13+
/**
14+
* Lightweight representation of TERM annotations for indexing.
15+
* Uses flat arrays instead of object graphs.
16+
* NOT part of public API - internal optimization for indexing performance.
17+
*
18+
* This class can only represent simple TERM annotations with:
19+
* - Position (from, length)
20+
* - Optional term override (when term differs from substring)
21+
*
22+
* @author havardpe
23+
*/
24+
public final class SimpleIndexingAnnotations {
25+
26+
private static final Logger log = Logger.getLogger(SimpleIndexingAnnotations.class.getName());
27+
28+
/**
29+
* Global feature flag to enable/disable simple annotations representation.
30+
* When enabled, uses SimpleIndexingAnnotations (flat arrays) instead of full SpanTree objects.
31+
*/
32+
private static volatile boolean enabled = false;
33+
34+
/**
35+
* Returns whether simple annotations are enabled.
36+
* @return true if simple annotations should be used, false otherwise
37+
*/
38+
public static boolean isEnabled() {
39+
return enabled;
40+
}
41+
42+
/**
43+
* Sets whether simple annotations should be enabled.
44+
* @param value true to enable simple annotations, false to disable
45+
*/
46+
public static void setEnabled(boolean value) {
47+
log.info("SimpleIndexingAnnotations enabled? " + enabled + " -> " + value);
48+
enabled = value;
49+
}
50+
51+
// Flat arrays for maximum memory density
52+
private int[] positions; // [from1, len1, from2, len2, ...]
53+
private String[] terms; // [term1, term2, ...] - null when term equals substring
54+
private int count;
55+
56+
public SimpleIndexingAnnotations() {
57+
this.positions = new int[32]; // Start with capacity for 16 annotations
58+
this.terms = new String[16];
59+
this.count = 0;
60+
}
61+
62+
/**
63+
* Add a TERM annotation.
64+
*
65+
* @param from the start position in the text (character offset)
66+
* @param length the length of the span (in characters)
67+
* @param term the term to index, or null if term equals the substring of original text
68+
*/
69+
public void add(int from, int length, String term) {
70+
ensureCapacity();
71+
positions[count * 2] = from;
72+
positions[count * 2 + 1] = length;
73+
terms[count] = term;
74+
count++;
75+
}
76+
77+
private void ensureCapacity() {
78+
if (count * 2 >= positions.length) {
79+
// Grow by 2x
80+
int[] newPos = new int[positions.length * 2];
81+
String[] newTerms = new String[terms.length * 2];
82+
System.arraycopy(positions, 0, newPos, 0, count * 2);
83+
System.arraycopy(terms, 0, newTerms, 0, count);
84+
positions = newPos;
85+
terms = newTerms;
86+
}
87+
}
88+
89+
public int getCount() {
90+
return count;
91+
}
92+
93+
public int getFrom(int idx) {
94+
return positions[idx * 2];
95+
}
96+
97+
public int getLength(int idx) {
98+
return positions[idx * 2 + 1];
99+
}
100+
101+
/**
102+
* Get the term override for annotation at index, or null if term equals substring.
103+
*/
104+
public String getTerm(int idx) {
105+
return terms[idx];
106+
}
107+
108+
/**
109+
* Convert to full SpanTree representation for API compatibility.
110+
* This is only called when code actually needs to iterate over annotations,
111+
* which is rare (mainly deprecated FlattenExpression and tests).
112+
* Serialization uses direct path and never calls this.
113+
*/
114+
public SpanTree toSpanTree(String name) {
115+
SpanTree tree = new SpanTree(name);
116+
Span currentSpan = null;
117+
int currentFrom = -1;
118+
int currentLength = -1;
119+
120+
for (int i = 0; i < count; i++) {
121+
int from = getFrom(i);
122+
int length = getLength(i);
123+
124+
// Check if this annotation is for the same span as the previous one
125+
if (from != currentFrom || length != currentLength) {
126+
// Different span, create a new one
127+
currentSpan = tree.spanList().span(from, length);
128+
currentFrom = from;
129+
currentLength = length;
130+
}
131+
// else: same span, reuse currentSpan
132+
133+
String term = getTerm(i);
134+
if (term != null) {
135+
tree.annotate(currentSpan, new Annotation(AnnotationTypes.TERM,
136+
new StringFieldValue(term)));
137+
} else {
138+
tree.annotate(currentSpan, new Annotation(AnnotationTypes.TERM));
139+
}
140+
}
141+
return tree;
142+
}
143+
144+
@Override
145+
public String toString() {
146+
return "SimpleIndexingAnnotations with " + count + " TERM annotations";
147+
}
148+
149+
@Override
150+
public boolean equals(Object o) {
151+
if (this == o) return true;
152+
if (!(o instanceof SimpleIndexingAnnotations that)) return false;
153+
154+
if (count != that.count) return false;
155+
156+
// Compare the relevant portions of the arrays
157+
for (int i = 0; i < count; i++) {
158+
if (positions[i * 2] != that.positions[i * 2] ||
159+
positions[i * 2 + 1] != that.positions[i * 2 + 1]) {
160+
return false;
161+
}
162+
if (!java.util.Objects.equals(terms[i], that.terms[i])) {
163+
return false;
164+
}
165+
}
166+
return true;
167+
}
168+
169+
@Override
170+
public int hashCode() {
171+
int result = count;
172+
for (int i = 0; i < count; i++) {
173+
result = 31 * result + positions[i * 2];
174+
result = 31 * result + positions[i * 2 + 1];
175+
result = 31 * result + java.util.Objects.hashCode(terms[i]);
176+
}
177+
return result;
178+
}
179+
}

0 commit comments

Comments
 (0)