Skip to content

Commit d984a14

Browse files
authored
Skip SortingDigest when merging a large digest in HybridDigest (#97099)
* Skip SortingDigest when merging a large digest in HybridDigest. This is a small performance optimization that avoids creating an intermediate SortingDigest when merging a digest tracking many samples. The current behavior is to keep adding values to SortingDigest until we cross the threshold for switching to MergingDigest, at which point we copy all values from SortingDigest to MergingDigest and release the former. As a side cleanup, remove the methods for adding a list of digests. It's not used anywhere and it can be tricky to get right - the current implementation for HybridDigest is buggy. * Update docs/changelog/97099.yaml
1 parent 9744a1e commit d984a14

File tree

10 files changed

+16
-136
lines changed

10 files changed

+16
-136
lines changed

docs/changelog/97099.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 97099
2+
summary: Skip `SortingDigest` when merging a large digest in `HybridDigest`
3+
area: Aggregations
4+
type: enhancement
5+
issues: []

libs/tdigest/src/main/java/org/elasticsearch/tdigest/AVLTreeDigest.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Collection;
2525
import java.util.Collections;
2626
import java.util.Iterator;
27-
import java.util.List;
2827
import java.util.Random;
2928

3029
import static org.elasticsearch.tdigest.IntAVLTree.NIL;
@@ -68,16 +67,6 @@ public int centroidCount() {
6867
return summary.size();
6968
}
7069

71-
@Override
72-
public void add(List<? extends TDigest> others) {
73-
for (TDigest other : others) {
74-
setMinMax(Math.min(min, other.getMin()), Math.max(max, other.getMax()));
75-
for (Centroid centroid : other.centroids()) {
76-
add(centroid.mean(), centroid.count());
77-
}
78-
}
79-
}
80-
8170
@Override
8271
public void add(double x, int w) {
8372
checkValue(x);

libs/tdigest/src/main/java/org/elasticsearch/tdigest/HybridDigest.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.tdigest;
2121

2222
import java.util.Collection;
23-
import java.util.List;
2423

2524
/**
2625
* Uses a {@link SortingDigest} implementation under the covers for small sample populations, then switches to {@link MergingDigest}.
@@ -80,6 +79,16 @@ public void add(double x, int w) {
8079
}
8180
}
8281

82+
@Override
83+
public void add(TDigest other) {
84+
reserve(other.size());
85+
if (mergingDigest != null) {
86+
mergingDigest.add(other);
87+
} else {
88+
sortingDigest.add(other);
89+
}
90+
}
91+
8392
@Override
8493
public void reserve(long size) {
8594
if (mergingDigest != null) {
@@ -101,15 +110,6 @@ public void reserve(long size) {
101110
}
102111
}
103112

104-
@Override
105-
public void add(List<? extends TDigest> others) {
106-
if (mergingDigest != null) {
107-
mergingDigest.add(others);
108-
} else {
109-
sortingDigest.add(others);
110-
}
111-
}
112-
113113
@Override
114114
public void compress() {
115115
if (mergingDigest != null) {

libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.AbstractCollection;
2525
import java.util.Collection;
2626
import java.util.Iterator;
27-
import java.util.List;
2827

2928
/**
3029
* Maintains a t-digest by collecting new points in a buffer that is then sorted occasionally and merged
@@ -234,56 +233,6 @@ public void add(double x, int w) {
234233
}
235234
}
236235

237-
private void add(double[] m, double[] w, int count) {
238-
if (m.length != w.length) {
239-
throw new IllegalArgumentException("Arrays not same length");
240-
}
241-
if (m.length < count + lastUsedCell) {
242-
// make room to add existing centroids
243-
double[] m1 = new double[count + lastUsedCell];
244-
System.arraycopy(m, 0, m1, 0, count);
245-
m = m1;
246-
double[] w1 = new double[count + lastUsedCell];
247-
System.arraycopy(w, 0, w1, 0, count);
248-
w = w1;
249-
}
250-
double total = 0;
251-
for (int i = 0; i < count; i++) {
252-
total += w[i];
253-
}
254-
merge(m, w, count, null, total, false, compression);
255-
}
256-
257-
@Override
258-
public void add(List<? extends TDigest> others) {
259-
if (others.size() == 0) {
260-
return;
261-
}
262-
int size = 0;
263-
for (TDigest other : others) {
264-
other.compress();
265-
size += other.centroidCount();
266-
}
267-
268-
double[] m = new double[size];
269-
double[] w = new double[size];
270-
int offset = 0;
271-
for (TDigest other : others) {
272-
if (other instanceof MergingDigest md) {
273-
System.arraycopy(md.mean, 0, m, offset, md.lastUsedCell);
274-
System.arraycopy(md.weight, 0, w, offset, md.lastUsedCell);
275-
offset += md.lastUsedCell;
276-
} else {
277-
for (Centroid centroid : other.centroids()) {
278-
m[offset] = centroid.mean();
279-
w[offset] = centroid.count();
280-
offset++;
281-
}
282-
}
283-
}
284-
add(m, w, size);
285-
}
286-
287236
private void mergeNewValues() {
288237
mergeNewValues(compression);
289238
}

libs/tdigest/src/main/java/org/elasticsearch/tdigest/SortingDigest.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Collection;
2525
import java.util.Collections;
2626
import java.util.Iterator;
27-
import java.util.List;
2827

2928
/**
3029
* Simple implementation of the TDigest interface that stores internally and sorts all samples to calculate quantiles and CDFs.
@@ -50,21 +49,6 @@ public void add(double x, int w) {
5049
min = Math.min(min, x);
5150
}
5251

53-
@Override
54-
public void add(List<? extends TDigest> others) {
55-
long valuesToAddCount = 0;
56-
for (TDigest other : others) {
57-
valuesToAddCount += other.size();
58-
}
59-
reserve(valuesToAddCount);
60-
61-
for (TDigest other : others) {
62-
for (Centroid centroid : other.centroids()) {
63-
add(centroid.mean(), centroid.count());
64-
}
65-
}
66-
}
67-
6852
@Override
6953
public void compress() {
7054
if (isSorted == false) {

libs/tdigest/src/main/java/org/elasticsearch/tdigest/TDigest.java

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
package org.elasticsearch.tdigest;
2323

2424
import java.util.Collection;
25-
import java.util.List;
2625
import java.util.Locale;
2726

2827
/**
@@ -112,8 +111,6 @@ final void checkValue(double x) {
112111
}
113112
}
114113

115-
public abstract void add(List<? extends TDigest> others);
116-
117114
/**
118115
* Re-examines a t-digest to determine whether some centroids are redundant. If your data are
119116
* perversely ordered, this may be a good idea. Even if not, this may save 20% or so in space.
@@ -202,12 +199,4 @@ public double getMin() {
202199
public double getMax() {
203200
return max;
204201
}
205-
206-
/**
207-
* Override the min and max values for testing purposes
208-
*/
209-
void setMinMax(double min, double max) {
210-
this.min = min;
211-
this.max = max;
212-
}
213202
}

libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,7 @@ public void testNanDueToBadInitialization() {
6262

6363
// Merge all mds one at a time into md.
6464
for (int i = 0; i < M; ++i) {
65-
List<MergingDigest> singleton = new ArrayList<>();
66-
singleton.add(mds.get(i));
67-
md.add(singleton);
65+
md.add(mds.get(i));
6866
}
6967
Assert.assertFalse(Double.isNaN(md.quantile(0.01)));
7068

server/src/main/java/org/elasticsearch/search/aggregations/metrics/EmptyTDigestState.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,6 @@
88

99
package org.elasticsearch.search.aggregations.metrics;
1010

11-
import org.elasticsearch.tdigest.TDigest;
12-
13-
import java.util.List;
14-
1511
public final class EmptyTDigestState extends TDigestState {
1612
public EmptyTDigestState() {
1713
// Use the sorting implementation to minimize memory allocation.
@@ -28,16 +24,6 @@ public void add(double x) {
2824
throw new UnsupportedOperationException("Immutable Empty TDigest");
2925
}
3026

31-
@Override
32-
public void add(List<? extends TDigestState> others) {
33-
throw new UnsupportedOperationException("Immutable Empty TDigest");
34-
}
35-
36-
@Override
37-
public void add(TDigest other) {
38-
throw new UnsupportedOperationException("Immutable Empty TDigest");
39-
}
40-
4127
@Override
4228
public void add(TDigestState other) {
4329
throw new UnsupportedOperationException("Immutable Empty TDigest");

server/src/main/java/org/elasticsearch/search/aggregations/metrics/TDigestState.java

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,8 @@
1414
import org.elasticsearch.tdigest.TDigest;
1515

1616
import java.io.IOException;
17-
import java.util.ArrayList;
1817
import java.util.Collection;
1918
import java.util.Iterator;
20-
import java.util.List;
2119

2220
/**
2321
* Decorates {@link org.elasticsearch.tdigest.TDigest} with custom serialization. The underlying implementation for TDigest is selected
@@ -215,18 +213,6 @@ public void add(double x) {
215213
tdigest.add(x, 1);
216214
}
217215

218-
public void add(List<? extends TDigestState> others) {
219-
List<TDigest> otherTdigests = new ArrayList<>();
220-
for (TDigestState other : others) {
221-
otherTdigests.add(other.tdigest);
222-
}
223-
tdigest.add(otherTdigests);
224-
}
225-
226-
public void add(TDigest other) {
227-
tdigest.add(other);
228-
}
229-
230216
public final void compress() {
231217
tdigest.compress();
232218
}

server/src/test/java/org/elasticsearch/search/aggregations/metrics/EmptyTDigestStateTests.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010

1111
import org.elasticsearch.test.ESTestCase;
1212

13-
import java.util.List;
14-
1513
public class EmptyTDigestStateTests extends ESTestCase {
1614

1715
private static final TDigestState singleton = new EmptyTDigestState();
@@ -31,8 +29,4 @@ public void testTestAddWithWeight() {
3129
public void testTestAddList() {
3230
expectThrows(UnsupportedOperationException.class, () -> singleton.add(randomDouble(), randomInt(10)));
3331
}
34-
35-
public void testTestAddListTDigest() {
36-
expectThrows(UnsupportedOperationException.class, () -> singleton.add(List.of(new EmptyTDigestState(), new EmptyTDigestState())));
37-
}
3832
}

0 commit comments

Comments
 (0)