Skip to content

Commit 7423ce0

Browse files
committed
Aggregations: Added percentile rank aggregation
Percentile Rank Aggregation is the reverse of the Percentiles aggregation. It determines the percentile rank (the proportion of values less than a given value) of the provided array of values. Closes elastic#6386
1 parent fe89ea1 commit 7423ce0

23 files changed

+1293
-284
lines changed

docs/reference/search/aggregations/metrics.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ include::metrics/valuecount-aggregation.asciidoc[]
1616

1717
include::metrics/percentile-aggregation.asciidoc[]
1818

19+
include::metrics/percentile-rank-aggregation.asciidoc[]
20+
1921
include::metrics/cardinality-aggregation.asciidoc[]
2022

2123
include::metrics/geobounds-aggregation.asciidoc[]

docs/reference/search/aggregations/metrics/percentile-aggregation.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ a script to convert them on-the-fly:
125125
script to generate values which percentiles are calculated on
126126
<2> Scripting supports parameterized input just like any other script
127127

128+
[[search-aggregations-metrics-percentile-aggregation-approximation]]
128129
==== Percentiles are (usually) approximate
129130

130131
There are many different algorithms to calculate percentiles. The naive
@@ -161,6 +162,7 @@ for large number of values is that the law of large numbers makes the distributi
161162
values more and more uniform and the t-digest tree can do a better job at summarizing
162163
it. It would not be the case on more skewed distributions.
163164

165+
[[search-aggregations-metrics-percentile-aggregation-compression]]
164166
==== Compression
165167

166168
Approximate algorithms must balance memory utilization with estimation accuracy.
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
[[search-aggregations-metrics-percentile-rank-aggregation]]
2+
=== Percentile Ranks Aggregation
3+
4+
coming[1.3.0]
5+
6+
A `multi-value` metrics aggregation that calculates one or more percentile ranks
7+
over numeric values extracted from the aggregated documents. These values
8+
can be extracted either from specific numeric fields in the documents, or
9+
be generated by a provided script.
10+
11+
.Experimental!
12+
[IMPORTANT]
13+
=====
14+
This feature is marked as experimental, and may be subject to change in the
15+
future. If you use this feature, please let us know your experience with it!
16+
=====
17+
18+
[NOTE]
19+
==================================================
20+
Please see <<search-aggregations-metrics-percentile-aggregation-approximation>>
21+
and <<search-aggregations-metrics-percentile-aggregation-compression>> for advice
22+
regarding approximation and memory use of the percentile ranks aggregation.
23+
==================================================
24+
25+
Percentile ranks show the percentage of observed values which are below a certain
26+
value. For example, if a value is greater than or equal to 95% of the observed values
27+
it is said to be at the 95th percentile rank.
28+
29+
Assume your data consists of website load times. You may have a service agreement that
30+
95% of page loads complete within 15ms and 99% of page loads complete within 30ms.
31+
32+
Let's look at the percentile ranks of these two load time thresholds:
33+
34+
[source,js]
35+
--------------------------------------------------
36+
{
37+
"aggs" : {
38+
"load_time_outlier" : {
39+
"percentile_ranks" : {
40+
"field" : "load_time", <1>
41+
"values" : [15, 30]
42+
}
43+
}
44+
}
45+
}
46+
--------------------------------------------------
47+
<1> The field `load_time` must be a numeric field
48+
49+
The response will look like this:
50+
51+
[source,js]
52+
--------------------------------------------------
53+
{
54+
...
55+
56+
"aggregations": {
57+
"load_time_outlier": {
58+
"values" : {
59+
"15": 92,
60+
"30": 100
61+
}
62+
}
63+
}
64+
}
65+
--------------------------------------------------
66+
67+
From this information you can determine you are hitting the 99% load time target but not quite
68+
hitting the 95% load time target.
69+
70+
71+
==== Script
72+
73+
The percentile rank metric supports scripting. For example, if our load times
74+
are in milliseconds but we want to specify values in seconds, we could use
75+
a script to convert them on-the-fly:
76+
77+
[source,js]
78+
--------------------------------------------------
79+
{
80+
"aggs" : {
81+
"load_time_outlier" : {
82+
"percentile_ranks" : {
83+
"values" : [3, 5],
84+
"script" : "doc['load_time'].value / timeUnit", <1>
85+
"params" : {
86+
"timeUnit" : 1000 <2>
87+
}
88+
}
89+
}
90+
}
91+
}
92+
--------------------------------------------------
93+
<1> The `field` parameter is replaced with a `script` parameter, which uses the
94+
script to generate values which percentile ranks are calculated on
95+
<2> Scripting supports parameterized input just like any other script

src/main/java/org/elasticsearch/search/aggregations/AggregationBuilders.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.elasticsearch.search.aggregations.metrics.max.MaxBuilder;
4040
import org.elasticsearch.search.aggregations.metrics.min.MinBuilder;
4141
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesBuilder;
42+
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanksBuilder;
4243
import org.elasticsearch.search.aggregations.metrics.stats.StatsBuilder;
4344
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStatsBuilder;
4445
import org.elasticsearch.search.aggregations.metrics.sum.SumBuilder;
@@ -140,6 +141,10 @@ public static PercentilesBuilder percentiles(String name) {
140141
return new PercentilesBuilder(name);
141142
}
142143

144+
public static PercentileRanksBuilder percentileRanks(String name) {
145+
return new PercentileRanksBuilder(name);
146+
}
147+
143148
public static CardinalityBuilder cardinality(String name) {
144149
return new CardinalityBuilder(name);
145150
}

src/main/java/org/elasticsearch/search/aggregations/AggregationModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.elasticsearch.search.aggregations.metrics.max.MaxParser;
4343
import org.elasticsearch.search.aggregations.metrics.min.MinParser;
4444
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentilesParser;
45+
import org.elasticsearch.search.aggregations.metrics.percentiles.PercentileRanksParser;
4546
import org.elasticsearch.search.aggregations.metrics.stats.StatsParser;
4647
import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStatsParser;
4748
import org.elasticsearch.search.aggregations.metrics.sum.SumParser;
@@ -65,6 +66,7 @@ public AggregationModule() {
6566
parsers.add(ExtendedStatsParser.class);
6667
parsers.add(ValueCountParser.class);
6768
parsers.add(PercentilesParser.class);
69+
parsers.add(PercentileRanksParser.class);
6870
parsers.add(CardinalityParser.class);
6971

7072
parsers.add(GlobalParser.class);

src/main/java/org/elasticsearch/search/aggregations/TransportAggregationModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.elasticsearch.search.aggregations.metrics.max.InternalMax;
4646
import org.elasticsearch.search.aggregations.metrics.min.InternalMin;
4747
import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentiles;
48+
import org.elasticsearch.search.aggregations.metrics.percentiles.InternalPercentileRanks;
4849
import org.elasticsearch.search.aggregations.metrics.stats.InternalStats;
4950
import org.elasticsearch.search.aggregations.metrics.stats.extended.InternalExtendedStats;
5051
import org.elasticsearch.search.aggregations.metrics.sum.InternalSum;
@@ -67,6 +68,7 @@ protected void configure() {
6768
InternalExtendedStats.registerStreams();
6869
InternalValueCount.registerStreams();
6970
InternalPercentiles.registerStreams();
71+
InternalPercentileRanks.registerStreams();
7072
InternalCardinality.registerStreams();
7173

7274
// buckets
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.metrics.percentiles;
21+
22+
import org.elasticsearch.ElasticsearchIllegalArgumentException;
23+
import org.elasticsearch.Version;
24+
import org.elasticsearch.common.io.stream.StreamInput;
25+
import org.elasticsearch.common.io.stream.StreamOutput;
26+
import org.elasticsearch.common.xcontent.XContentBuilder;
27+
import org.elasticsearch.search.aggregations.InternalAggregation;
28+
import org.elasticsearch.search.aggregations.metrics.InternalNumericMetricsAggregation;
29+
import org.elasticsearch.search.aggregations.metrics.percentiles.tdigest.TDigestState;
30+
import org.elasticsearch.search.aggregations.support.format.ValueFormatterStreams;
31+
32+
import java.io.IOException;
33+
import java.util.List;
34+
35+
abstract class AbstractInternalPercentiles extends InternalNumericMetricsAggregation.MultiValue {
36+
37+
protected double[] keys;
38+
protected TDigestState state;
39+
private boolean keyed;
40+
41+
AbstractInternalPercentiles() {} // for serialization
42+
43+
public AbstractInternalPercentiles(String name, double[] keys, TDigestState state, boolean keyed) {
44+
super(name);
45+
this.keys = keys;
46+
this.state = state;
47+
this.keyed = keyed;
48+
}
49+
50+
@Override
51+
public double value(String name) {
52+
return value(Double.parseDouble(name));
53+
}
54+
55+
public abstract double value(double key);
56+
57+
@Override
58+
public AbstractInternalPercentiles reduce(ReduceContext reduceContext) {
59+
List<InternalAggregation> aggregations = reduceContext.aggregations();
60+
TDigestState merged = null;
61+
for (InternalAggregation aggregation : aggregations) {
62+
final AbstractInternalPercentiles percentiles = (AbstractInternalPercentiles) aggregation;
63+
if (merged == null) {
64+
merged = new TDigestState(percentiles.state.compression());
65+
}
66+
merged.add(percentiles.state);
67+
}
68+
return createReduced(getName(), keys, merged, keyed);
69+
}
70+
71+
protected abstract AbstractInternalPercentiles createReduced(String name, double[] keys, TDigestState merged, boolean keyed);
72+
73+
@Override
74+
public void readFrom(StreamInput in) throws IOException {
75+
name = in.readString();
76+
valueFormatter = ValueFormatterStreams.readOptional(in);
77+
if (in.getVersion().before(Version.V_1_2_0)) {
78+
final byte id = in.readByte();
79+
if (id != 0) {
80+
throw new ElasticsearchIllegalArgumentException("Unexpected percentiles aggregator id [" + id + "]");
81+
}
82+
}
83+
keys = new double[in.readInt()];
84+
for (int i = 0; i < keys.length; ++i) {
85+
keys[i] = in.readDouble();
86+
}
87+
state = TDigestState.read(in);
88+
keyed = in.readBoolean();
89+
}
90+
91+
@Override
92+
public void writeTo(StreamOutput out) throws IOException {
93+
out.writeString(name);
94+
ValueFormatterStreams.writeOptional(valueFormatter, out);
95+
if (out.getVersion().before(Version.V_1_2_0)) {
96+
out.writeByte((byte) 0);
97+
}
98+
out.writeInt(keys.length);
99+
for (int i = 0 ; i < keys.length; ++i) {
100+
out.writeDouble(keys[i]);
101+
}
102+
TDigestState.write(state, out);
103+
out.writeBoolean(keyed);
104+
}
105+
106+
@Override
107+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
108+
builder.startObject(name);
109+
if (keyed) {
110+
builder.startObject(CommonFields.VALUES);
111+
for(int i = 0; i < keys.length; ++i) {
112+
String key = String.valueOf(keys[i]);
113+
double value = value(keys[i]);
114+
builder.field(key, value);
115+
if (valueFormatter != null) {
116+
builder.field(key + "_as_string", valueFormatter.format(value));
117+
}
118+
}
119+
builder.endObject();
120+
} else {
121+
builder.startArray(CommonFields.VALUES);
122+
for (int i = 0; i < keys.length; i++) {
123+
double value = value(keys[i]);
124+
builder.startObject();
125+
builder.field(CommonFields.KEY, keys[i]);
126+
builder.field(CommonFields.VALUE, value);
127+
if (valueFormatter != null) {
128+
builder.field(CommonFields.VALUE_AS_STRING, valueFormatter.format(value));
129+
}
130+
builder.endObject();
131+
}
132+
builder.endArray();
133+
}
134+
builder.endObject();
135+
return builder;
136+
}
137+
}

0 commit comments

Comments
 (0)