Skip to content

Commit bc212a5

Browse files
committed
Feat: SELECT APPROXIMATE ~ WITHIN n PERCENT ERROR
The best synopsis is chosen according to the analyzed column statistics. Issue: #50
1 parent 4940bf2 commit bc212a5

File tree

7 files changed

+70
-28
lines changed

7 files changed

+70
-28
lines changed

traindb-core/src/main/codegen/templates/Parser.jj

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,18 +2768,31 @@ SqlNode WindowRange() :
27682768
*/
27692769
void ApproxConstraintOpt(List<SqlNode> hints) :
27702770
{
2771-
SqlNumericLiteral timeConstraint;
2771+
SqlNumericLiteral approxConstraint;
27722772
List<SqlNode> list = new ArrayList<SqlNode>();
27732773
SqlParserPos pos;
27742774
}
27752775
{
2776-
[ <WITHIN> ] timeConstraint = UnsignedNumericLiteral() [ <SECONDS> ] {
2777-
list.add(timeConstraint);
2778-
pos = getPos();
2779-
hints.add(new SqlHint(pos, new SqlIdentifier("approx_time", pos),
2780-
SqlNodeList.of(pos, list), SqlHint.HintOptionFormat.LITERAL_LIST));
2781-
return;
2782-
}
2776+
[ <WITHIN> ] approxConstraint = UnsignedNumericLiteral()
2777+
[
2778+
(
2779+
<SECONDS> {
2780+
list.add(approxConstraint);
2781+
pos = getPos();
2782+
hints.add(new SqlHint(pos, new SqlIdentifier("approx_time", pos),
2783+
SqlNodeList.of(pos, list), SqlHint.HintOptionFormat.LITERAL_LIST));
2784+
return;
2785+
}
2786+
|
2787+
<PERCENT> <ERROR> {
2788+
list.add(approxConstraint);
2789+
pos = getPos();
2790+
hints.add(new SqlHint(pos, new SqlIdentifier("approx_error", pos),
2791+
SqlNodeList.of(pos, list), SqlHint.HintOptionFormat.LITERAL_LIST));
2792+
return;
2793+
}
2794+
)
2795+
]
27832796
|
27842797
{
27852798
return;

traindb-core/src/main/java/traindb/planner/TrainDBPlanner.java

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
import org.apache.calcite.tools.RelBuilder;
5050
import org.apache.calcite.util.Holder;
5151
import org.checkerframework.checker.nullness.qual.Nullable;
52+
import org.json.simple.JSONArray;
53+
import org.json.simple.JSONObject;
54+
import org.json.simple.parser.JSONParser;
5255
import traindb.adapter.jdbc.JdbcConvention;
5356
import traindb.adapter.jdbc.JdbcTableScan;
5457
import traindb.adapter.jdbc.TrainDBJdbcTable;
@@ -177,7 +180,7 @@ public RelOptTable getSynopsisTable(MSynopsis synopsis, RelOptTable baseTable) {
177180
}
178181

179182
public MSynopsis getBestSynopsis(Collection<MSynopsis> synopses, TableScan scan,
180-
List<RelHint> hints) {
183+
List<RelHint> hints, List<String> requiredColumnNames) {
181184
Collection<MSynopsis> hintSynopses = new ArrayList<>();
182185
for (RelHint hint : hints) {
183186
if (hint.hintName.equals("synopsis")) {
@@ -196,21 +199,45 @@ public MSynopsis getBestSynopsis(Collection<MSynopsis> synopses, TableScan scan,
196199

197200
Collection<MSynopsis> filteredSynopses = new HashSet<>();
198201
for (RelHint hint : hints) {
199-
if (!hint.hintName.equals("approx_time")) {
200-
continue;
201-
}
202-
List<String> hintExecTime = hint.listOptions;
203-
for (String str : hintExecTime) {
204-
try {
205-
double hintTime = Double.valueOf(str);
206-
for (MSynopsis syn : synopses) {
207-
if (caqpExecutionTimePolicy.check(syn, hintTime)) {
208-
filteredSynopses.add(syn);
202+
try {
203+
if (hint.hintName.equals("approx_time")) {
204+
List<String> hintExecTime = hint.listOptions;
205+
for (String str : hintExecTime) {
206+
double hintTime = Double.valueOf(str);
207+
for (MSynopsis syn : synopses) {
208+
if (caqpExecutionTimePolicy.check(syn, hintTime)) {
209+
filteredSynopses.add(syn);
210+
}
211+
}
212+
}
213+
} else if (hint.hintName.equals("approx_error")) {
214+
List<String> hintErrors = hint.listOptions;
215+
for (String s : hintErrors) {
216+
double hintError = Double.valueOf(s) /* percent */ * 0.01;
217+
for (MSynopsis syn : synopses) {
218+
String synopsisStatistics = syn.getSynopsisStatistics();
219+
if (synopsisStatistics == null || synopsisStatistics.isEmpty()) {
220+
continue;
221+
}
222+
JSONParser parser = new JSONParser();
223+
JSONArray jsonColumnStats = (JSONArray) parser.parse(synopsisStatistics);
224+
double score = 0;
225+
for (int i = 0; i < jsonColumnStats.size(); i++) {
226+
JSONObject colstat = (JSONObject) jsonColumnStats.get(i);
227+
String columnName = (String) colstat.get("Column");
228+
if (requiredColumnNames.contains(columnName)) {
229+
score += (Double) colstat.get("Quality Score");
230+
}
231+
}
232+
double errorEstimate = 1.0 - (score / requiredColumnNames.size());
233+
if (errorEstimate > hintError) {
234+
filteredSynopses.add(syn);
235+
}
209236
}
210237
}
211-
} catch (Exception e) {
212-
// ignore
213238
}
239+
} catch (Exception e) {
240+
// ignore
214241
}
215242
}
216243
if (filteredSynopses.size() < synopses.size()) {

traindb-core/src/main/java/traindb/planner/rules/ApproxAggregateSynopsisAggregateScanRule.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ public void onMatch(RelOptRuleCall call) {
7676
return;
7777
}
7878

79-
MSynopsis bestSynopsis = planner.getBestSynopsis(candidateSynopses, scan, aggregate.getHints());
79+
MSynopsis bestSynopsis = planner.getBestSynopsis(
80+
candidateSynopses, scan, aggregate.getHints(), requiredColumnNames);
8081

8182
List<Integer> targets = new ArrayList<>();
8283
for (int i = 0; i < inputColumns.size(); i++) {

traindb-core/src/main/java/traindb/planner/rules/ApproxAggregateSynopsisFilterScanRule.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ public void onMatch(RelOptRuleCall call) {
101101
return;
102102
}
103103

104-
MSynopsis bestSynopsis =
105-
planner.getBestSynopsis(candidateSynopses, scan, aggregate.getHints());
104+
MSynopsis bestSynopsis = planner.getBestSynopsis(
105+
candidateSynopses, scan, aggregate.getHints(), requiredColumnNames);
106106

107107
List<Integer> targets = new ArrayList<>();
108108
for (int i = 0; i < inputColumns.size(); i++) {

traindb-core/src/main/java/traindb/planner/rules/ApproxAggregateSynopsisProjectScanRule.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ public void onMatch(RelOptRuleCall call) {
8787
return;
8888
}
8989

90-
MSynopsis bestSynopsis =
91-
planner.getBestSynopsis(candidateSynopses, scan, aggregate.getHints());
90+
MSynopsis bestSynopsis = planner.getBestSynopsis(
91+
candidateSynopses, scan, aggregate.getHints(), requiredColumnNames);
9292

9393
final List<String> synopsisColumns = bestSynopsis.getColumnNames();
9494

traindb-core/src/main/java/traindb/planner/rules/ApproxAggregateSynopsisRule.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ public void onMatch(RelOptRuleCall call) {
165165
continue;
166166
}
167167

168-
MSynopsis bestSynopsis =
169-
planner.getBestSynopsis(candidateSynopses, scan, aggregate.getHints());
168+
MSynopsis bestSynopsis = planner.getBestSynopsis(
169+
candidateSynopses, scan, aggregate.getHints(), requiredColumnNames);
170170
RelOptTableImpl synopsisTable =
171171
(RelOptTableImpl) planner.getSynopsisTable(bestSynopsis, scan.getTable());
172172
TableScan newScan = new JdbcTableScan(scan.getCluster(), scan.getHints(), synopsisTable,

traindb-core/src/main/java/traindb/sql/calcite/TrainDBHintStrategyTable.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static HintStrategyTable createHintStrategies(HintStrategyTable.Builder builder)
2929
.hintStrategy("approximate", HintPredicates.AGGREGATE)
3030
.hintStrategy("synopsis", HintPredicates.AGGREGATE)
3131
.hintStrategy("approx_time", HintPredicates.AGGREGATE)
32+
.hintStrategy("approx_error", HintPredicates.AGGREGATE)
3233
.build();
3334
}
3435
}

0 commit comments

Comments
 (0)