Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/132167.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 132167
summary: Deal with internally created IN in a different way for EQL
area: EQL
type: bug
issues:
- 118621
7 changes: 7 additions & 0 deletions x-pack/plugin/eql/qa/common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,10 @@ dependencies {
// TOML parser for EqlActionIT tests
api 'io.ous:jtoml:2.0.0'
}

tasks.register("loadTestData", JavaExec) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for testing purposes, has nothing to do with the actual fix.

group = "Execution"
description = "Loads EQL Spec Tests data on a running stand-alone instance"
classpath = sourceSets.main.runtimeClasspath
mainClass = "org.elasticsearch.test.eql.DataLoader"
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,47 +76,69 @@ private static Map<String, String[]> getReplacementPatterns() {
public static void main(String[] args) throws IOException {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes in this class are just for testing purposes, has nothing to do with the actual fix.

main = true;
try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200)).build()) {
loadDatasetIntoEs(client, DataLoader::createParser);
loadDatasetIntoEsWithIndexCreator(client, DataLoader::createParser, (restClient, indexName, indexMapping) -> {
// don't use ESRestTestCase methods here or, if you do, test running the main method before making the change
StringBuilder jsonBody = new StringBuilder("{");
jsonBody.append("\"settings\":{\"number_of_shards\":1},");
jsonBody.append("\"mappings\":");
jsonBody.append(indexMapping);
jsonBody.append("}");

Request request = new Request("PUT", "/" + indexName);
request.setJsonEntity(jsonBody.toString());
restClient.performRequest(request);
});
}
}

public static void loadDatasetIntoEs(RestClient client, CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
throws IOException {
loadDatasetIntoEsWithIndexCreator(client, p, (restClient, indexName, indexMapping) -> {
ESRestTestCase.createIndex(restClient, indexName, Settings.builder().put("number_of_shards", 1).build(), indexMapping, null);
});
}

private static void loadDatasetIntoEsWithIndexCreator(
RestClient client,
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p,
IndexCreator indexCreator
) throws IOException {

//
// Main Index
//
load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p);
load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p, indexCreator);
//
// Aux Index
//
load(client, TEST_EXTRA_INDEX, null, null, p);
load(client, TEST_EXTRA_INDEX, null, null, p, indexCreator);
//
// Date_Nanos index
//
// The data for this index is loaded from the same endgame-140.data sample, only having the mapping for @timestamp changed: the
// chosen Windows filetime timestamps (2017+) can coincidentally also be readily used as nano-resolution unix timestamps (1973+).
// There are mixed values with and without nanos precision so that the filtering is properly tested for both cases.
load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p);
load(client, TEST_SAMPLE, null, null, p);
load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p, indexCreator);
load(client, TEST_SAMPLE, null, null, p, indexCreator);
//
// missing_events index
//
load(client, TEST_MISSING_EVENTS_INDEX, null, null, p);
load(client, TEST_SAMPLE_MULTI, null, null, p);
load(client, TEST_MISSING_EVENTS_INDEX, null, null, p, indexCreator);
load(client, TEST_SAMPLE_MULTI, null, null, p, indexCreator);
//
// index with a runtime field ("broken", type long) that causes shard failures.
// the rest of the mapping is the same as TEST_INDEX
//
load(client, TEST_SHARD_FAILURES_INDEX, null, DataLoader::timestampToUnixMillis, p);
load(client, TEST_SHARD_FAILURES_INDEX, null, DataLoader::timestampToUnixMillis, p, indexCreator);
}

private static void load(
RestClient client,
String indexNames,
String dataName,
Consumer<Map<String, Object>> datasetTransform,
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p,
IndexCreator indexCreator
) throws IOException {
String[] splitNames = indexNames.split(",");
for (String indexName : splitNames) {
Expand All @@ -130,15 +152,11 @@ private static void load(
if (data == null) {
throw new IllegalArgumentException("Cannot find resource " + name);
}
createTestIndex(client, indexName, readMapping(mapping));
indexCreator.createIndex(client, indexName, readMapping(mapping));
loadData(client, indexName, datasetTransform, data, p);
}
}

private static void createTestIndex(RestClient client, String indexName, String mapping) throws IOException {
ESRestTestCase.createIndex(client, indexName, Settings.builder().put("number_of_shards", 1).build(), mapping, null);
}

/**
* Reads the mapping file, ignoring comments and replacing placeholders for random types.
*/
Expand Down Expand Up @@ -236,4 +254,8 @@ private static XContentParser createParser(XContent xContent, InputStream data)
NamedXContentRegistry contentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables());
return xContent.createParser(contentRegistry, LoggingDeprecationHandler.INSTANCE, data);
}

private interface IndexCreator {
void createIndex(RestClient client, String indexName, String mapping) throws IOException;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BinaryComparisonSimplification;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanFunctionEqualsElimination;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.BooleanSimplification;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineDisjunctionsToIn;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.ConstantFolding;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.LiteralsOnTheRight;
import org.elasticsearch.xpack.ql.optimizer.OptimizerRules.OptimizerRule;
Expand Down Expand Up @@ -252,6 +251,14 @@ protected Expression maybeSimplifyNegatable(Expression e) {

}

static class CombineDisjunctionsToIn extends org.elasticsearch.xpack.ql.optimizer.OptimizerRules.CombineDisjunctionsToIn {

@Override
protected boolean shouldValidateIn() {
return true;
}
}

static class PruneFilters extends org.elasticsearch.xpack.ql.optimizer.OptimizerRules.PruneFilters {

@Override
Expand Down
84 changes: 84 additions & 0 deletions x-pack/plugin/eql/src/test/resources/querytranslator_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,90 @@ process where process_name in ("python.exe", "SMSS.exe", "explorer.exe")
"terms":{"process_name":["python.exe","SMSS.exe","explorer.exe"],
;

mutipleOrEquals_As_InTranslation1
process where process_name == "python.exe" or process_name == "SMSS.exe" or process_name == "explorer.exe"
;
"terms":{"process_name":["python.exe","SMSS.exe","explorer.exe"],
;

multipleOrAndEquals_As_InTranslation
process where process_name == "python.exe" and process_name == "SMSS.exe" or process_name == "explorer.exe" or process_name == "test.exe"
;
{"bool":{"should":[{"bool":{"must":[{"term":{"process_name":{"value":"python.exe"}}},{"term":{"process_name":{"value":"SMSS.exe"}}}],"boost":1.0}},{"terms":{"process_name":["explorer.exe","test.exe"],"boost":1.0}}],"boost":1.0}}
;

mutipleOrEquals_As_InTranslation2
process where source_address == "123.12.1.1" or (opcode == 123 or opcode == 127)
;
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"terms":{"opcode":[123,127],"boost":1.0}}],"boost":1.0}}
;

mutipleOrEquals_As_InTranslation3
process where (source_address == "123.12.1.1" or source_address == "127.0.0.1") and (opcode == 123 or opcode == 127)
;
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"terms":{"opcode":[123,127],"boost":1.0}}
;

mutipleOrEquals_As_InTranslation4
process where (source_address == "123.12.1.1" or source_address == "127.0.0.1") and (opcode == 123 or opcode == 127)
;
"must":[{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"terms":{"opcode":[123,127],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
;

multipleOrIncompatibleTypes1
process where process_name == "python.exe" or process_name == 2 or process_name == "3"
;
{"bool":{"should":[{"term":{"process_name":{"value":"python.exe"}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
;

multipleOrIncompatibleTypes2
process where process_name == "1" or process_name == 2 or process_name == "3"
;
{"bool":{"should":[{"term":{"process_name":{"value":"1"}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
;

multipleOrIncompatibleTypes3
process where process_name == 1.2 or process_name == 2 or process_name == "3"
;
{"bool":{"should":[{"term":{"process_name":{"value":1.2}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":"3"}}}],"boost":1.0}}
;

// this query as an equivalent with
// process where process_name in (1.2, 2, 3)
// will result in a user error: 1st argument of [process_name in (1.2, 2, 3)] must be [keyword], found value [1.2] type [double]
multipleOrIncompatibleTypes4
process where process_name == 1.2 or process_name == 2 or process_name == 3
;
{"bool":{"should":[{"term":{"process_name":{"value":1.2}}},{"term":{"process_name":{"value":2}}},{"term":{"process_name":{"value":3}}}],"boost":1.0}}
;

// this query as an equivalent with
// process where source_address in ("123.12.1.1", "123.12.1.2")
// will result in a user error: 1st argument of [source_address in ("123.12.1.1", "123.12.1.2")] must be [ip], found value ["123.12.1.1"] type [keyword]
multipleOrIncompatibleTypes5
process where source_address == "123.12.1.1" or source_address == "123.12.1.2"
;
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"123.12.1.2"}}}],"boost":1.0}}
;

multipleOrIncompatibleTypes6
process where source_address == "123.12.1.1" or source_address == concat("123.12.","1.2")
;
{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"term":{"source_address":{"value":"123.12.1.2"}}}],"boost":1.0}}
;

multipleOrIncompatibleTypes7
process where source_address == "123.12.1.1" and (source_address == "123.12.1.2" or source_address >= "127.0.0.1")
;
"must":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.2"}}},{"range":{"source_address":{"gte":"127.0.0.1","boost":1.0}}}],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
;

multipleOrIncompatibleTypes8
process where source_address == "123.12.1.1" and (source_address == "123.12.1.2" or source_address == "127.0.0.1")
;
"must":[{"term":{"source_address":{"value":"123.12.1.1"}}},{"bool":{"should":[{"term":{"source_address":{"value":"123.12.1.2"}}},{"term":{"source_address":{"value":"127.0.0.1"}}}],"boost":1.0}},{"term":{"event.category":{"value":"process"}}}]
;

inFilterWithScripting
process where substring(command_line, 5) in ("test*","best")
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ protected TypeResolution resolveType() {
return super.resolveType();
}

public TypeResolution validateInTypes() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added this one because I didn't want to change the resolveType method and to be able to call it from the optimizer rule.

return resolveType();
}

@Override
public int hashCode() {
return Objects.hash(value, list);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1203,8 +1203,8 @@ private static boolean notEqualsIsRemovableFromConjunction(NotEquals notEquals,
* 2. a == 1 OR a IN (2) becomes a IN (1, 2)
* 3. a IN (1) OR a IN (2) becomes a IN (1, 2)
*
* This rule does NOT check for type compatibility as that phase has been
* already be verified in the analyzer.
* By default (see {@link #shouldValidateIn()}), this rule does NOT check for type compatibility as that phase has
* already been verified in the analyzer, but this behavior can be changed by subclasses.
*/
public static class CombineDisjunctionsToIn extends OptimizerExpressionRule<Or> {
public CombineDisjunctionsToIn() {
Expand All @@ -1214,18 +1214,24 @@ public CombineDisjunctionsToIn() {
@Override
protected Expression rule(Or or) {
Expression e = or;
// look only at equals and In
// look only at Equals and In
List<Expression> exps = splitOr(e);

Map<Expression, Set<Expression>> found = new LinkedHashMap<>();
Map<Expression, List<Expression>> originalOrs = new LinkedHashMap<>();
ZoneId zoneId = null;
List<Expression> ors = new LinkedList<>();

for (Expression exp : exps) {
if (exp instanceof Equals eq) {
// consider only equals against foldables
// consider only Equals against foldables
if (eq.right().foldable()) {
found.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(eq.right());
if (shouldValidateIn()) {
// in case there is an optimized In being built and its validation fails, rebuild the original ORs
// so, keep around the original Expressions
originalOrs.computeIfAbsent(eq.left(), k -> new ArrayList<>()).add(eq);
}
} else {
ors.add(exp);
}
Expand All @@ -1234,6 +1240,11 @@ protected Expression rule(Or or) {
}
} else if (exp instanceof In in) {
found.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(in.list());
if (shouldValidateIn()) {
// in case there is an optimized In being built and its validation fails, rebuild the original ORs
// so, keep around the original Expressions
originalOrs.computeIfAbsent(in.value(), k -> new ArrayList<>()).add(in);
}
if (zoneId == null) {
zoneId = in.zoneId();
}
Expand All @@ -1243,11 +1254,31 @@ protected Expression rule(Or or) {
}

if (found.isEmpty() == false) {
// combine equals alongside the existing ors
// combine Equals alongside the existing ORs
final ZoneId finalZoneId = zoneId;
found.forEach(
(k, v) -> { ors.add(v.size() == 1 ? createEquals(k, v, finalZoneId) : createIn(k, new ArrayList<>(v), finalZoneId)); }
);
found.forEach((k, v) -> {
if (v.size() == 1) {
ors.add(createEquals(k, v.iterator().next(), finalZoneId));
} else {
In in = createIn(k, new ArrayList<>(v), finalZoneId);
// IN has its own particularities when it comes to type resolution and not all implementations
// double check the validity of an internally created IN (like the one created here). EQL is one where the IN
// implementation is like this mechanism here has been specifically created for it
if (shouldValidateIn()) {
Expression.TypeResolution resolution = in.validateInTypes();
if (resolution.unresolved()) {
// if the internally created In is not valid, fall back to the original ORs
assert originalOrs.containsKey(k);
assert originalOrs.get(k).isEmpty() == false;
ors.add(combineOr(originalOrs.get(k)));
} else {
ors.add(in);
}
} else {
ors.add(in);
}
}
});

Expression combineOr = combineOr(ors);
// check the result semantically since the result might different in order
Expand All @@ -1261,13 +1292,17 @@ protected Expression rule(Or or) {
return e;
}

protected Equals createEquals(Expression k, Set<Expression> v, ZoneId finalZoneId) {
return new Equals(k.source(), k, v.iterator().next(), finalZoneId);
}

protected In createIn(Expression key, List<Expression> values, ZoneId zoneId) {
return new In(key.source(), key, values, zoneId);
}

protected boolean shouldValidateIn() {
return false;
}

private Equals createEquals(Expression key, Expression value, ZoneId finalZoneId) {
return new Equals(key.source(), key, value, finalZoneId);
}
}

public static class PushDownAndCombineFilters extends OptimizerRule<Filter> {
Expand Down
Loading