Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/128895.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 128895
summary: Workaround for RLike handling of empty lang pattern
area: ES|QL
type: bug
issues:
- 128813
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ public boolean matchesAll() {

/**
 * Returns the one string this pattern matches exactly, if there is such a string.
 *
 * @return the string built from the singleton reported by {@code Operations.getSingleton} for this pattern's
 *         automaton, or {@code null} when the automaton has no states or no singleton is reported
 */
@Override
public String exactMatch() {
    // Fetch the automaton once so the guard and the singleton lookup inspect the same instance.
    Automaton a = automaton();
    // The zero-state check must run before getSingleton is called; that ordering is the workaround.
    if (a.getNumStates() == 0) { // workaround for https://github.com/elastic/elasticsearch/pull/128887
        return null; // Empty automaton has no matches
    }
    IntsRef singleton = Operations.getSingleton(a);
    return singleton != null ? UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length) : null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ private boolean rlikeMatchesAll(String pattern) {
return rlike(pattern).matchesAll();
}

/**
 * Builds the RLIKE pattern for the given text via {@code rlike} and returns whatever its
 * {@code exactMatch()} reports.
 */
private String exactMatchRLike(String pattern) {
    var regex = rlike(pattern);
    return regex.exactMatch();
}

/**
 * Checks whether the exact match reported for a pattern is the pattern text itself.
 *
 * @param pattern the RLIKE pattern under test
 * @return {@code true} if {@code exactMatchRLike(pattern)} equals {@code pattern}; {@code false} otherwise,
 *         including when no exact match ({@code null}) is reported
 */
private boolean rlikeExactMatch(String pattern) {
    // Single return that goes through the shared helper, so both test helpers resolve the exact match the same way.
    return pattern.equals(exactMatchRLike(pattern));
}

public void testWildcardMatchAll() {
Expand Down Expand Up @@ -86,4 +90,20 @@ public void testRegexExactMatch() {
assertTrue(rlikeExactMatch("abc"));
assertTrue(rlikeExactMatch("12345"));
}

/**
 * Checks {@code exactMatchRLike} on patterns containing {@code #}: the first group must yield
 * {@code null}, the second group must yield a non-null exact match.
 */
public void testRegexExactMatchWithEmptyMatch() {
    // As soon as there's one unconditional (non-optional) `#` in the pattern, it'll match nothing
    String[] matchNothing = { "#", "##", "#foo", "#foo#", "f#oo", "foo#", "#[A-Z]*", "foo(#)" };
    for (String pattern : matchNothing) {
        assertNull(exactMatchRLike(pattern));
    }

    // An optional `#`, or a `#` that is only one side of an alternation, still yields an exact match
    String[] stillMatching = { "foo#?", "#|foo", "foo|#" };
    for (String pattern : stillMatching) {
        assertNotNull(exactMatchRLike(pattern));
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ public static String stripThrough(String input) {
/** Returns the input string, but with parts of it having the letter casing changed. */
public static String randomCasing(String input) {
StringBuilder sb = new StringBuilder(input.length());
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen), chunkEnd; i < inputLen; i += step) {
chunkEnd = Math.min(i + step, inputLen);
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
var chunkEnd = Math.min(i + step, inputLen);
var chunk = input.substring(i, chunkEnd);
sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1440,6 +1440,46 @@ public void testReplaceStringCasingWithInsensitiveWildcardMatch() throws IOExcep
assertThat(answer.get("values"), equalTo(List.of(List.of("_\"_$_(_)_+_._[_]_^_{_|_}___", "_#_&_<_>___"))));
}

/**
 * Indexes two keyword documents ({@code "#"} and {@code "foo#bar"}) and compares RLIKE results for an
 * unescaped {@code #} pattern (expected to return no rows) against escaped {@code #} patterns
 * (expected to return the matching documents).
 */
public void testRLikeHandlingOfEmptyLanguagePattern() throws IOException {
    // Text-block lines are kept flush with their closing delimiter so the emitted JSON stays unchanged.
    createIndex(testIndexName(), Settings.EMPTY, """
        {
        "properties": {
        "field": {
        "type": "keyword"
        }
        }
        }
        """);
    for (var value : List.of("#", "foo#bar")) {
        Request indexRequest = new Request("POST", testIndexName() + "/_doc?refresh=true");
        indexRequest.setJsonEntity("""
            {
            "field": "%s"
            }
            """.formatted(value));
        client().performRequest(indexRequest);
    }

    // pushed down, matches nothing
    var loweredFieldQuery = "FROM " + testIndexName() + " | WHERE TO_LOWER(field) RLIKE \"#\"";
    var loweredFieldAnswer = runEsql(requestObjectBuilder().query(loweredFieldQuery));
    assertThat(loweredFieldAnswer.get("values"), equalTo(List.of()));

    // matches nothing
    var unescapedQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \"#\"";
    var unescapedAnswer = runEsql(requestObjectBuilder().query(unescapedQuery));
    assertThat(unescapedAnswer.get("values"), equalTo(List.of()));

    // matches one doc
    var escapedQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \"\\\\#\"";
    var escapedAnswer = runEsql(requestObjectBuilder().query(escapedQuery));
    assertThat(escapedAnswer.get("values"), equalTo(List.of(List.of("#"))));

    // matches both docs
    var containsQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \".*\\\\#.*\" | SORT field";
    var containsAnswer = runEsql(requestObjectBuilder().query(containsQuery));
    assertThat(containsAnswer.get("values"), equalTo(List.of(List.of("#"), List.of("foo#bar"))));
}

protected static Request prepareRequestWithOptions(RequestObjectBuilder requestObject, Mode mode) throws IOException {
requestObject.build();
Request request = prepareRequest(mode);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,13 @@ emp_no:integer |first_name:keyword
10001 |Georgi
10055 |Georgy
;

# test for https://github.com/elastic/elasticsearch/issues/128813
rlikeWithEmptyLanguagePattern
required_capability: rlike_with_empty_language_pattern
ROW x = "abc" | EVAL bool = x RLIKE "#"
;

x:keyword | bool:boolean
abc | false
;
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,11 @@ public enum Cap {
*/
ENABLE_LOOKUP_JOIN_ON_ALIASES,

/**
* Allows RLIKE to correctly handle the "empty language" flag, `#`.
*/
RLIKE_WITH_EMPTY_LANGUAGE_PATTERN,

/**
* MATCH PHRASE function
*/
Expand Down
Loading