Skip to content

Commit 25124b0

Browse files
committed
Query: Provide an option to analyze wildcard/prefix in query_string / field queries, closes elastic#787.
1 parent 96d06d6 commit 25124b0

File tree

7 files changed

+195
-8
lines changed

7 files changed

+195
-8
lines changed

modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/MapperQueryParser.java

Lines changed: 124 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919

2020
package org.apache.lucene.queryParser;
2121

22+
import org.apache.lucene.analysis.TokenStream;
23+
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
2224
import org.apache.lucene.index.Term;
2325
import org.apache.lucene.search.BooleanClause;
2426
import org.apache.lucene.search.MultiTermQuery;
2527
import org.apache.lucene.search.Query;
2628
import org.elasticsearch.common.collect.ImmutableMap;
29+
import org.elasticsearch.common.io.FastStringReader;
2730
import org.elasticsearch.common.lucene.Lucene;
2831
import org.elasticsearch.common.lucene.search.Queries;
2932
import org.elasticsearch.index.mapper.AllFieldMapper;
@@ -32,6 +35,9 @@
3235
import org.elasticsearch.index.mapper.MapperService;
3336
import org.elasticsearch.index.query.xcontent.QueryParseContext;
3437

38+
import java.io.IOException;
39+
import java.io.StringReader;
40+
import java.util.ArrayList;
3541
import java.util.List;
3642

3743
import static org.elasticsearch.common.lucene.search.Queries.*;
@@ -61,6 +67,8 @@ public class MapperQueryParser extends QueryParser {
6167

6268
private FieldMapper currentMapper;
6369

70+
private boolean analyzeWildcard;
71+
6472
public MapperQueryParser(QueryParseContext parseContext) {
6573
super(Lucene.QUERYPARSER_VERSION, null, null);
6674
this.parseContext = parseContext;
@@ -83,6 +91,7 @@ public void reset(QueryParserSettings settings) {
8391
setDefaultOperator(settings.defaultOperator());
8492
setFuzzyMinSim(settings.fuzzyMinSim());
8593
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
94+
this.analyzeWildcard = settings.analyzeWildcard();
8695
}
8796

8897
@Override protected Query newTermQuery(Term term) {
@@ -145,7 +154,7 @@ public void reset(QueryParserSettings settings) {
145154
return newRangeQuery(field, part1, part2, inclusive);
146155
}
147156

148-
@Override protected Query getPrefixQuery(String field, String termStr) throws ParseException {
157+
@Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
149158
String indexedNameField = field;
150159
currentMapper = null;
151160
if (parseContext.mapperService() != null) {
@@ -155,13 +164,13 @@ public void reset(QueryParserSettings settings) {
155164
if (currentMapper != null) {
156165
indexedNameField = currentMapper.names().indexName();
157166
}
158-
return wrapSmartNameQuery(super.getPrefixQuery(indexedNameField, termStr), fieldMappers, parseContext);
167+
return wrapSmartNameQuery(super.getFuzzyQuery(indexedNameField, termStr, minSimilarity), fieldMappers, parseContext);
159168
}
160169
}
161-
return super.getPrefixQuery(indexedNameField, termStr);
170+
return super.getFuzzyQuery(indexedNameField, termStr, minSimilarity);
162171
}
163172

164-
@Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
173+
@Override protected Query getPrefixQuery(String field, String termStr) throws ParseException {
165174
String indexedNameField = field;
166175
currentMapper = null;
167176
if (parseContext.mapperService() != null) {
@@ -171,10 +180,53 @@ public void reset(QueryParserSettings settings) {
171180
if (currentMapper != null) {
172181
indexedNameField = currentMapper.names().indexName();
173182
}
174-
return wrapSmartNameQuery(super.getFuzzyQuery(indexedNameField, termStr, minSimilarity), fieldMappers, parseContext);
183+
return wrapSmartNameQuery(getPossiblyAnalyzedPrefixQuery(indexedNameField, termStr), fieldMappers, parseContext);
175184
}
176185
}
177-
return super.getFuzzyQuery(indexedNameField, termStr, minSimilarity);
186+
return getPossiblyAnalyzedPrefixQuery(indexedNameField, termStr);
187+
}
188+
189+
private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
190+
if (!analyzeWildcard) {
191+
return super.getPrefixQuery(field, termStr);
192+
}
193+
// LUCENE MONITOR: TermAttribute deprecated in 3.1
194+
// get Analyzer from superclass and tokenize the term
195+
TokenStream source = null;
196+
try {
197+
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
198+
} catch (IOException e) {
199+
return super.getPrefixQuery(field, termStr);
200+
}
201+
List<String> tlist = new ArrayList<String>();
202+
TermAttribute termAtt = source.addAttribute(TermAttribute.class);
203+
204+
while (true) {
205+
try {
206+
if (!source.incrementToken()) break;
207+
} catch (IOException e) {
208+
break;
209+
}
210+
tlist.add(termAtt.term());
211+
}
212+
213+
try {
214+
source.close();
215+
} catch (IOException e) {
216+
// ignore
217+
}
218+
219+
if (tlist.size() == 1) {
220+
return super.getPrefixQuery(field, tlist.get(0));
221+
} else {
222+
return super.getPrefixQuery(field, termStr);
223+
/* this means that the analyzer used either added or consumed
224+
* (common for a stemmer) tokens, and we can't build a PrefixQuery */
225+
// throw new ParseException("Cannot build PrefixQuery with analyzer "
226+
// + getAnalyzer().getClass()
227+
// + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
228+
}
229+
178230
}
179231

180232
@Override protected Query getWildcardQuery(String field, String termStr) throws ParseException {
@@ -190,10 +242,74 @@ public void reset(QueryParserSettings settings) {
190242
if (currentMapper != null) {
191243
indexedNameField = currentMapper.names().indexName();
192244
}
193-
return wrapSmartNameQuery(super.getWildcardQuery(indexedNameField, termStr), fieldMappers, parseContext);
245+
return wrapSmartNameQuery(getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr), fieldMappers, parseContext);
246+
}
247+
}
248+
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
249+
}
250+
251+
private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
252+
if (!analyzeWildcard) {
253+
return super.getWildcardQuery(field, termStr);
254+
}
255+
boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
256+
StringBuilder aggStr = new StringBuilder();
257+
StringBuilder tmp = new StringBuilder();
258+
for (int i = 0; i < termStr.length(); i++) {
259+
char c = termStr.charAt(i);
260+
if (c == '?' || c == '*') {
261+
if (isWithinToken) {
262+
try {
263+
TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString()));
264+
TermAttribute termAtt = source.addAttribute(TermAttribute.class);
265+
if (source.incrementToken()) {
266+
String term = termAtt.term();
267+
if (term.length() == 0) {
268+
// no tokens, just use what we have now
269+
aggStr.append(tmp);
270+
} else {
271+
aggStr.append(term);
272+
}
273+
} else {
274+
// no tokens, just use what we have now
275+
aggStr.append(tmp);
276+
}
277+
source.close();
278+
} catch (IOException e) {
279+
aggStr.append(tmp);
280+
}
281+
tmp.setLength(0);
282+
}
283+
isWithinToken = false;
284+
aggStr.append(c);
285+
} else {
286+
tmp.append(c);
287+
isWithinToken = true;
288+
}
289+
}
290+
if (isWithinToken) {
291+
try {
292+
TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString()));
293+
TermAttribute termAtt = source.addAttribute(TermAttribute.class);
294+
if (source.incrementToken()) {
295+
String term = termAtt.term();
296+
if (term.length() == 0) {
297+
// no tokens, just use what we have now
298+
aggStr.append(tmp);
299+
} else {
300+
aggStr.append(term);
301+
}
302+
} else {
303+
// no tokens, just use what we have now
304+
aggStr.append(tmp);
305+
}
306+
source.close();
307+
} catch (IOException e) {
308+
aggStr.append(tmp);
194309
}
195310
}
196-
return super.getWildcardQuery(indexedNameField, termStr);
311+
312+
return super.getWildcardQuery(field, aggStr.toString());
197313
}
198314

199315
@Override protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord) throws ParseException {

modules/elasticsearch/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class QueryParserSettings {
3737
private int phraseSlop = 0;
3838
private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
3939
private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
40+
private boolean analyzeWildcard = false;
4041
private boolean escape = false;
4142
private Analyzer analyzer = null;
4243

@@ -136,6 +137,14 @@ public void analyzer(Analyzer analyzer) {
136137
this.analyzer = analyzer;
137138
}
138139

140+
public boolean analyzeWildcard() {
141+
return this.analyzeWildcard;
142+
}
143+
144+
public void analyzeWildcard(boolean analyzeWildcard) {
145+
this.analyzeWildcard = analyzeWildcard;
146+
}
147+
139148
@Override public boolean equals(Object o) {
140149
if (this == o) return true;
141150
if (o == null || getClass() != o.getClass()) return false;
@@ -146,6 +155,7 @@ public void analyzer(Analyzer analyzer) {
146155
if (Float.compare(that.boost, boost) != 0) return false;
147156
if (enablePositionIncrements != that.enablePositionIncrements) return false;
148157
if (escape != that.escape) return false;
158+
if (analyzeWildcard != that.analyzeWildcard) return false;
149159
if (Float.compare(that.fuzzyMinSim, fuzzyMinSim) != 0) return false;
150160
if (fuzzyPrefixLength != that.fuzzyPrefixLength) return false;
151161
if (lowercaseExpandedTerms != that.lowercaseExpandedTerms) return false;
@@ -171,6 +181,7 @@ public void analyzer(Analyzer analyzer) {
171181
result = 31 * result + fuzzyPrefixLength;
172182
result = 31 * result + (escape ? 1 : 0);
173183
result = 31 * result + (analyzer != null ? analyzer.hashCode() : 0);
184+
result = 31 * result + (analyzeWildcard ? 1 : 0);
174185
return result;
175186
}
176187
}

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/FieldQueryBuilder.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ public static enum Operator {
5151

5252
private Boolean enablePositionIncrements;
5353

54+
private Boolean analyzeWildcard;
55+
5456
private float fuzzyMinSim = -1;
5557

5658
private float boost = -1;
@@ -242,6 +244,15 @@ public FieldQueryBuilder phraseSlop(int phraseSlop) {
242244
return this;
243245
}
244246

247+
/**
248+
* Set to <tt>true</tt> to enable analysis on wildcard and prefix queries.
249+
*/
250+
public FieldQueryBuilder analyzeWildcard(boolean analyzeWildcard) {
251+
this.analyzeWildcard = analyzeWildcard;
252+
extraSet = true;
253+
return this;
254+
}
255+
245256
@Override public void doXContent(XContentBuilder builder, Params params) throws IOException {
246257
builder.startObject(FieldQueryParser.NAME);
247258
if (!extraSet) {
@@ -276,6 +287,9 @@ public FieldQueryBuilder phraseSlop(int phraseSlop) {
276287
if (phraseSlop != -1) {
277288
builder.field("phrase_slop", phraseSlop);
278289
}
290+
if (analyzeWildcard != null) {
291+
builder.field("analyze_wildcard", analyzeWildcard);
292+
}
279293
builder.endObject();
280294
}
281295
builder.endObject();

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/FieldQueryParser.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public class FieldQueryParser extends AbstractIndexComponent implements XContent
104104
qpSettings.fuzzyPrefixLength(parser.intValue());
105105
} else if ("escape".equals(currentFieldName)) {
106106
qpSettings.escape(parser.booleanValue());
107+
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
108+
qpSettings.analyzeWildcard(parser.booleanValue());
107109
}
108110
}
109111
}

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryStringQueryBuilder.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ public static enum Operator {
5858

5959
private Boolean enablePositionIncrements;
6060

61+
private Boolean analyzeWildcard;
62+
6163
private float fuzzyMinSim = -1;
6264

6365
private float boost = -1;
@@ -209,6 +211,14 @@ public QueryStringQueryBuilder phraseSlop(int phraseSlop) {
209211
return this;
210212
}
211213

214+
/**
215+
* Set to <tt>true</tt> to enable analysis on wildcard and prefix queries.
216+
*/
217+
public QueryStringQueryBuilder analyzeWildcard(boolean analyzeWildcard) {
218+
this.analyzeWildcard = analyzeWildcard;
219+
return this;
220+
}
221+
212222
/**
213223
* Sets the boost for this query. Documents matching this query will (in addition to the normal
214224
* weightings) have their score multiplied by the boost provided.
@@ -271,6 +281,9 @@ public QueryStringQueryBuilder boost(float boost) {
271281
if (phraseSlop != -1) {
272282
builder.field("phrase_slop", phraseSlop);
273283
}
284+
if (analyzeWildcard != null) {
285+
builder.field("analyze_wildcard", analyzeWildcard);
286+
}
274287
builder.endObject();
275288
}
276289
}

modules/elasticsearch/src/main/java/org/elasticsearch/index/query/xcontent/QueryStringQueryParser.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ public class QueryStringQueryParser extends AbstractIndexComponent implements XC
155155
qpSettings.boost(parser.floatValue());
156156
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
157157
qpSettings.tieBreaker(parser.floatValue());
158+
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
159+
qpSettings.analyzeWildcard(parser.booleanValue());
158160
}
159161
}
160162
}

modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,35 @@ protected Client getClient() {
6868
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
6969
}
7070

71+
@Test public void queryStringAnalyzedWildcard() throws Exception {
72+
try {
73+
client.admin().indices().prepareDelete("test").execute().actionGet();
74+
} catch (Exception e) {
75+
// ignore
76+
}
77+
78+
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 1)).execute().actionGet();
79+
80+
client.prepareIndex("test", "type1", "1").setSource("field1", "value_1", "field2", "value_2").execute().actionGet();
81+
82+
client.admin().indices().prepareRefresh().execute().actionGet();
83+
84+
SearchResponse searchResponse = client.prepareSearch().setQuery(queryString("value*").analyzeWildcard(true)).execute().actionGet();
85+
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
86+
87+
searchResponse = client.prepareSearch().setQuery(queryString("*ue*").analyzeWildcard(true)).execute().actionGet();
88+
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
89+
90+
searchResponse = client.prepareSearch().setQuery(queryString("*ue_1").analyzeWildcard(true)).execute().actionGet();
91+
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
92+
93+
searchResponse = client.prepareSearch().setQuery(queryString("val*e_1").analyzeWildcard(true)).execute().actionGet();
94+
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
95+
96+
searchResponse = client.prepareSearch().setQuery(queryString("v?l*e?1").analyzeWildcard(true)).execute().actionGet();
97+
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
98+
}
99+
71100
@Test public void filterExistsMissingTests() throws Exception {
72101
try {
73102
client.admin().indices().prepareDelete("test").execute().actionGet();

0 commit comments

Comments
 (0)