Skip to content

Commit 703dbff

Browse files
committed
Index field names of documents.
The `exists` and `missing` filters need to merge postings lists of all existing terms, which can be very costly, especially on high-cardinality fields. This commit indexes the field names of a document under `_field_names` and reuses it to speed up the `exists` and `missing` filters. This is only enabled for indices that are created on or after Elasticsearch 1.3.0. Close elastic#5659
1 parent e2da211 commit 703dbff

File tree

14 files changed

+507
-8
lines changed

14 files changed

+507
-8
lines changed

docs/reference/mapping/fields.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ include::fields/boost-field.asciidoc[]
2121

2222
include::fields/parent-field.asciidoc[]
2323

24+
include::fields/field-names-field.asciidoc[]
25+
2426
include::fields/routing-field.asciidoc[]
2527

2628
include::fields/index-field.asciidoc[]
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[[mapping-field-names-field]]
2+
=== `_field_names`
3+
4+
coming[1.3.0]
5+
6+
The `_field_names` field indexes the field names of a document, which can later
7+
be used to search for documents based on the fields that they contain, typically
8+
using the `exists` and `missing` filters.
9+
10+
`_field_names` is indexed by default for indices that have been created on or after
11+
Elasticsearch 1.3.0.

src/main/java/org/elasticsearch/Version.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@
1919

2020
package org.elasticsearch;
2121

22+
import org.elasticsearch.cluster.metadata.IndexMetaData;
2223
import org.elasticsearch.common.Nullable;
2324
import org.elasticsearch.common.Strings;
2425
import org.elasticsearch.common.inject.AbstractModule;
2526
import org.elasticsearch.common.io.stream.StreamInput;
2627
import org.elasticsearch.common.io.stream.StreamOutput;
2728
import org.elasticsearch.common.lucene.Lucene;
29+
import org.elasticsearch.common.settings.Settings;
2830
import org.elasticsearch.monitor.jvm.JvmInfo;
2931

3032
import java.io.IOException;
@@ -344,6 +346,15 @@ public static Version fromId(int id) {
344346
}
345347
}
346348

349+
/**
350+
* Return the {@link Version} of Elasticsearch that has been used to create an index given its settings.
351+
*/
352+
public static Version indexCreated(Settings indexSettings) {
353+
assert indexSettings.get(IndexMetaData.SETTING_UUID) == null // if the UUDI is there the index has actually been created otherwise this might be a test
354+
|| indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) != null : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
355+
return indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
356+
}
357+
347358
public static void writeVersion(Version version, StreamOutput out) throws IOException {
348359
out.writeVInt(version.id);
349360
}

src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ public Builder(String index, @Nullable Settings indexSettings, RootObjectMapper.
180180
this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper());
181181
this.rootMappers.put(VersionFieldMapper.class, new VersionFieldMapper());
182182
this.rootMappers.put(ParentFieldMapper.class, new ParentFieldMapper());
183+
// _field_names last so that it can see all other fields
184+
this.rootMappers.put(FieldNamesFieldMapper.class, new FieldNamesFieldMapper(indexSettings));
183185
}
184186

185187
public Builder meta(ImmutableMap<String, Object> meta) {

src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@
2121

2222
import com.google.common.collect.ImmutableMap;
2323
import com.google.common.collect.Maps;
24-
import org.elasticsearch.ElasticsearchParseException;
2524
import org.elasticsearch.Version;
26-
import org.elasticsearch.cluster.metadata.IndexMetaData;
2725
import org.elasticsearch.common.Nullable;
2826
import org.elasticsearch.common.Strings;
2927
import org.elasticsearch.common.collect.MapBuilder;
@@ -51,7 +49,6 @@
5149

5250
import java.util.Iterator;
5351
import java.util.Map;
54-
import java.util.Set;
5552

5653
import static org.elasticsearch.index.mapper.MapperBuilders.doc;
5754

@@ -122,10 +119,9 @@ public DocumentMapperParser(Index index, @IndexSettings Settings indexSettings,
122119
.put(UidFieldMapper.NAME, new UidFieldMapper.TypeParser())
123120
.put(VersionFieldMapper.NAME, new VersionFieldMapper.TypeParser())
124121
.put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
122+
.put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
125123
.immutableMap();
126-
assert indexSettings.get(IndexMetaData.SETTING_UUID) == null // if the UUDI is there the index has actually been created otherwise this might be a test
127-
|| indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) != null : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
128-
indexVersionCreated = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
124+
indexVersionCreated = Version.indexCreated(indexSettings);
129125
}
130126

131127
public void putTypeParser(String type, Mapper.TypeParser typeParser) {

src/main/java/org/elasticsearch/index/mapper/MapperBuilders.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ public static TypeFieldMapper.Builder type() {
7474
return new TypeFieldMapper.Builder();
7575
}
7676

77+
/**
 * Creates a new {@link FieldNamesFieldMapper.Builder} using its no-argument constructor.
 */
public static FieldNamesFieldMapper.Builder fieldNames() {
78+
return new FieldNamesFieldMapper.Builder();
79+
}
80+
7781
public static IndexFieldMapper.Builder index() {
7882
return new IndexFieldMapper.Builder();
7983
}
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.mapper.internal;
21+
22+
import com.google.common.collect.UnmodifiableIterator;
23+
import org.apache.lucene.document.Field;
24+
import org.apache.lucene.document.FieldType;
25+
import org.apache.lucene.document.SortedSetDocValuesField;
26+
import org.apache.lucene.document.XStringField;
27+
import org.apache.lucene.index.FieldInfo.IndexOptions;
28+
import org.apache.lucene.index.IndexableField;
29+
import org.apache.lucene.util.BytesRef;
30+
import org.elasticsearch.Version;
31+
import org.elasticsearch.common.Nullable;
32+
import org.elasticsearch.common.lucene.Lucene;
33+
import org.elasticsearch.common.settings.Settings;
34+
import org.elasticsearch.common.xcontent.XContentBuilder;
35+
import org.elasticsearch.common.xcontent.XContentFactory;
36+
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
37+
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
38+
import org.elasticsearch.index.fielddata.FieldDataType;
39+
import org.elasticsearch.index.mapper.*;
40+
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
41+
42+
import java.io.IOException;
43+
import java.util.ArrayList;
44+
import java.util.Iterator;
45+
import java.util.List;
46+
import java.util.Map;
47+
48+
import static org.elasticsearch.index.mapper.MapperBuilders.fieldNames;
49+
import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
50+
51+
/**
52+
* A mapper that indexes the field names of a document under <code>_field_names</code>. This mapper is typically useful in order
53+
* to have fast <code>exists</code> and <code>missing</code> queries/filters.
54+
*
55+
* Added in Elasticsearch 1.3.
56+
*/
57+
public class FieldNamesFieldMapper extends AbstractFieldMapper<String> implements InternalMapper, RootMapper {
58+
59+
public static final String NAME = "_field_names";
60+
61+
public static final String CONTENT_TYPE = "_field_names";
62+
63+
public static class Defaults extends AbstractFieldMapper.Defaults {
64+
public static final String NAME = FieldNamesFieldMapper.NAME;
65+
public static final String INDEX_NAME = FieldNamesFieldMapper.NAME;
66+
67+
public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
68+
public static final FieldType FIELD_TYPE_PRE_1_3_0;
69+
70+
static {
71+
FIELD_TYPE.setIndexed(true);
72+
FIELD_TYPE.setTokenized(false);
73+
FIELD_TYPE.setStored(false);
74+
FIELD_TYPE.setOmitNorms(true);
75+
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
76+
FIELD_TYPE.freeze();
77+
FIELD_TYPE_PRE_1_3_0 = new FieldType(FIELD_TYPE);
78+
FIELD_TYPE_PRE_1_3_0.setIndexed(false);
79+
FIELD_TYPE_PRE_1_3_0.freeze();
80+
}
81+
}
82+
83+
public static class Builder extends AbstractFieldMapper.Builder<Builder, FieldNamesFieldMapper> {
84+
85+
private boolean indexIsExplicit;
86+
87+
public Builder() {
88+
super(Defaults.NAME, new FieldType(Defaults.FIELD_TYPE));
89+
indexName = Defaults.INDEX_NAME;
90+
}
91+
92+
@Override
93+
public Builder index(boolean index) {
94+
indexIsExplicit = true;
95+
return super.index(index);
96+
}
97+
98+
@Override
99+
public FieldNamesFieldMapper build(BuilderContext context) {
100+
if ((context.indexCreatedVersion() == null || context.indexCreatedVersion().before(Version.V_1_3_0)) && !indexIsExplicit) {
101+
fieldType.setIndexed(false);
102+
}
103+
return new FieldNamesFieldMapper(name, indexName, boost, fieldType, postingsProvider, docValuesProvider, fieldDataSettings, context.indexSettings());
104+
}
105+
}
106+
107+
public static class TypeParser implements Mapper.TypeParser {
108+
@Override
109+
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
110+
FieldNamesFieldMapper.Builder builder = fieldNames();
111+
parseField(builder, builder.name, node, parserContext);
112+
return builder;
113+
}
114+
}
115+
116+
private final FieldType defaultFieldType;
117+
118+
private static FieldType defaultFieldType(Settings indexSettings) {
119+
return indexSettings != null && Version.indexCreated(indexSettings).onOrAfter(Version.V_1_3_0) ? Defaults.FIELD_TYPE : Defaults.FIELD_TYPE_PRE_1_3_0;
120+
}
121+
122+
/**
 * Creates the mapper with the default name and index name.
 */
public FieldNamesFieldMapper(Settings indexSettings) {
    this(Defaults.NAME, Defaults.INDEX_NAME, indexSettings);
}

/**
 * Creates the mapper with the default boost and a mutable copy of the default field type
 * that matches the given index settings.
 */
protected FieldNamesFieldMapper(String name, String indexName, Settings indexSettings) {
    this(
            name,
            indexName,
            Defaults.BOOST,
            new FieldType(defaultFieldType(indexSettings)),
            null,
            null,
            null,
            indexSettings);
}

/**
 * Creates a fully specified mapper. The keyword analyzer is passed for both analyzer arguments,
 * and the default field type is resolved from <code>indexSettings</code>.
 */
public FieldNamesFieldMapper(String name, String indexName, float boost, FieldType fieldType, PostingsFormatProvider postingsProvider,
                             DocValuesFormatProvider docValuesProvider, @Nullable Settings fieldDataSettings, Settings indexSettings) {
    super(
            new Names(name, indexName, indexName, name),
            boost,
            fieldType,
            null,
            Lucene.KEYWORD_ANALYZER,
            Lucene.KEYWORD_ANALYZER,
            postingsProvider,
            docValuesProvider,
            null,
            null,
            fieldDataSettings,
            indexSettings);
    this.defaultFieldType = defaultFieldType(indexSettings);
}
136+
137+
@Override
138+
public FieldType defaultFieldType() {
139+
return defaultFieldType;
140+
}
141+
142+
@Override
143+
public FieldDataType defaultFieldDataType() {
144+
return new FieldDataType("string");
145+
}
146+
147+
@Override
148+
public String value(Object value) {
149+
if (value == null) {
150+
return null;
151+
}
152+
return value.toString();
153+
}
154+
155+
@Override
156+
public boolean useTermQueryWithQueryString() {
157+
return true;
158+
}
159+
160+
@Override
161+
public void preParse(ParseContext context) throws IOException {
162+
}
163+
164+
@Override
165+
public void postParse(ParseContext context) throws IOException {
166+
super.parse(context);
167+
}
168+
169+
@Override
170+
public void parse(ParseContext context) throws IOException {
171+
// we parse in post parse
172+
}
173+
174+
@Override
175+
public boolean includeInObject() {
176+
return false;
177+
}
178+
179+
static Iterable<String> extractFieldNames(final String fullPath) {
180+
return new Iterable<String>() {
181+
@Override
182+
public Iterator<String> iterator() {
183+
return new UnmodifiableIterator<String>() {
184+
185+
int endIndex = nextEndIndex(0);
186+
187+
private int nextEndIndex(int index) {
188+
while (index < fullPath.length() && fullPath.charAt(index) != '.') {
189+
index += 1;
190+
}
191+
return index;
192+
}
193+
194+
@Override
195+
public boolean hasNext() {
196+
return endIndex <= fullPath.length();
197+
}
198+
199+
@Override
200+
public String next() {
201+
final String result = fullPath.substring(0, endIndex);
202+
endIndex = nextEndIndex(endIndex + 1);
203+
return result;
204+
}
205+
206+
};
207+
}
208+
};
209+
}
210+
211+
@Override
212+
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
213+
if (!fieldType.indexed() && !fieldType.stored() && !hasDocValues()) {
214+
return;
215+
}
216+
for (ParseContext.Document document : context.docs()) {
217+
final List<String> paths = new ArrayList<>();
218+
for (IndexableField field : document.getFields()) {
219+
paths.add(field.name());
220+
}
221+
for (String path : paths) {
222+
for (String fieldName : extractFieldNames(path)) {
223+
if (fieldType.indexed() || fieldType.stored()) {
224+
document.add(new XStringField(names().indexName(), fieldName, fieldType));
225+
}
226+
if (hasDocValues()) {
227+
document.add(new SortedSetDocValuesField(names().indexName(), new BytesRef(fieldName)));
228+
}
229+
}
230+
}
231+
}
232+
}
233+
234+
/** Returns {@link #CONTENT_TYPE}, i.e. <code>_field_names</code>. */
@Override
235+
protected String contentType() {
236+
return CONTENT_TYPE;
237+
}
238+
239+
@Override
240+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
241+
XContentBuilder json = XContentFactory.jsonBuilder();
242+
super.toXContent(json, params);
243+
if (json.string().equals("\"" + NAME + "\"{\"type\":\"" + CONTENT_TYPE + "\"}")) {
244+
return builder;
245+
}
246+
return super.toXContent(builder, params);
247+
}
248+
}

src/main/java/org/elasticsearch/index/query/ExistsFilterParser.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
import org.elasticsearch.common.lucene.search.XBooleanFilter;
2828
import org.elasticsearch.common.xcontent.XContentParser;
2929
import org.elasticsearch.index.cache.filter.support.CacheKeyFilter;
30+
import org.elasticsearch.index.mapper.FieldMappers;
3031
import org.elasticsearch.index.mapper.MapperService;
32+
import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper;
3133

3234
import java.io.IOException;
3335
import java.util.Set;
@@ -81,6 +83,8 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
8183
}
8284

8385
public static Filter newFilter(QueryParseContext parseContext, String fieldPattern, String filterName) {
86+
final FieldMappers fieldNamesMapper = parseContext.mapperService().indexName(FieldNamesFieldMapper.CONTENT_TYPE);
87+
8488
MapperService.SmartNameObjectMapper smartNameObjectMapper = parseContext.smartObjectMapper(fieldPattern);
8589
if (smartNameObjectMapper != null && smartNameObjectMapper.hasMapper()) {
8690
// automatic make the object mapper pattern
@@ -101,7 +105,17 @@ public static Filter newFilter(QueryParseContext parseContext, String fieldPatte
101105
nonNullFieldMappers = smartNameFieldMappers;
102106
}
103107
Filter filter = null;
104-
if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
108+
if (fieldNamesMapper!= null && fieldNamesMapper.mapper().fieldType().indexed()) {
109+
final String f;
110+
if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
111+
f = smartNameFieldMappers.mapper().names().indexName();
112+
} else {
113+
f = field;
114+
}
115+
filter = fieldNamesMapper.mapper().termFilter(f, parseContext);
116+
}
117+
// if _field_names are not indexed, we need to go the slow way
118+
if (filter == null && smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
105119
filter = smartNameFieldMappers.mapper().rangeFilter(null, null, true, true, parseContext);
106120
}
107121
if (filter == null) {

0 commit comments

Comments
 (0)