ncdc
diff --git a/‎docs/reference/mapping/types/core-types.asciidoc‎
Lines changed: 43 additions & 0 deletions b/‎docs/reference/mapping/types/core-types.asciidoc‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java‎
Lines changed: 1 addition & 0 deletions b/‎src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/main/java/org/elasticsearch/index/mapper/MapperBuilders.java‎
Lines changed: 4 additions & 0 deletions b/‎src/main/java/org/elasticsearch/index/mapper/MapperBuilders.java‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java‎
Lines changed: 7 additions & 0 deletions b/‎src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java‎
Lines changed: 76 additions & 35 deletions b/‎src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java‎
Lines changed: 76 additions & 35 deletions
@@ -212,6 +212,49 @@ defaults to `true` or to the parent `object` type setting.
 
 |=======================================================================
 
+[float]
+[[token_count]]
+==== Token Count
+added[0.90.8]
+The `token_count` type maps to the JSON string type but indexes and stores
+the number of tokens in the string rather than the string itself. For
+example:
+
+[source,js]
+--------------------------------------------------
+{
+ "tweet" : {
+ "properties" : {
+ "message" : {
+ "type" : "multi_field",
+ "fields" : {
+ "name": {
+ "type": "string"
+ },
+ "word_count": {
+ "type" : "token_count",
+ "store" : "yes",
+ "analyzer" : "standard"
+ }
+ }
+ }
+ }
+ }
+}
+--------------------------------------------------
+
+All the configuration that can be specified for a number can also be
+specified for a `token_count`. The only extra configuration is the required
+`analyzer` field, which specifies the analyzer used to break the string
+into tokens. For best performance, use an analyzer with no token filters.
+
+[NOTE]
+===================================================================
+Technically, the `token_count` type sums position increments rather than
+counting tokens. This means that even if the analyzer filters out stop
+words, they are still included in the count.
+===================================================================
+
 [float]
 [[date]]
 ==== Date
 
@@ -95,6 +95,7 @@ public DocumentMapperParser(Index index, @IndexSettings Settings indexSettings,
  .put(DateFieldMapper.CONTENT_TYPE, new DateFieldMapper.TypeParser())
  .put(IpFieldMapper.CONTENT_TYPE, new IpFieldMapper.TypeParser())
  .put(StringFieldMapper.CONTENT_TYPE, new StringFieldMapper.TypeParser())
+ .put(TokenCountFieldMapper.CONTENT_TYPE, new TokenCountFieldMapper.TypeParser())
  .put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser())
  .put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
  .put(MultiFieldMapper.CONTENT_TYPE, new MultiFieldMapper.TypeParser())
 
@@ -133,6 +133,10 @@ public static IntegerFieldMapper.Builder integerField(String name) {
  return new IntegerFieldMapper.Builder(name);
  }
 
+ public static TokenCountFieldMapper.Builder tokenCountField(String name) { // factory: returns a new TokenCountFieldMapper builder for the given field name
+ return new TokenCountFieldMapper.Builder(name);
+ }
+
  public static LongFieldMapper.Builder longField(String name) { // factory: returns a new LongFieldMapper builder for the given field name
  return new LongFieldMapper.Builder(name);
  }
 
@@ -313,7 +313,10 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t
  }
  }
  }
+ addIntegerFields(fields, value, boost);
+ }
 
+ protected void addIntegerFields(List<Field> fields, int value, float boost) {
  if (fieldType.indexed() || fieldType.stored()) {
  CustomIntegerNumericField field = new CustomIntegerNumericField(this, value, fieldType);
  field.setBoost(boost);
@@ -326,6 +329,10 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t
  }
  }
 
+ protected Integer nullValue() { // protected accessor: returns nullValue unchanged (boxed Integer, so it can be null)
+ return nullValue;
+ }
+
  @Override
  protected String contentType() {
  return CONTENT_TYPE;
 
@@ -259,57 +259,69 @@ public Filter nullValueFilter() {
 
  @Override
  protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException { // resolves one string value and appends the fields it produces to `fields`
- String value = nullValue;
- float boost = this.boost;
- if (context.externalValueSet()) {
- value = (String) context.externalValue();
- } else {
- XContentParser parser = context.parser();
- if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
- value = nullValue;
- } else if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
- XContentParser.Token token;
- String currentFieldName = null;
- while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
- if (token == XContentParser.Token.FIELD_NAME) {
- currentFieldName = parser.currentName();
- } else {
- if ("value".equals(currentFieldName) || "_value".equals(currentFieldName)) {
- value = parser.textOrNull();
- } else if ("boost".equals(currentFieldName) || "_boost".equals(currentFieldName)) {
- boost = parser.floatValue();
- } else {
- throw new ElasticSearchIllegalArgumentException("unknown property [" + currentFieldName + "]");
- }
- }
- }
- } else {
- value = parser.textOrNull();
- }
- }
- if (value == null) {
+ ValueAndBoost valueAndBoost = parseCreateFieldForString(context, nullValue, boost); // value/boost resolution is delegated to the shared static helper
+ if (valueAndBoost.value() == null) { // no value resolved (and no null substitute): add nothing
  return;
  }
- if (ignoreAbove > 0 && value.length() > ignoreAbove) {
+ if (ignoreAbove > 0 && valueAndBoost.value().length() > ignoreAbove) { // values longer than ignoreAbove are skipped; ignoreAbove <= 0 disables the check
  return;
  }
  if (context.includeInAll(includeInAll, this)) {
- context.allEntries().addText(names.fullName(), value, boost);
+ context.allEntries().addText(names.fullName(), valueAndBoost.value(), valueAndBoost.boost());
  }
 
  if (fieldType.indexed() || fieldType.stored()) {
- Field field = new StringField(names.indexName(), value, fieldType);
- field.setBoost(boost);
+ Field field = new StringField(names.indexName(), valueAndBoost.value(), fieldType);
+ field.setBoost(valueAndBoost.boost());
  fields.add(field);
  }
  if (hasDocValues()) {
- fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(value)));
+ fields.add(new SortedSetDocValuesField(names.indexName(), new BytesRef(valueAndBoost.value())));
  }
  if (fields.isEmpty()) { // the list is still empty: report the value to the context as ignored
- context.ignoredValue(names.indexName(), value);
+ context.ignoredValue(names.indexName(), valueAndBoost.value());
  }
  }
 
+ /**
+ * Parse the current field as though it were a string and resolve its boost.
+ * <p>
+ * Sources are tried in this order: an external value set on the context
+ * (cast to {@code String}), a null token (yields {@code nullValue}), an
+ * object carrying {@code value}/{@code _value} and {@code boost}/{@code _boost}
+ * properties, and otherwise the parser's current token read as text.
+ * Only the object form can override {@code defaultBoost}.
+ * @param context parse context used during parsing
+ * @param nullValue value to use for null; also the starting value of the object form
+ * @param defaultBoost default boost value returned unless overwritten in the field
+ * @return the parsed value (may be {@code null}) and the boost either parsed or defaulted
+ * @throws IOException if thrown while parsing
+ * @throws ElasticSearchIllegalArgumentException if the object form contains any other property
+ */
+ public static ValueAndBoost parseCreateFieldForString(ParseContext context, String nullValue, float defaultBoost) throws IOException {
+ if (context.externalValueSet()) { // an externally supplied value wins; the parser is not consulted
+ return new ValueAndBoost((String) context.externalValue(), defaultBoost);
+ }
+ XContentParser parser = context.parser();
+ if (parser.currentToken() == XContentParser.Token.VALUE_NULL) { // explicit null: substitute nullValue
+ return new ValueAndBoost(nullValue, defaultBoost);
+ }
+ if (parser.currentToken() == XContentParser.Token.START_OBJECT) { // object form: read properties until END_OBJECT
+ XContentParser.Token token;
+ String currentFieldName = null;
+ String value = nullValue; // kept if the object has no value/_value property
+ float boost = defaultBoost; // kept if the object has no boost/_boost property
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ currentFieldName = parser.currentName();
+ } else {
+ if ("value".equals(currentFieldName) || "_value".equals(currentFieldName)) {
+ value = parser.textOrNull(); // NOTE(review): presumably null for a null token, which would replace nullValue here - confirm
+ } else if ("boost".equals(currentFieldName) || "_boost".equals(currentFieldName)) {
+ boost = parser.floatValue();
+ } else {
+ throw new ElasticSearchIllegalArgumentException("unknown property [" + currentFieldName + "]");
+ }
+ }
+ }
+ return new ValueAndBoost(value, boost);
+ }
+ return new ValueAndBoost(parser.textOrNull(), defaultBoost); // any other token: read as text with the default boost
+ }
+
  @Override
  protected String contentType() {
  return CONTENT_TYPE;
@@ -437,4 +449,33 @@ public void close() {
  value = null;
  }
  }
+
+ /**
+ * Immutable pair of a parsed string value and its boost, as returned from
+ * {@link #parseCreateFieldForString}. Both fields are final and set only in
+ * the constructor.
+ */
+ public static class ValueAndBoost {
+ private final String value;
+ private final float boost;
+
+ public ValueAndBoost(String value, float boost) { // stores both arguments as-is; no validation, so value may be null
+ this.value = value;
+ this.boost = boost;
+ }
+
+ /**
+ * Value of string field.
+ * @return value of string field exactly as passed to the constructor (may be {@code null})
+ */
+ public String value() {
+ return value;
+ }
+
+ /**
+ * Boost either parsed from the document or defaulted.
+ * @return boost either parsed from the document or defaulted
+ */
+ public float boost() {
+ return boost;
+ }
+ }
 }
Original file line number	Diff line number	Diff line change
`@@ -133,6 +133,10 @@ public static IntegerFieldMapper.Builder integerField(String name) {`
`133`	`133`	`return new IntegerFieldMapper.Builder(name);`
`134`	`134`	`}`
`135`	`135`
	`136`	`+ public static TokenCountFieldMapper.Builder tokenCountField(String name) {`
	`137`	`+ return new TokenCountFieldMapper.Builder(name);`
	`138`	`+ }`
	`139`	`+`
`136`	`140`	`public static LongFieldMapper.Builder longField(String name) {`
`137`	`141`	`return new LongFieldMapper.Builder(name);`
`138`	`142`	`}`
Original file line number	Diff line number	Diff line change
`@@ -313,7 +313,10 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t`
`313`	`313`	`}`
`314`	`314`	`}`
`315`	`315`	`}`
	`316`	`+ addIntegerFields(fields, value, boost);`
	`317`	`+ }`
`316`	`318`
	`319`	`+ protected void addIntegerFields(List<Field> fields, int value, float boost) {`
`317`	`320`	`if (fieldType.indexed() \|\| fieldType.stored()) {`
`318`	`321`	`CustomIntegerNumericField field = new CustomIntegerNumericField(this, value, fieldType);`
`319`	`322`	`field.setBoost(boost);`
`@@ -326,6 +329,10 @@ protected void innerParseCreateField(ParseContext context, List<Field> fields) t`
`326`	`329`	`}`
`327`	`330`	`}`
`328`	`331`
	`332`	`+ protected Integer nullValue() {`
	`333`	`+ return nullValue;`
	`334`	`+ }`
	`335`	`+`
`329`	`336`	`@Override`
`330`	`337`	`protected String contentType() {`
`331`	`338`	`return CONTENT_TYPE;`