Skip to content
24 changes: 21 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Elasticsearch Data Format Plugin
## Overview

Elasticsearch Data Format Plugin provides a feature to allow you to download a response of a search result as several formats other than JSON.
The supported formats are CSV, Excel, JSON(Bulk) and JSON(Object List).
The supported formats are CSV, Excel, JSON(Bulk), JSON(Object List) and GeoJSON.

## Version

Expand All @@ -31,7 +31,7 @@ If not, it's as scan query(all data are stored.).
| Request Parameter | Type | Description |
|:------------------|:-------:|:------------|
| append.header | boolean | Append column headers if true |
| fields_name | string | choose the fields to dump |
| fields_name | string | choose the fields to dump (comma separate format) |
| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |
| csv.separator | string | Separate character in CSV |
| csv.quote | string | Quote character in CSV|
Expand All @@ -46,7 +46,7 @@ If not, it's as scan query(all data are stored.).
| Request Parameter | Type | Description |
|:------------------|:-------:|:------------|
| append.header | boolean | Append column headers if true |
| fields_name | string | choose the fields to dump |
| fields_name | string | choose the fields to dump (comma separate format) |
| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |

### Excel 2007
Expand All @@ -55,6 +55,8 @@ If not, it's as scan query(all data are stored.).

| Request Parameter | Type | Description |
|:------------------|:-------:|:------------|
| append.header | boolean | Append column headers if true |
| fields_name | string | choose the fields to dump (comma separate format) |
| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |

### JSON (Elasticsearch Bulk format)
Expand All @@ -75,3 +77,19 @@ If not, it's as scan query(all data are stored.).
| :---------------- | :----: | :----------------------------------------------------------- |
| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |

### GeoJSON (Open GIS standard)

$ curl -o /tmp/data.json -XGET "localhost:9200/{index}/{type}/_data?format=geojson&source=..."

| Request Parameter | Type | Description |
| :----------------------- | :----: | :----------------------------------------------------------- |
| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |
| geometry.lon_field | string | Longitude field for coordinates (Support Geometry type "Point") |
| geometry.lat_field | string | Latitude field for coordinates (Support Geometry type "Point") |
| geometry.alt_field | string | Altitude field for coordinates (Support Geometry type "Point") |
| geometry.coord_field | string | Coordinates field. Support all Geometry types (see [GeoJSON Example](https://en.wikipedia.org/wiki/GeoJSON)).<br/>If set, overwrite `geometry.lon_field`, `geometry.lat_field` and `geometry.alt_field` |
| geometry.type_field | string | Geometry type field (see [GeoJSON Example](https://en.wikipedia.org/wiki/GeoJSON))<br/>Only used if `geometry.coord_field` param is set |
| keep_geometry_info | boolean | Keep or not the original geometry fields in final GeoJSON properties (default: false) |
| exclude_fields | string | Exclude fields in final geojson properties (comma separate format) |

**NB**: Field name can use basic style like `a` or JSONpath style like `a.b.c[2].d`
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@
<artifactId>poi-ooxml-schemas</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.6</version>
</dependency>
<dependency>
<groupId>org.codelibs</groupId>
<artifactId>elasticsearch-cluster-runner</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.codelibs.elasticsearch.df.content;

import org.codelibs.elasticsearch.df.content.csv.CsvContent;
import org.codelibs.elasticsearch.df.content.geojson.GeoJsonContent;
import org.codelibs.elasticsearch.df.content.json.JsonContent;
import org.codelibs.elasticsearch.df.content.json.JsonListContent;
import org.codelibs.elasticsearch.df.content.xls.XlsContent;
Expand Down Expand Up @@ -117,6 +118,27 @@ public String fileName(final RestRequest request) {
}
return index + ".json";
}
},
GEOJSON(60) {
@Override
public String contentType() {
return "application/geo+json";
}

@Override
public DataContent dataContent(final Client client,
final RestRequest request) {
return new GeoJsonContent(client, request, this);
}

@Override
public String fileName(final RestRequest request) {
final String index = request.param("index");
if (index == null) {
return "_all.geojson";
}
return index + ".geojson";
}
};

private int index;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
package org.codelibs.elasticsearch.df.content.geojson;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.elasticsearch.df.content.ContentType;
import org.codelibs.elasticsearch.df.content.DataContent;
import org.codelibs.elasticsearch.df.util.JsonUtils;
import org.codelibs.elasticsearch.df.util.RequestUtil;
import org.codelibs.elasticsearch.df.util.StringUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.rest.RestChannel;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

public class GeoJsonContent extends DataContent {
private static final Logger logger = LogManager.getLogger(GeoJsonContent.class);

private final String geometryCoordinatesLonField;
private final String geometryCoordinatesLatField;
private final String geometryCoordinatesAltField;
private final String geometryTypeField;
private final String geometryCoordinatesField;
private final boolean geometryKeepGeoInfo;
private final List<String> excludeFields;

public GeoJsonContent(final Client client, final RestRequest request, final ContentType contentType) {
super(client, request, contentType);

geometryCoordinatesLonField = request.param("geometry.lon_field",StringUtils.EMPTY_STRING);
geometryCoordinatesLatField = request.param("geometry.lat_field",StringUtils.EMPTY_STRING);
geometryCoordinatesAltField = request.param("geometry.alt_field",StringUtils.EMPTY_STRING);
geometryTypeField = request.param("geometry.type_field",StringUtils.EMPTY_STRING);
geometryCoordinatesField = request.param("geometry.coord_field",StringUtils.EMPTY_STRING);
geometryKeepGeoInfo = request.paramAsBoolean("keep_geometry_info",false);

final String[] fields = request.paramAsStringArray("exclude_fields", StringUtils.EMPTY_STRINGS);
if (fields.length == 0) {
excludeFields = new ArrayList<>();
} else {
final List<String> fieldList = new ArrayList<>();
for (final String field : fields) {
fieldList.add(field.trim());
}
excludeFields = Collections.unmodifiableList(fieldList);
}

if (logger.isDebugEnabled()) {
logger.debug("geometryTypeField: {}, geometryCoordinatesField: {}, geometryCoordinatesLonField: {}, " +
"geometryCoordinatesLatField: {}, geometryCoordinatesAltField: {}, geometryKeepGeoInfo: {}, excludeFields: {}",
geometryTypeField, geometryCoordinatesField, geometryCoordinatesLonField,
geometryCoordinatesLatField, geometryCoordinatesAltField, geometryKeepGeoInfo, excludeFields);
}
}

@Override
public void write(final File outputFile, final SearchResponse response, final RestChannel channel,
final ActionListener<Void> listener) {
try {
final OnLoadListener onLoadListener = new OnLoadListener(
outputFile, listener);
onLoadListener.onResponse(response);
} catch (final Exception e) {
listener.onFailure(new ElasticsearchException("Failed to write data.",
e));
}
}

protected class OnLoadListener implements ActionListener<SearchResponse> {
protected ActionListener<Void> listener;

protected Writer writer;

protected File outputFile;

private long currentCount = 0;

private boolean firstLine = true;

protected OnLoadListener(final File outputFile, final ActionListener<Void> listener) {
this.outputFile = outputFile;
this.listener = listener;
try {
writer = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(outputFile), "UTF-8"));
} catch (final Exception e) {
throw new ElasticsearchException("Could not open "
+ outputFile.getAbsolutePath(), e);
}
try {
writer.append("{\"type\": \"FeatureCollection\", \"features\": [");
}catch (final Exception e) {
onFailure(e);
}
}

@Override
public void onResponse(final SearchResponse response) {
final Gson gsonWriter = new GsonBuilder().create();
final String scrollId = response.getScrollId();
final SearchHits hits = response.getHits();
final int size = hits.getHits().length;
currentCount += size;
if (logger.isDebugEnabled()) {
logger.debug("scrollId: {}, totalHits: {}, hits: {}, current: {}",
scrollId, hits.getTotalHits(), size, currentCount);
}
try {
for (final SearchHit hit : hits) {
final String source = XContentHelper.convertToJson(
hit.getSourceRef(), true, false, XContentType.JSON);
if (!firstLine){
writer.append(',');
}else{
firstLine = false;
}

final JsonElement propertiesJson = JsonParser.parseString(source);
String geometryType = "";

JsonArray geometryCoordinates = new JsonArray();
if (!geometryCoordinatesField.isEmpty()){
JsonElement jsonEltCoord = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesField);
if (jsonEltCoord !=null && !jsonEltCoord.isJsonNull()){
geometryCoordinates = jsonEltCoord.getAsJsonArray​();
if (!geometryKeepGeoInfo){
JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesField);
}
}
if (!geometryTypeField.isEmpty()){
JsonElement jsonEltType = JsonUtils.getJsonElement(propertiesJson,geometryTypeField);
if (jsonEltType !=null && !jsonEltType.isJsonNull()){
geometryType = jsonEltType.getAsString();
if (!geometryKeepGeoInfo){
JsonUtils.removeJsonElement(propertiesJson,geometryTypeField);
}
}
}
}else{
if (!geometryCoordinatesLonField.isEmpty() && !geometryCoordinatesLatField.isEmpty()){
JsonElement jsonEltLon = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLonField);
JsonElement jsonEltLat = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLatField);
if (jsonEltLon !=null && !jsonEltLon.isJsonNull() && jsonEltLat !=null && !jsonEltLat.isJsonNull()){
geometryCoordinates.add(jsonEltLon.getAsNumber());
geometryCoordinates.add(jsonEltLat.getAsNumber());
if (!geometryKeepGeoInfo) {
JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesLonField);
JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesLatField);
}
}
}
if (!geometryCoordinatesAltField.isEmpty()){
JsonElement jsonElt = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesAltField);
if (jsonElt !=null && !jsonElt.isJsonNull()){
geometryCoordinates.add(jsonElt.getAsNumber());
if (!geometryKeepGeoInfo) {
JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesAltField);
}
}
}
geometryType = "Point";
}

for (String excludeField : excludeFields) {
JsonUtils.removeJsonElement(propertiesJson,excludeField);
}

JsonObject geometryObject = new JsonObject();
geometryObject.addProperty("type", geometryType);
geometryObject.add("coordinates", geometryCoordinates);

JsonObject featureObject = new JsonObject();
featureObject.addProperty("type", "Feature");
featureObject.add("geometry", geometryObject);
featureObject.add("properties", propertiesJson.getAsJsonObject());

writer.append('\n').append(gsonWriter.toJson(featureObject));
}

if (size == 0 || scrollId == null) {
// end
writer.append('\n').append("]}");
writer.flush();
close();
listener.onResponse(null);
} else {
client.prepareSearchScroll(scrollId)
.setScroll(RequestUtil.getScroll(request))
.execute(this);
}
} catch (final Exception e) {
onFailure(e);
}
}

@Override
public void onFailure(final Exception e) {
try {
close();
} catch (final Exception e1) {
// ignore
}
listener.onFailure(new ElasticsearchException("Failed to write data.",
e));
}

private void close() {
if (writer != null) {
try {
writer.close();
} catch (final IOException e) {
throw new ElasticsearchException("Could not close "
+ outputFile.getAbsolutePath(), e);
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ private ContentType getContentType(final RestRequest request) {
} else if ("application/list+json".equals(contentType)
|| "jsonlist".equals(contentType)) {
return ContentType.JSONLIST;
} else if ("application/geo+json".equals(contentType)
|| "application/geojson".equals(contentType)
|| "geojson".equals(contentType)) {
return ContentType.GEOJSON;
}

return null;
Expand Down
Loading