
Commit 0b9873f

JSON SerDe: date column in any time zone - for at least Hive-3.1+ (#183)
1 parent 146aaef commit 0b9873f

4 files changed: +85 additions, -105 deletions

hive/src/main/java/com/esri/hadoop/hive/serde/BaseJsonSerDe.java

Lines changed: 30 additions & 23 deletions
@@ -202,6 +202,9 @@ public Object deserialize(Writable json_in) throws SerDeException {
 		} catch (IOException e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
+		} catch (java.text.ParseException jtpe) {
+			// probably should set field to null?
+			jtpe.printStackTrace();
 		}

 		return row;
@@ -321,7 +324,7 @@ private void generateJsonFromWritable(Writable value, int fieldIndex, String lab
 			PrimitiveObjectInspector poi, JsonGenerator jsonGen)
 		throws JsonProcessingException, IOException {
 		Object prim = poi.getPrimitiveJavaObject(value);
-		Long epoch = HiveShims.getPrimitiveEpoch(prim, tz);
+		Long epoch = HiveShims.getPrimitiveEpoch(prim);
 		if (epoch == null) { // anything but a recognized DATE or TIMESTAMP
 			jsonGen.writeObjectField(label, prim);
 		} else {
@@ -335,43 +338,45 @@ private void generateJsonFromWritable(Writable value, int fieldIndex, String lab
 	// Parse OGCGeometry from JSON
 	abstract protected OGCGeometry parseGeom(JsonParser parser);

-	private java.sql.Date parseDate(JsonParser parser) throws JsonParseException, IOException {
-		java.sql.Date jsd = null;
+	// See HIVE-12192.
+	// Complete handling of absent/invalid date as SQL NULL would be broader scope.
+	private long parseDate(JsonParser parser)
+		throws JsonParseException, IOException, java.text.ParseException {
 		if (JsonToken.VALUE_NUMBER_INT.equals(parser.getCurrentToken())) {
-			// DateWritable#daysToMillis adjusts the numerical/epoch time
-			// to midnight in the local time zone. See HIVE-12192.
-			// Attempt to compensate, when date provided as epoch, which is unambiguously UTC.
 			long epoch = parser.getLongValue();
-			jsd = new java.sql.Date(epoch - tz.getOffset(epoch));
-		} else try {
+			return epoch;
+		} else {
 			long epoch = parseTime(parser.getText(), "yyyy-MM-dd");
-			jsd = new java.sql.Date(epoch + 43200000); // midday rather than midnight
-		} catch (java.text.ParseException e) {
-			// null
-		}
-		return jsd;
+			return epoch + 43200000; // midday rather than midnight
+		}
 	}

-	private java.sql.Timestamp parseTime(JsonParser parser) throws JsonParseException, IOException {
-		java.sql.Timestamp jst = null;
+	// Complete handling of absent/invalid date-time as SQL NULL would be broader scope.
+	private long parseTime(JsonParser parser) throws JsonParseException, IOException, java.text.ParseException {
+		long epoch = -9999L;
+		java.text.ParseException jtpe = null;
 		if (JsonToken.VALUE_NUMBER_INT.equals(parser.getCurrentToken())) {
-			long epoch = parser.getLongValue();
-			jst = new java.sql.Timestamp(epoch);
+			epoch = parser.getLongValue();
 		} else {
 			String value = parser.getText();
 			int point = value.indexOf('.');
 			String dateStr = (point < 0) ? value : value.substring(0,point+4);
 			String[] formats = {"yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm", "yyyy-MM-dd"};
 			for (String format: formats) {
 				try {
-					jst = new java.sql.Timestamp(parseTime(dateStr, format));
+					epoch = parseTime(dateStr, format);
+					jtpe = null;
 					break;
-				} catch (java.text.ParseException e) {
-					// remain null after this attempted format
+				} catch (java.text.ParseException exc) {
+					if (null == jtpe)
+						jtpe = exc;
 				}
 			}
 		} // else String value
-		return jst;
+		if (null == jtpe)
+			return epoch;
+		else
+			throw jtpe;
 	}

 	private long parseTime(String value, String format) throws java.text.ParseException { // epoch
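
The rewritten parseDate above returns epoch milliseconds instead of a java.sql.Date and shifts a parsed "yyyy-MM-dd" string to midday, so that the later millis-to-days conversion behind DateWritable lands on the same calendar date for zone offsets of roughly half a day in either direction. The parseTime(String, String) helper is not shown in this hunk, so the following is only a standalone sketch of the idea; it assumes a SimpleDateFormat pinned to UTC, which may differ from the SerDe's actual helper.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

// Standalone sketch of the midday trick (not the SerDe code itself).
public class MiddayDateSketch {
    static final long HALF_DAY_MS = 43200000L; // 12 hours, the same constant as in parseDate()

    // Parse "yyyy-MM-dd" to epoch millis at midday.
    static long parseDateToMiddayEpoch(String yyyyMmDd) throws ParseException {
        SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
        fmt.setTimeZone(TimeZone.getTimeZone("UTC")); // assumption: interpret the string as UTC midnight
        long midnightUtc = fmt.parse(yyyyMmDd).getTime();
        return midnightUtc + HALF_DAY_MS;             // midday rather than midnight
    }

    public static void main(String[] args) throws ParseException {
        long epoch = parseDateToMiddayEpoch("2022-01-07");
        long day = 24L * 3600 * 1000;
        // Shifting the midday value by up to +/-11 hours does not change the day number.
        System.out.println(epoch / day == (epoch + 11 * 3600 * 1000L) / day); // true
        System.out.println(epoch / day == (epoch - 11 * 3600 * 1000L) / day); // true
    }
}
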
@@ -388,8 +393,10 @@ private long parseTime(String value, String format) throws java.text.ParseExcept
 	 * @param parser JsonParser pointing to the attribute
 	 * @throws JsonParseException
 	 * @throws IOException
+	 * @throws ParseException
 	 */
-	private void setRowFieldFromParser(int fieldIndex, JsonParser parser) throws JsonParseException, IOException{
+	private void setRowFieldFromParser(int fieldIndex, JsonParser parser) throws
+		JsonParseException, IOException, java.text.ParseException {

 		PrimitiveObjectInspector poi = (PrimitiveObjectInspector)this.columnOIs.get(fieldIndex);
 		if (JsonToken.VALUE_NULL == parser.getCurrentToken())
@@ -420,7 +427,7 @@ private void setRowFieldFromParser(int fieldIndex, JsonParser parser) throws Jso
 			case BOOLEAN:
 				((BooleanWritable)row.get(fieldIndex)).set(parser.getBooleanValue());
 				break;
-			case DATE: // DateWritable stores days not milliseconds.
+			case DATE: // DateWritable stores days not milliseconds. See also HIVE-12192.
 				HiveShims.setDateWritable(row.get(fieldIndex), parseDate(parser));
 				break;
 			case TIMESTAMP:

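On the write path, generateJsonFromWritable now asks HiveShims.getPrimitiveEpoch(prim) for an epoch and, when one comes back, presumably writes the value as a number (that else branch lies outside the hunk); anything it does not recognize as a DATE or TIMESTAMP goes through writeObjectField unchanged. A minimal stand-in for that dispatch, reduced to the java.sql/java.util branches visible in the HiveShims diff below (the reflective Hive-3.1 org.apache.hadoop.hive.common.type branches are omitted):

// Simplified stand-in for HiveShims.getPrimitiveEpoch(): epoch millis for the
// date/time types it recognizes, null for everything else, so the caller can
// choose between writing a number and writing the object as-is.
public class EpochDispatchSketch {
    static Long epochOrNull(Object prim) {
        if (prim instanceof java.sql.Timestamp) {
            return ((java.sql.Timestamp) prim).getTime();
        } else if (prim instanceof java.util.Date) { // java.sql.Date extends java.util.Date
            return ((java.util.Date) prim).getTime();
        }
        return null; // anything but a recognized DATE or TIMESTAMP
    }

    public static void main(String[] args) {
        System.out.println(epochOrNull(new java.sql.Date(1641535200000L))); // 1641535200000
        System.out.println(epochOrNull("not a date"));                      // null
    }
}
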
hive/src/main/java/com/esri/hadoop/shims/HiveShims.java

Lines changed: 7 additions & 56 deletions
@@ -3,7 +3,9 @@
 import java.lang.reflect.Method;
 import java.util.TimeZone;

-
+/**
+ * These shims are internal to Spatial-Framework-for-Hadoop and subject to change without notice.
+ */
 public class HiveShims {

 	/**
@@ -48,7 +50,7 @@ static String getAsStringOrNull(Class<?> clazz, String constant) {
 	/**
 	 * Classes o.a.h.h.common.type Date & Timestamp were introduced in Hive-3.1 version.
 	 */
-	public static Long getPrimitiveEpoch(Object prim, TimeZone tz) {
+	public static Long getPrimitiveEpoch(Object prim) {
 		if (prim instanceof java.sql.Timestamp) {
 			return ((java.sql.Timestamp)prim).getTime();
 		} else if (prim instanceof java.util.Date) {
@@ -78,9 +80,8 @@ public static Long getPrimitiveEpoch(Object prim, TimeZone tz) {
 	 * Type DATE was introduced in Hive-0.12 - class DateWritable in API.
 	 * Class DateWritableV2 is used instead as of Hive-3.1 version.
 	 */
-	public static void setDateWritable(Object dwHive, long epoch
-		, TimeZone tz
-		) {
+	// See HIVE-12192.
+	public static void setDateWritable(Object dwHive, long epoch) {
 		try { // Hive 3.1 and above
 			Class<?> dtClazz = Class.forName("org.apache.hadoop.hive.common.type.Date");
 			Class<?> dwClazz = Class.forName("org.apache.hadoop.hive.serde2.io.DateWritableV2");
@@ -97,32 +98,7 @@ public static void setDateWritable(Object dwHive, long epoch
 		} catch (Exception e2) { // Hive 0.11 and below
 			// column type DATE not supported
 			throw new UnsupportedOperationException("DATE type");
-			}
-		}
-	} // setDateWritable
-
-	/**
-	 * Type DATE was introduced in Hive-0.12 - class DateWritable in API.
-	 * Class DateWritableV2 is used instead as of Hive-3.1 version.
-	 */
-	public static void setDateWritable(Object dwHive, java.sql.Date jsd) {
-		try { // Hive 3.1 and above
-			Class<?> dtClazz = Class.forName("org.apache.hadoop.hive.common.type.Date");
-			Class<?> dwClazz = Class.forName("org.apache.hadoop.hive.serde2.io.DateWritableV2");
-			Method dtSetImpl = dtClazz.getMethod("setTimeInMillis", long.class);
-			Method dwSetImpl = dwClazz.getMethod("set", dtClazz);
-			Object dtObj = dtClazz.getConstructor().newInstance();
-			dtSetImpl.invoke(dtObj, jsd.getTime());
-			dwSetImpl.invoke(dwHive, dtObj);
-		} catch (Exception e1) {
-			try { // Hive 0.12 and above
-				Class<?> dwClazz = Class.forName("org.apache.hadoop.hive.serde2.io.DateWritable");
-				Method dwSetImpl = dwClazz.getMethod("set", java.sql.Date.class);
-				dwSetImpl.invoke(dwHive, jsd);
-			} catch (Exception e2) { // Hive 0.11 and below
-				// column type DATE not supported
-				throw new UnsupportedOperationException("DATE type");
-			}
+		}
 		}
 	} // setDateWritable

@@ -151,29 +127,4 @@ public static void setTimeWritable(Object twHive, long epoch) {
 		}
 	} // setTimeWritable

-	/**
-	 * Type TIMESTAMP was introduced in Hive-0.12 - class TimestampWritable in API.
-	 * Class TimestampWritableV2 is used instead as of Hive-3.1 version.
-	 */
-	public static void setTimeWritable(Object twHive, java.sql.Timestamp jst) {
-		long epoch = jst.getTime();
-		try { // Hive 3.1 and above
-			Class<?> ttClazz = Class.forName("org.apache.hadoop.hive.common.type.Timestamp");
-			Class<?> twClazz = Class.forName("org.apache.hadoop.hive.serde2.io.TimestampWritableV2");
-			Method ttSetImpl = ttClazz.getMethod("setTimeInMillis", long.class);
-			Method twSetImpl = twClazz.getMethod("set", ttClazz);
-			Object ttObj = ttClazz.getConstructor().newInstance();
-			ttSetImpl.invoke(ttObj, epoch);
-			twSetImpl.invoke(twHive, ttObj);
-		} catch (Exception e1) {
-			try { // Hive 0.12 and above
-				Class<?> twClazz = Class.forName("org.apache.hadoop.hive.serde2.io.TimestampWritable");
-				Method twSetImpl = twClazz.getMethod("set", java.sql.Timestamp.class);
-				twSetImpl.invoke(twHive, new java.sql.Timestamp(epoch));
-			} catch (Exception e2) { // Hive 0.11 and below
-				// column type TIMESTAMP not supported
-				throw new UnsupportedOperationException("TIMESTAMP type");
-			}
-		}
-	} // setTimeWritable
 }

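The shim that remains, setDateWritable(Object, long), keeps the reflection pattern shown above: try the Hive-3.1 DateWritableV2 API first and fall back to the older DateWritable. Only the tail of the retained method appears in the hunk, so the fallback branch below is reconstructed by analogy with the deleted java.sql.Date overload; read it as a sketch of the pattern, not the exact shipped code.

import java.lang.reflect.Method;

public class DateWritableShimSketch {
    // Sketch of the reflection fallback: prefer DateWritableV2 (Hive 3.1+),
    // fall back to DateWritable (Hive 0.12+), and give up only if neither works.
    static void setDateWritable(Object dwHive, long epoch) {
        try { // Hive 3.1 and above
            Class<?> dtClazz = Class.forName("org.apache.hadoop.hive.common.type.Date");
            Class<?> dwClazz = Class.forName("org.apache.hadoop.hive.serde2.io.DateWritableV2");
            Method dtSet = dtClazz.getMethod("setTimeInMillis", long.class);
            Method dwSet = dwClazz.getMethod("set", dtClazz);
            Object dtObj = dtClazz.getConstructor().newInstance();
            dtSet.invoke(dtObj, epoch);
            dwSet.invoke(dwHive, dtObj);
        } catch (Exception e1) {
            try { // Hive 0.12 through 2.x (reconstructed branch, see lead-in)
                Class<?> dwClazz = Class.forName("org.apache.hadoop.hive.serde2.io.DateWritable");
                Method dwSet = dwClazz.getMethod("set", java.sql.Date.class);
                dwSet.invoke(dwHive, new java.sql.Date(epoch));
            } catch (Exception e2) { // neither API usable: column type DATE not supported
                throw new UnsupportedOperationException("DATE type");
            }
        }
    }

    public static void main(String[] args) {
        try {
            // A plain Object is not a real DateWritable target, so this falls
            // through to the UnsupportedOperationException branch.
            setDateWritable(new Object(), 1641535200000L);
        } catch (UnsupportedOperationException uoe) {
            System.out.println("no usable DateWritable target: " + uoe.getMessage());
        }
    }
}
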
hive/src/test/java/com/esri/hadoop/hive/serde/TestEsriJsonSerDe.java

Lines changed: 25 additions & 14 deletions
@@ -3,7 +3,6 @@
 import org.junit.Assert;
 import org.junit.Test;

-import java.lang.reflect.Method;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Properties;
@@ -65,26 +64,37 @@ public void TestIntWrite() throws Exception {

 	@Test
 	public void TestEpochWrite() throws Exception {
-		ArrayList<Object> stuff = new ArrayList<Object>();
 		Properties proptab = new Properties();
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
 		AbstractSerDe jserde = mkSerDe(proptab);
 		StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();
+		long day = 24*3600*1000; // DateWritable represents days not milliseconds.

-		// {"attributes":{"when":147147147147}}
-		long epoch = 147147147147L;
-		long zoned = epoch - TimeZone.getDefault().getOffset(epoch);
-		java.sql.Date expected = new java.sql.Date(zoned);
-		addWritable(stuff, expected);
+		long epoch = 1641535200000L; // 2022-01-07 06:00 UTC
+		//long zoned = epoch - TimeZone.getDefault().getOffset(epoch);
+		java.sql.Date jsd = new java.sql.Date(epoch); // zoned?
+		ArrayList<Object> stuff = new ArrayList<Object>();
+		addWritable(stuff, jsd);
 		Writable jsw = jserde.serialize(stuff, rowOI);
 		JsonNode jn = new ObjectMapper().readTree(((Text)jsw).toString());
 		jn = jn.findValue("attributes");
 		jn = jn.findValue("when");
-		java.sql.Date actual = new java.sql.Date(jn.getLongValue());
-		long day = 24*3600*1000; // DateWritable represents days not milliseconds.
+		Assert.assertEquals(epoch/day, jn.getLongValue()/day);
+
+		epoch = 1641578400000L; // 2022-01-07 18:00 UTC
+		//long zoned = epoch - TimeZone.getDefault().getOffset(epoch);
+		jsd = new java.sql.Date(epoch); // zoned?
+		stuff = new ArrayList<Object>();
+		addWritable(stuff, jsd);
+		jsw = jserde.serialize(stuff, rowOI);
+		jn = new ObjectMapper().readTree(((Text)jsw).toString());
+		jn = jn.findValue("attributes");
+		jn = jn.findValue("when");
+		System.err.println(jn);
 		Assert.assertEquals(epoch/day, jn.getLongValue()/day);
 	}
+
 	@Test
 	public void TestTimeWrite() throws Exception {
 		ArrayList<Object> stuff = new ArrayList<Object>();
@@ -171,7 +181,6 @@ public void TestDateParse() throws Exception {
 		Object row = jserde.deserialize(value);
 		StructField f0 = rowOI.getStructFieldRef("when");
 		Object fieldData = rowOI.getStructFieldData(row, f0);
-
 		Assert.assertEquals(dateStr, iso8601FromWritable(fieldData));
 		dateStr = "2017-05-05";
 		value.set("{\"attributes\":{\"when\":\"" + dateStr + "\"}}");
@@ -191,18 +200,20 @@ public void TestEpochParse() throws Exception {
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
 		jserde.initialize(config, proptab);
 		StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();
+		// Half a day apart to test both a.m. & p.m. whether in East or West

-		value.set("{\"attributes\":{\"when\":147147147147}}");
+		value.set("{\"attributes\":{\"when\":1641535200000}}"); // 2022-01-07 06:00 UTC
 		Object row = jserde.deserialize(value);
 		StructField f0 = rowOI.getStructFieldRef("when");
 		Object fieldData = rowOI.getStructFieldData(row, f0);
 		long day = 24*3600*1000; // DateWritable represents days not milliseconds.
-		long epochExpected = 147147147147L;
+		long epochExpected = 1641535200000L; // or likely 00:00 UTC
 		Assert.assertEquals(epochExpected/day, epochFromWritable(fieldData)/day);
-		value.set("{\"attributes\":{\"when\":142857142857}}");
+
+		value.set("{\"attributes\":{\"when\":1641578400000}}"); // 2022-01-07 18:00 UTC
 		row = jserde.deserialize(value);
 		fieldData = rowOI.getStructFieldData(row, f0);
-		epochExpected = 142857142857L;
+		epochExpected = 1641578400000L; // or likely 00:00 UTC
 		Assert.assertEquals(epochExpected/day, epochFromWritable(fieldData)/day);
 	}

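Both the write and parse tests now assert on epoch/day rather than on exact millisecond values: integer division buckets the expected and round-tripped values into a day number, so a round trip that lands on the same UTC day (for example at 00:00 instead of 06:00 or 18:00) still passes. The two fixtures sit half a day apart, so at least one of them exercises a morning and one an afternoon in any zone. A small sketch of the arithmetic:

// Why the assertions divide by a day: values on the same UTC calendar day
// fall into the same bucket, so a shift to midnight of that day does not
// break the comparison.
public class DayBucketSketch {
    public static void main(String[] args) {
        long day = 24L * 3600 * 1000;
        long morning  = 1641535200000L; // 2022-01-07 06:00 UTC, first fixture
        long evening  = 1641578400000L; // 2022-01-07 18:00 UTC, second fixture
        long midnight = 1641513600000L; // 2022-01-07 00:00 UTC, a likely round-trip result

        System.out.println(morning / day);  // 18999
        System.out.println(evening / day);  // 18999
        System.out.println(midnight / day); // 18999 -- same bucket, assertion holds
    }
}
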
hive/src/test/java/com/esri/hadoop/hive/serde/TestGeoJsonSerDe.java

Lines changed: 23 additions & 12 deletions
@@ -54,24 +54,33 @@ public void TestIntWrite() throws Exception {

 	@Test
 	public void TestEpochWrite() throws Exception {
-		ArrayList<Object> stuff = new ArrayList<Object>();
 		Properties proptab = new Properties();
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
 		AbstractSerDe jserde = mkSerDe(proptab);
 		StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();
+		long day = 24*3600*1000; // DateWritable represents days not milliseconds.

-		// {"properties":{"when":147147147147}}
-		long epoch = 147147147147L;
-		long zoned = epoch - TimeZone.getDefault().getOffset(epoch);
-		java.sql.Date expected = new java.sql.Date(zoned);
-		addWritable(stuff, expected);
+		long epoch = 1641535200000L; // 2022-01-07 06:00 UTC
+		java.sql.Date jsd = new java.sql.Date(epoch); // zoned?
+		ArrayList<Object> stuff = new ArrayList<Object>();
+		addWritable(stuff, jsd);
 		Writable jsw = jserde.serialize(stuff, rowOI);
 		JsonNode jn = new ObjectMapper().readTree(((Text)jsw).toString());
 		jn = jn.findValue("properties");
 		jn = jn.findValue("when");
-		java.sql.Date actual = new java.sql.Date(jn.getLongValue());
-		long day = 24*3600*1000; // DateWritable represents days not milliseconds.
+		Assert.assertEquals(epoch/day, jn.getLongValue()/day);
+
+		epoch = 1641578400000L; // 2022-01-07 18:00 UTC
+		//long zoned = epoch - TimeZone.getDefault().getOffset(epoch);
+		jsd = new java.sql.Date(epoch); // zoned?
+		stuff = new ArrayList<Object>();
+		addWritable(stuff, jsd);
+		jsw = jserde.serialize(stuff, rowOI);
+		jn = new ObjectMapper().readTree(((Text)jsw).toString());
+		jn = jn.findValue("properties");
+		jn = jn.findValue("when");
+		System.err.println(jn);
 		Assert.assertEquals(epoch/day, jn.getLongValue()/day);
 	}

@@ -158,18 +167,20 @@ public void TestEpochParse() throws Exception {
 		proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
 		jserde.initialize(config, proptab);
 		StructObjectInspector rowOI = (StructObjectInspector)jserde.getObjectInspector();
+		// Half a day apart to test both a.m. & p.m. whether in East or West

-		value.set("{\"properties\":{\"when\":147147147147}}");
+		value.set("{\"properties\":{\"when\":1641535200000}}"); // 2022-01-07 06:00 UTC
 		Object row = jserde.deserialize(value);
 		StructField f0 = rowOI.getStructFieldRef("when");
 		Object fieldData = rowOI.getStructFieldData(row, f0);
 		long day = 24*3600*1000; // DateWritable represents days not milliseconds.
-		long epochExpected = 147147147147L;
+		long epochExpected = 1641535200000L; // or likely 00:00 UTC
 		Assert.assertEquals(epochExpected/day, epochFromWritable(fieldData)/day);
-		value.set("{\"properties\":{\"when\":142857142857}}");
+
+		value.set("{\"properties\":{\"when\":1641578400000}}"); // 2022-01-07 18:00 UTC
 		row = jserde.deserialize(value);
 		fieldData = rowOI.getStructFieldData(row, f0);
-		epochExpected = 142857142857L;
+		epochExpected = 1641578400000L; // or likely 00:00 UTC
 		Assert.assertEquals(epochExpected/day, epochFromWritable(fieldData)/day);
 	}
