Skip to content

Commit 3313308

Browse files
JSON SerDe: date-time columns for both Hive-3.1 and prior (#174)
- POM: dependency versions
1 parent 4666070 commit 3313308

File tree

6 files changed

+409
-139
lines changed

6 files changed

+409
-139
lines changed

hive/src/main/java/com/esri/hadoop/hive/serde/BaseJsonSerDe.java

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ public Object deserialize(Writable json_in) throws SerDeException {
147147

148148
// null out array because we reuse it and we don't want values persisting
149149
// from the last record
150-
for (int i=0;i<numColumns;i++)
150+
for (int i=0; i<numColumns; i++)
151151
row.set(i, null);
152152

153153
try {
@@ -321,12 +321,11 @@ private void generateJsonFromWritable(Writable value, int fieldIndex, String lab
321321
PrimitiveObjectInspector poi, JsonGenerator jsonGen)
322322
throws JsonProcessingException, IOException {
323323
Object prim = poi.getPrimitiveJavaObject(value);
324-
if (prim instanceof java.util.Date) {
325-
long epoch = ((java.util.Date)prim).getTime();
326-
long offset = prim instanceof java.sql.Timestamp ? 0 : tz.getOffset(epoch);
327-
jsonGen.writeObjectField(label, epoch - offset); // UTC
328-
} else {
324+
Long epoch = HiveShims.getPrimitiveEpoch(prim, tz);
325+
if (epoch == null) { // anything but a recognized DATE or TIMESTAMP
329326
jsonGen.writeObjectField(label, prim);
327+
} else {
328+
jsonGen.writeObjectField(label, epoch);
330329
}
331330
}
332331

@@ -339,11 +338,14 @@ private void generateJsonFromWritable(Writable value, int fieldIndex, String lab
339338
private java.sql.Date parseDate(JsonParser parser) throws JsonParseException, IOException {
340339
java.sql.Date jsd = null;
341340
if (JsonToken.VALUE_NUMBER_INT.equals(parser.getCurrentToken())) {
341+
// DateWritable#daysToMillis adjusts the numerical/epoch time
342+
// to midnight in the local time zone. See HIVE-12192.
343+
// Attempt to compensate, when date provided as epoch, which is unambiguously UTC.
342344
long epoch = parser.getLongValue();
343-
jsd = new java.sql.Date(epoch);
345+
jsd = new java.sql.Date(epoch - tz.getOffset(epoch));
344346
} else try {
345347
long epoch = parseTime(parser.getText(), "yyyy-MM-dd");
346-
jsd = new java.sql.Date(epoch);
348+
jsd = new java.sql.Date(epoch + 43200000); // midday rather than midnight
347349
} catch (java.text.ParseException e) {
348350
// null
349351
}
@@ -358,35 +360,24 @@ private java.sql.Timestamp parseTime(JsonParser parser) throws JsonParseExceptio
358360
} else {
359361
String value = parser.getText();
360362
int point = value.indexOf('.');
361-
if (point >= 0) {
362-
jst = parseTime(value.substring(0,point+4)); // "yyyy-MM-dd HH:mm:ss.SSS" - truncate
363-
// idea: jst.setNanos; alt: Java-8, JodaTime, javax.xml.bind.DatatypeConverter
364-
} else {
365-
jst = parseTime(value); // "yyyy-MM-dd HH:mm:ss.SSS"
366-
String[] formats = {"yyyy-MM-dd HH:mm:ss","yyyy-MM-dd HH:mm", "yyyy-MM-dd"};
367-
for (String format: formats) {
368-
if (jst != null) break;
369-
try {
370-
jst = new java.sql.Timestamp(parseTime(value, format));
371-
} catch (java.text.ParseException e) {
372-
// remain null
373-
}
363+
String dateStr = (point < 0) ? value : value.substring(0,point+4);
364+
String[] formats = {"yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm", "yyyy-MM-dd"};
365+
for (String format: formats) {
366+
try {
367+
jst = new java.sql.Timestamp(parseTime(dateStr, format));
368+
break;
369+
} catch (java.text.ParseException e) {
370+
// remain null after this attempted format
374371
}
375372
}
376-
}
373+
} // else String value
377374
return jst;
378375
}
379376

380-
private java.sql.Timestamp parseTime(String value) {
381-
try {
382-
return java.sql.Timestamp.valueOf(value);
383-
} catch (IllegalArgumentException iae) {
384-
return null;
385-
}
386-
}
387-
388377
private long parseTime(String value, String format) throws java.text.ParseException { // epoch
389-
return new java.text.SimpleDateFormat(format).parse(value).getTime();
378+
java.text.SimpleDateFormat dtFmt = new java.text.SimpleDateFormat(format);
379+
dtFmt.setTimeZone(TimeZone.getTimeZone("UTC"));
380+
return dtFmt.parse(value).getTime();
390381
}
391382

392383
/**
@@ -430,10 +421,10 @@ private void setRowFieldFromParser(int fieldIndex, JsonParser parser) throws Jso
430421
((BooleanWritable)row.get(fieldIndex)).set(parser.getBooleanValue());
431422
break;
432423
case DATE: // DateWritable stores days not milliseconds.
433-
((DateWritable)row.get(fieldIndex)).set(parseDate(parser));
424+
HiveShims.setDateWritable(row.get(fieldIndex), parseDate(parser));
434425
break;
435426
case TIMESTAMP:
436-
((TimestampWritable)row.get(fieldIndex)).set(parseTime(parser));
427+
HiveShims.setTimeWritable(row.get(fieldIndex), parseTime(parser));
437428
break;
438429
default: // STRING/unrecognized
439430
((Text)row.get(fieldIndex)).set(parser.getText());
Lines changed: 143 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
11
package com.esri.hadoop.shims;
22

3+
import java.lang.reflect.Method;
4+
import java.util.TimeZone;
5+
6+
37
public class HiveShims {

    /**
     * This class is supplied for compatibility between Hive versions.
     * At 0.10 the serde constants were moved to another package. Also,
     * at 0.11 the previous class will be re-added for backwards
     * compatibility, but deprecated.
     */
    public static class serdeConstants {
        public static final String LIST_COLUMNS;
        public static final String LIST_COLUMN_TYPES;

        static {
            Class<?> clazz = null;
            try {
                // Hive 0.10 and above constants
                clazz = Class.forName("org.apache.hadoop.hive.serde.serdeConstants");
            } catch (ClassNotFoundException e) {
                try {
                    // Hive 0.9 and below constants
                    clazz = Class.forName("org.apache.hadoop.hive.serde.Constants");
                } catch (ClassNotFoundException e1) {
                    // not much we can do here
                }
            }

            LIST_COLUMNS = getAsStringOrNull(clazz, "LIST_COLUMNS");
            LIST_COLUMN_TYPES = getAsStringOrNull(clazz, "LIST_COLUMN_TYPES");
        }

        static String getAsStringOrNull(Class<?> clazz, String constant) {
            try {
                return (String) clazz.getField(constant).get(null);
            } catch (Exception e) {
                // constant unavailable in this Hive version (or clazz == null)
                return null;
            }
        }
    }

    /**
     * Returns epoch milliseconds for a recognized date/time primitive, or null.
     * Classes o.a.h.h.common.type Date &amp; Timestamp were introduced in Hive-3.1;
     * they are probed reflectively so this code also loads on older Hive versions.
     *
     * @param prim primitive Java object from the ObjectInspector (may be null)
     * @param tz   time zone, kept for interface compatibility (not used here)
     * @return epoch milliseconds, or null for anything but a recognized DATE/TIMESTAMP
     */
    public static Long getPrimitiveEpoch(Object prim, TimeZone tz) {
        if (prim == null) {
            return null;  // explicit guard rather than relying on a catch-all NPE
        }
        // java.sql.Date and java.sql.Timestamp both extend java.util.Date and
        // report epoch milliseconds via getTime(), so one check covers all three.
        if (prim instanceof java.util.Date) {
            return ((java.util.Date) prim).getTime();
        }
        return epochFromHive31Type(prim);
    }

    // Reflectively probe the Hive-3.1 o.a.h.h.common.type.{Date,Timestamp}
    // classes for a toEpochMilli() value; returns null when prim is neither,
    // or when running against a pre-3.1 Hive where the classes do not exist.
    private static Long epochFromHive31Type(Object prim) {
        final String[] hive31Types = {
                "org.apache.hadoop.hive.common.type.Date",
                "org.apache.hadoop.hive.common.type.Timestamp"
        };
        for (String className : hive31Types) {
            try {
                Class<?> clazz = Class.forName(className);
                if (prim.getClass() == clazz) {
                    return (Long) clazz.getMethod("toEpochMilli").invoke(prim);
                }
            } catch (Exception exc) {
                // class missing (pre-3.1 Hive) or reflection failure - keep probing
            }
        }
        return null;
    }

    /**
     * Type DATE was introduced in Hive-0.12 - class DateWritable in API.
     * Class DateWritableV2 is used instead as of Hive-3.1 version.
     *
     * @param dwHive a DateWritable (Hive 0.12+) or DateWritableV2 (Hive 3.1+) instance
     * @param epoch  epoch milliseconds (UTC)
     * @param tz     kept for interface compatibility (not used here)
     * @throws UnsupportedOperationException when no DATE writable class is available
     */
    public static void setDateWritable(Object dwHive, long epoch
                                       , TimeZone tz
                                       ) {
        setEpochOnWritable(dwHive, epoch,
                "org.apache.hadoop.hive.common.type.Date",
                "org.apache.hadoop.hive.serde2.io.DateWritableV2",
                "org.apache.hadoop.hive.serde2.io.DateWritable",
                java.sql.Date.class, new java.sql.Date(epoch),
                "DATE type");
    } // setDateWritable

    /**
     * Type DATE was introduced in Hive-0.12 - class DateWritable in API.
     * Class DateWritableV2 is used instead as of Hive-3.1 version.
     *
     * @param dwHive a DateWritable (Hive 0.12+) or DateWritableV2 (Hive 3.1+) instance
     * @param jsd    the date value to store
     * @throws UnsupportedOperationException when no DATE writable class is available
     */
    public static void setDateWritable(Object dwHive, java.sql.Date jsd) {
        setEpochOnWritable(dwHive, jsd.getTime(),
                "org.apache.hadoop.hive.common.type.Date",
                "org.apache.hadoop.hive.serde2.io.DateWritableV2",
                "org.apache.hadoop.hive.serde2.io.DateWritable",
                java.sql.Date.class, jsd,
                "DATE type");
    } // setDateWritable

    /**
     * Type TIMESTAMP was introduced in Hive-0.12 - class TimestampWritable in API.
     * Class TimestampWritableV2 is used instead as of Hive-3.1 version.
     *
     * @param twHive a TimestampWritable (0.12+) or TimestampWritableV2 (3.1+) instance
     * @param epoch  epoch milliseconds (UTC)
     * @throws UnsupportedOperationException when no TIMESTAMP writable class is available
     */
    public static void setTimeWritable(Object twHive, long epoch) {
        setEpochOnWritable(twHive, epoch,
                "org.apache.hadoop.hive.common.type.Timestamp",
                "org.apache.hadoop.hive.serde2.io.TimestampWritableV2",
                "org.apache.hadoop.hive.serde2.io.TimestampWritable",
                java.sql.Timestamp.class, new java.sql.Timestamp(epoch),
                "TIMESTAMP type");
    } // setTimeWritable

    /**
     * Type TIMESTAMP was introduced in Hive-0.12 - class TimestampWritable in API.
     * Class TimestampWritableV2 is used instead as of Hive-3.1 version.
     *
     * @param twHive a TimestampWritable (0.12+) or TimestampWritableV2 (3.1+) instance
     * @param jst    the timestamp value to store
     * @throws UnsupportedOperationException when no TIMESTAMP writable class is available
     */
    public static void setTimeWritable(Object twHive, java.sql.Timestamp jst) {
        setEpochOnWritable(twHive, jst.getTime(),
                "org.apache.hadoop.hive.common.type.Timestamp",
                "org.apache.hadoop.hive.serde2.io.TimestampWritableV2",
                "org.apache.hadoop.hive.serde2.io.TimestampWritable",
                java.sql.Timestamp.class, jst,
                "TIMESTAMP type");
    } // setTimeWritable

    /**
     * Shared reflective setter for the four public set*Writable entry points,
     * which previously duplicated this scaffolding four times.
     * Prefers the Hive-3.1 V2 writables (o.a.h.h.common.type.{Date,Timestamp}
     * via setTimeInMillis), falling back to the Hive-0.12 writables with their
     * java.sql argument types. All Hive classes are accessed reflectively so
     * this compiles and loads against any Hive version.
     *
     * @param writable           target writable (DateWritable[V2]/TimestampWritable[V2])
     * @param epoch              epoch milliseconds (UTC)
     * @param hive31TypeName     Hive-3.1 value-type class name
     * @param hive31WritableName Hive-3.1 writable class name
     * @param legacyWritableName Hive-0.12 writable class name
     * @param legacyParamType    parameter type of the legacy writable's set()
     * @param legacyValue        value passed to the legacy writable's set()
     * @param typeLabel          message for the unsupported-type exception
     * @throws UnsupportedOperationException when neither writable class is available
     */
    private static void setEpochOnWritable(Object writable, long epoch,
            String hive31TypeName, String hive31WritableName,
            String legacyWritableName, Class<?> legacyParamType, Object legacyValue,
            String typeLabel) {
        try { // Hive 3.1 and above
            Class<?> typeClazz = Class.forName(hive31TypeName);
            Class<?> writableClazz = Class.forName(hive31WritableName);
            Object typeObj = typeClazz.getConstructor().newInstance();
            typeClazz.getMethod("setTimeInMillis", long.class).invoke(typeObj, epoch);
            writableClazz.getMethod("set", typeClazz).invoke(writable, typeObj);
        } catch (Exception e1) {
            try { // Hive 0.12 and above
                Class<?> writableClazz = Class.forName(legacyWritableName);
                writableClazz.getMethod("set", legacyParamType).invoke(writable, legacyValue);
            } catch (Exception e2) { // Hive 0.11 and below
                // column type not supported by this Hive version
                throw new UnsupportedOperationException(typeLabel);
            }
        }
    }
}

0 commit comments

Comments
 (0)