Skip to content

Commit 20be766

Browse files
authored
[FEAT] Converting a Row to a JSON string
### What changes were proposed in this pull request? This PR adds JSON serialization capability to the Row interface by implementing a `ToJsonString()` method. The implementation includes: - Added `ToJsonString()` method to the Row interface that converts Row data to a JSON string representation - Implemented comprehensive `convertToJsonValue()` function that handles all Spark data types including: - Basic types (bool, string, integers, floats) - Binary data (base64 encoded) - Decimal types (decimal128/256 as string representations) - Arrow timestamp and date types (RFC3339 and ISO date formats) - Arrays and nested structures with recursive conversion - Maps with string keys (validates non-string keys return errors) - Custom types using reflection fallback for underlying basic types - Added extensive test suite with 238 lines covering all supported data types and error scenarios ### Why are the changes needed? This feature enables users to easily serialize Row data to JSON format, which is essential for: - Data export and interoperability with other systems - Debugging and logging Row contents in human-readable format - Integration with REST APIs and web services that expect JSON - Data analysis workflows that require JSON output Currently, there's no built-in way to convert Row data to JSON, forcing users to manually iterate through fields and handle type conversions. ### Does this PR introduce _any_ user-facing change? Yes. This PR adds a new public method `ToJsonString() (string, error)` to the Row interface. Users can now call this method on any Row instance to get a JSON string representation: ```go row := // ... get a Row from DataFrame operations jsonStr, err := row.ToJsonString() if err != nil { // handle conversion error } // jsonStr contains: {"field1": "value1", "field2": 42, ...} ``` ### How was this patch tested? Added comprehensive unit tests in spark/sql/types/row_json_test.go covering: - Basic data types: strings, integers, floats, booleans, nil values - Binary data: byte arrays converted to base64 encoding - Decimal types: decimal128 and decimal256 number representations - Temporal types: Arrow timestamps, Date32, Date64, and Go time.Time - Collection types: arrays with recursive element conversion - Map types: both map[string]any and map[any]any with string key validation - Nested structures: complex combinations of arrays and maps - Error cases: invalid map keys and conversion failures - JSON validity: all outputs are verified to be valid JSON through round-trip parsing Tests ensure both successful conversions produce expected JSON and error cases properly return meaningful error messages. Closes #162 from grundprinzip/json_value. Authored-by: Martin Grund <martin.grund@databricks.com> Signed-off-by: Martin Grund <martin.grund@databricks.com>
1 parent 7a9a51d commit 20be766

File tree

2 files changed

+403
-0
lines changed

2 files changed

+403
-0
lines changed

spark/sql/types/row.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,16 @@
1717
package types
1818

1919
import (
20+
"encoding/base64"
21+
"encoding/json"
22+
"fmt"
2023
"maps"
24+
"reflect"
25+
"time"
26+
27+
"github.com/apache/arrow-go/v18/arrow"
28+
"github.com/apache/arrow-go/v18/arrow/decimal128"
29+
"github.com/apache/arrow-go/v18/arrow/decimal256"
2130
)
2231

2332
type Row interface {
@@ -32,6 +41,9 @@ type Row interface {
3241
// Len returns the number of fields within a [Row].
3342
Len() int
3443
FieldNames() []string
44+
// ToJsonString converts the Row to a JSON string representation.
45+
// Returns an error if the row contains data that cannot be properly represented in JSON.
46+
ToJsonString() (string, error)
3547
}
3648

3749
type rowImpl struct {
@@ -70,3 +82,156 @@ func (r *rowImpl) FieldNames() []string {
7082
}
7183
return names
7284
}
85+
86+
func (r *rowImpl) ToJsonString() (string, error) {
87+
jsonMap := make(map[string]any)
88+
fieldNames := r.FieldNames()
89+
90+
for i, fieldName := range fieldNames {
91+
value := r.values[i]
92+
convertedValue, err := convertToJsonValue(value)
93+
if err != nil {
94+
return "", fmt.Errorf("failed to convert field '%s': %w", fieldName, err)
95+
}
96+
jsonMap[fieldName] = convertedValue
97+
}
98+
99+
jsonBytes, err := json.Marshal(jsonMap)
100+
if err != nil {
101+
return "", fmt.Errorf("failed to marshal JSON: %w", err)
102+
}
103+
104+
return string(jsonBytes), nil
105+
}
106+
107+
func convertToJsonValue(value any) (any, error) {
108+
if value == nil {
109+
return nil, nil
110+
}
111+
112+
switch v := value.(type) {
113+
case bool, string, int8, int16, int32, int64, float32, float64:
114+
return v, nil
115+
116+
case []byte:
117+
return base64.StdEncoding.EncodeToString(v), nil
118+
119+
case decimal128.Num:
120+
return v.BigInt().String(), nil
121+
122+
case decimal256.Num:
123+
return v.BigInt().String(), nil
124+
125+
case arrow.Timestamp:
126+
epochUs := int64(v)
127+
t := time.Unix(epochUs/1000000, (epochUs%1000000)*1000).UTC()
128+
return t.Format(time.RFC3339), nil
129+
130+
case arrow.Date32:
131+
epochDays := int64(v)
132+
epochTime := time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC).AddDate(0, 0, int(epochDays))
133+
return epochTime.Format("2006-01-02"), nil
134+
135+
case arrow.Date64:
136+
epochMs := int64(v)
137+
t := time.Unix(epochMs/1000, (epochMs%1000)*1000000).UTC()
138+
return t.Format("2006-01-02"), nil
139+
140+
case time.Time:
141+
if v.IsZero() {
142+
return nil, nil
143+
}
144+
return v.Format(time.RFC3339), nil
145+
146+
case []any:
147+
result := make([]any, len(v))
148+
for i, item := range v {
149+
convertedItem, err := convertToJsonValue(item)
150+
if err != nil {
151+
return nil, fmt.Errorf("failed to convert array element at index %d: %w", i, err)
152+
}
153+
result[i] = convertedItem
154+
}
155+
return result, nil
156+
157+
case map[any]any:
158+
result := make(map[string]any)
159+
for key, val := range v {
160+
keyStr, ok := key.(string)
161+
if !ok {
162+
return nil, fmt.Errorf("map key must be string for JSON conversion, got %T", key)
163+
}
164+
convertedVal, err := convertToJsonValue(val)
165+
if err != nil {
166+
return nil, fmt.Errorf("failed to convert map value for key '%s': %w", keyStr, err)
167+
}
168+
result[keyStr] = convertedVal
169+
}
170+
return result, nil
171+
172+
case map[string]any:
173+
result := make(map[string]any)
174+
for key, val := range v {
175+
convertedVal, err := convertToJsonValue(val)
176+
if err != nil {
177+
return nil, fmt.Errorf("failed to convert map value for key '%s': %w", key, err)
178+
}
179+
result[key] = convertedVal
180+
}
181+
return result, nil
182+
183+
default:
184+
// Use reflection to handle custom types that have basic types as their underlying type.
185+
// For example, a custom type like "type MyInt int32" would not match the explicit
186+
// int32 case above, but would match reflect.Int32 here. This ensures we can still
187+
// convert custom integer, float, bool, and string types to their JSON representations.
188+
rv := reflect.ValueOf(value)
189+
switch rv.Kind() {
190+
case reflect.Slice, reflect.Array:
191+
length := rv.Len()
192+
result := make([]any, length)
193+
for i := 0; i < length; i++ {
194+
convertedItem, err := convertToJsonValue(rv.Index(i).Interface())
195+
if err != nil {
196+
return nil, fmt.Errorf("failed to convert array element at index %d: %w", i, err)
197+
}
198+
result[i] = convertedItem
199+
}
200+
return result, nil
201+
202+
case reflect.Map:
203+
if rv.Type().Key().Kind() != reflect.String {
204+
return nil, fmt.Errorf("map key must be string for JSON conversion, got %s", rv.Type().Key().Kind())
205+
}
206+
result := make(map[string]any)
207+
for _, key := range rv.MapKeys() {
208+
keyStr := key.String()
209+
val := rv.MapIndex(key)
210+
convertedVal, err := convertToJsonValue(val.Interface())
211+
if err != nil {
212+
return nil, fmt.Errorf("failed to convert map value for key '%s': %w", keyStr, err)
213+
}
214+
result[keyStr] = convertedVal
215+
}
216+
return result, nil
217+
218+
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
219+
return rv.Int(), nil
220+
221+
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
222+
return rv.Uint(), nil
223+
224+
case reflect.Float32, reflect.Float64:
225+
return rv.Float(), nil
226+
227+
case reflect.Bool:
228+
return rv.Bool(), nil
229+
230+
case reflect.String:
231+
return rv.String(), nil
232+
233+
default:
234+
return fmt.Sprintf("%v", value), nil
235+
}
236+
}
237+
}

0 commit comments

Comments
 (0)