Skip to content

Commit 26a692c

Browse files
authored
Add csv fileformat to omniv2 handler + sample + a couple more custom_func (#88)
Add csv fileformat to omniv2 handler + sample + a couple more custom_func
1 parent 9c55da7 commit 26a692c

23 files changed

+805
-24
lines changed

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,12 @@ Take a detailed look at samples here:
119119
extension repo/package; the rest of the library is just golang 1.12.
120120
121121
## Recent Feature Additions
122-
- added trie based high performance `times.SmartParse`.
123-
- command line interface (one-off `transform` cmd or long-running http `server` mode).
124-
- javascript engine integration as a custom_func.
122+
- Added CSV file format support in omniv2 handler.
123+
- Introduced IDR node cache for allocation recycling.
124+
- Introduced [IDR](./idr/README.md) for in-memory data representation.
125+
- Added trie based high performance `times.SmartParse`.
126+
- Command line interface (one-off `transform` cmd or long-running http `server` mode).
127+
- `javascript` engine integration as a custom_func.
125128
- JSON stream parser.
126129
- Extensibility:
127130
- Ability to provide custom functions.

cli/cmd/serverCmd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ func httpPostTransform(w http.ResponseWriter, r *http.Request) {
150150

151151
var (
152152
sampleDir = "../../samples/omniv2/"
153-
sampleFormats = []string{"json", "xml"}
153+
sampleFormats = []string{"csv", "json", "xml"}
154154
sampleInputFilenamePattern = regexp.MustCompile("^([0-9]+[_a-zA-Z]+)\\.input\\.[a-z]+$")
155155
)
156156

customfuncs/.snapshots/TestDumpBuiltinCustomFuncNames

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
"splitIntoJsonArray",
2020
"substring",
2121
"sum",
22+
"switch",
23+
"switchByPattern",
2224
"upper",
2325
"uuidv3"
2426
]

customfuncs/customFuncs.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"encoding/json"
55
"fmt"
66
"math"
7+
"regexp"
78
"strconv"
89
"strings"
910

@@ -50,6 +51,8 @@ var builtinPublishedCustomFuncs = map[string]CustomFuncType{
5051
"splitIntoJsonArray": splitIntoJsonArray,
5152
"substring": substring,
5253
"sum": sum,
54+
"switch": switchFunc,
55+
"switchByPattern": switchByPattern,
5356
"upper": upper,
5457
"uuidv3": uuidv3,
5558
}
@@ -208,6 +211,36 @@ func substring(_ *transformctx.Ctx, str, startIndex, lengthStr string) (string,
208211
return string(runes[start : start+length]), nil
209212
}
210213

214+
func switchFunc(ctx *transformctx.Ctx, expr string, casesReturns ...string) (string, error) {
215+
if len(casesReturns)%2 != 1 {
216+
return "", fmt.Errorf("length of 'casesReturns' must be odd, but got: %d", len(casesReturns))
217+
}
218+
patternsReturns := make([]string, len(casesReturns))
219+
for i := 0; i < len(patternsReturns)/2; i++ {
220+
patternsReturns[2*i] = "^" + regexp.QuoteMeta(casesReturns[2*i]) + "$"
221+
patternsReturns[2*i+1] = casesReturns[2*i+1]
222+
}
223+
patternsReturns[len(casesReturns)-1] = casesReturns[len(casesReturns)-1]
224+
return switchByPattern(ctx, expr, patternsReturns...)
225+
}
226+
227+
func switchByPattern(_ *transformctx.Ctx, expr string, patternsReturns ...string) (string, error) {
228+
if len(patternsReturns)%2 != 1 {
229+
return "", fmt.Errorf(
230+
"length of 'patternsReturns' must be odd, but got: %d", len(patternsReturns))
231+
}
232+
for i := 0; i < len(patternsReturns)/2; i++ {
233+
re, err := caches.GetRegex(patternsReturns[2*i])
234+
if err != nil {
235+
return "", fmt.Errorf(`invalid pattern '%s', err: %s`, patternsReturns[2*i], err.Error())
236+
}
237+
if re.MatchString(expr) {
238+
return patternsReturns[(2*i)+1], nil
239+
}
240+
}
241+
return patternsReturns[len(patternsReturns)-1], nil
242+
}
243+
211244
// upper implements the 'upper' custom func: it returns s converted to upper case
// by strings.ToUpper. The returned error is always nil.
func upper(_ *transformctx.Ctx, s string) (string, error) {
	uppercased := strings.ToUpper(s)
	return uppercased, nil
}

customfuncs/customFuncs_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,112 @@ func TestSubstring(t *testing.T) {
655655
}
656656
}
657657

658+
func TestSwitchFunc(t *testing.T) {
659+
for _, test := range []struct {
660+
name string
661+
expr string
662+
casesReturns []string
663+
err string
664+
expected string
665+
}{
666+
{
667+
name: "empty casesReturns",
668+
expr: "abc",
669+
casesReturns: nil,
670+
err: "length of 'casesReturns' must be odd, but got: 0",
671+
},
672+
{
673+
name: "even casesReturns length",
674+
expr: "abc",
675+
casesReturns: []string{"1", "2", "3", "4"},
676+
err: "length of 'casesReturns' must be odd, but got: 4",
677+
},
678+
{
679+
name: "no case, just default",
680+
expr: "abc",
681+
casesReturns: []string{"default"},
682+
expected: "default",
683+
},
684+
{
685+
name: "case string contains special characters",
686+
expr: "How do you do",
687+
casesReturns: []string{
688+
"How do you do?", "Wrong",
689+
"How do you do", "Correct",
690+
"Huh"},
691+
expected: "Correct",
692+
},
693+
} {
694+
t.Run(test.name, func(t *testing.T) {
695+
result, err := switchFunc(nil, test.expr, test.casesReturns...)
696+
if test.err != "" {
697+
assert.Error(t, err)
698+
assert.Equal(t, test.err, err.Error())
699+
assert.Equal(t, "", result)
700+
} else {
701+
assert.NoError(t, err)
702+
assert.Equal(t, test.expected, result)
703+
}
704+
})
705+
}
706+
}
707+
708+
func TestSwitchByPattern(t *testing.T) {
709+
for _, test := range []struct {
710+
name string
711+
expr string
712+
patternsReturns []string
713+
err string
714+
expected string
715+
}{
716+
{
717+
name: "empty patternsReturns",
718+
expr: "abc",
719+
patternsReturns: nil,
720+
err: "length of 'patternsReturns' must be odd, but got: 0",
721+
},
722+
{
723+
name: "even patternsReturns length",
724+
expr: "abc",
725+
patternsReturns: []string{"1", "2", "3", "4"},
726+
err: "length of 'patternsReturns' must be odd, but got: 4",
727+
},
728+
{
729+
name: "regex invalid",
730+
expr: "abc",
731+
patternsReturns: []string{"[", "2", "3"},
732+
err: "invalid pattern '[', err: error parsing regexp: missing closing ]: `[`",
733+
},
734+
{
735+
name: "no pattern, only default",
736+
expr: "abc",
737+
patternsReturns: []string{"default"},
738+
expected: "default",
739+
},
740+
{
741+
name: "case string contains special characters",
742+
expr: "2019/02/23",
743+
patternsReturns: []string{
744+
"^[0-9]{2}/[0-9]{2}/[0-9]{4}$", "Wrong",
745+
"^[0-9]{4}/[0-9]{2}/[0-9]{2}$", "Correct",
746+
"Huh"},
747+
expected: "Correct",
748+
},
749+
} {
750+
t.Run(test.name, func(t *testing.T) {
751+
result, err := switchByPattern(nil, test.expr, test.patternsReturns...)
752+
if test.err != "" {
753+
assert.Error(t, err)
754+
assert.Equal(t, test.err, err.Error())
755+
assert.Equal(t, "", result)
756+
} else {
757+
assert.NoError(t, err)
758+
assert.Equal(t, test.expected, result)
759+
}
760+
})
761+
}
762+
}
763+
658764
func TestUpper(t *testing.T) {
659765
s, err := upper(nil, "")
660766
assert.NoError(t, err)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
{
2+
"file_declaration": {
3+
"delimiter": ",",
4+
"replace_double_quotes": true,
5+
"header_row_index": 2,
6+
"data_row_index": 4,
7+
"columns": [
8+
{
9+
"name": "col1",
10+
"alias": null
11+
},
12+
{
13+
"name": "col 2",
14+
"alias": "col2"
15+
}
16+
]
17+
},
18+
"XPath": ".[col1 != 'skip']"
19+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"file_declaration": {
3+
"delimiter": ",",
4+
"replace_double_quotes": false,
5+
"header_row_index": null,
6+
"data_row_index": 1,
7+
"columns": [
8+
{
9+
"name": "col1",
10+
"alias": null
11+
}
12+
]
13+
},
14+
"XPath": ""
15+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package omniv2csv
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"io"
7+
"strings"
8+
9+
"github.com/jf-tech/go-corelib/caches"
10+
"github.com/jf-tech/go-corelib/strs"
11+
12+
"github.com/jf-tech/omniparser/errs"
13+
omniv2fileformat "github.com/jf-tech/omniparser/handlers/omni/v2/fileformat"
14+
"github.com/jf-tech/omniparser/handlers/omni/v2/transform"
15+
"github.com/jf-tech/omniparser/validation"
16+
)
17+
18+
// fileFormatCSV is one of the two file format names ValidateSchema accepts.
const fileFormatCSV = "csv"

// fileFormatDelimited is the other file format name ValidateSchema accepts; it is
// treated the same as fileFormatCSV.
const fileFormatDelimited = "delimited"
22+
23+
// csvFileFormat is the CSV file format for the omniv2 schema handler; instances
// are created by NewCSVFileFormat.
type csvFileFormat struct {
	// schemaName is the schema's name; FmtErr puts it in front of every error message.
	schemaName string
}
26+
27+
// NewCSVFileFormat creates a FileFormat for CSV for omniv2 schema handler.
28+
func NewCSVFileFormat(schemaName string) omniv2fileformat.FileFormat {
29+
return &csvFileFormat{schemaName: schemaName}
30+
}
31+
32+
type csvFormatRuntime struct {
33+
Decl *fileDecl `json:"file_declaration"`
34+
XPath string
35+
}
36+
37+
func (f *csvFileFormat) ValidateSchema(
38+
format string, schemaContent []byte, finalOutputDecl *transform.Decl) (interface{}, error) {
39+
if format != fileFormatCSV && format != fileFormatDelimited {
40+
return nil, errs.ErrSchemaNotSupported
41+
}
42+
err := validation.SchemaValidate(f.schemaName, schemaContent, validation.JSONSchemaCSVFileDeclaration)
43+
if err != nil {
44+
// err is already context formatted.
45+
return nil, err
46+
}
47+
var runtime csvFormatRuntime
48+
_ = json.Unmarshal(schemaContent, &runtime) // JSON schema validation earlier guarantees Unmarshal success.
49+
err = f.validateFileDecl(runtime.Decl)
50+
if err != nil {
51+
// err is already context formatted.
52+
return nil, err
53+
}
54+
if finalOutputDecl == nil {
55+
return nil, f.FmtErr("'FINAL_OUTPUT' is missing")
56+
}
57+
runtime.XPath = strings.TrimSpace(strs.StrPtrOrElse(finalOutputDecl.XPath, ""))
58+
if runtime.XPath != "" {
59+
_, err := caches.GetXPathExpr(runtime.XPath)
60+
if err != nil {
61+
return nil, f.FmtErr("'FINAL_OUTPUT.xpath' (value: '%s') is invalid, err: %s",
62+
runtime.XPath, err.Error())
63+
}
64+
}
65+
return &runtime, nil
66+
}
67+
68+
func (f *csvFileFormat) validateFileDecl(decl *fileDecl) error {
69+
// If header_row_index is specified, then it must be < data_row_index
70+
if decl.HeaderRowIndex != nil && *decl.HeaderRowIndex >= decl.DataRowIndex {
71+
return f.FmtErr(
72+
"file_declaration.header_row_index(%d) must be smaller than file_declaration.data_row_index(%d)",
73+
*decl.HeaderRowIndex, decl.DataRowIndex)
74+
}
75+
if err := f.validateColumns(decl.Columns); err != nil {
76+
return err
77+
}
78+
return nil
79+
}
80+
81+
func (f *csvFileFormat) validateColumns(columns []column) error {
82+
namesSeen := map[string]bool{}
83+
aliasesSeen := map[string]bool{}
84+
for _, column := range columns {
85+
if _, found := namesSeen[column.Name]; found {
86+
return f.FmtErr("file_declaration.columns contains duplicate name '%s'", column.Name)
87+
}
88+
namesSeen[column.Name] = true
89+
if column.Alias != nil {
90+
if _, found := aliasesSeen[*column.Alias]; found {
91+
return f.FmtErr("file_declaration.columns contains duplicate alias '%s'", *column.Alias)
92+
}
93+
aliasesSeen[*column.Alias] = true
94+
}
95+
}
96+
return nil
97+
}
98+
99+
func (f *csvFileFormat) CreateFormatReader(
100+
name string, r io.Reader, runtime interface{}) (omniv2fileformat.FormatReader, error) {
101+
csv := runtime.(*csvFormatRuntime)
102+
return NewReader(name, r, csv.Decl, csv.XPath)
103+
}
104+
105+
func (f *csvFileFormat) FmtErr(format string, args ...interface{}) error {
106+
return fmt.Errorf("schema '%s': %s", f.schemaName, fmt.Sprintf(format, args...))
107+
}

0 commit comments

Comments
 (0)