Skip to content

Commit 60033d1

Browse files
authored
Add a few more custom funcs plus a 'javascript'-vs-'eval'-vs-'ifElse' benchmark comparison; add sub-sec support in times. (#58)
* Add a few more custom funcs plus a 'javascript'-vs-'eval'-vs-'ifElse' benchmark comparison * add sub-second support into timetrie and parse
1 parent 3d49d68 commit 60033d1

File tree

13 files changed

+137750
-28678
lines changed

13 files changed

+137750
-28678
lines changed

cache/regexCache.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package cache
2+
3+
import (
4+
"regexp"
5+
)
6+
7+
// RegexCache is the default loading cache used for caching the compiled
8+
// regex expression. If the default size is too big/small and/or a cache limit isn't
9+
// desired at all, caller can simply replace the cache during global initialization.
10+
// But be aware it's global, so any package that uses this package inside your process will
11+
// be affected.
12+
var RegexCache = NewLoadingCache()
13+
14+
// GetRegex compiles a given regex pattern and returns a compiled *regexp.Regexp
15+
// or error.
16+
func GetRegex(pattern string) (*regexp.Regexp, error) {
17+
exp, err := RegexCache.Get(pattern, func(key interface{}) (interface{}, error) {
18+
return regexp.Compile(key.(string))
19+
})
20+
if err != nil {
21+
return nil, err
22+
}
23+
return exp.(*regexp.Regexp), nil
24+
}

cache/regexCache_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package cache
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/assert"
7+
)
8+
9+
func TestGetRegex(t *testing.T) {
10+
RegexCache = NewLoadingCache()
11+
assert.Equal(t, 0, len(RegexCache.DumpForTest()))
12+
// failure case
13+
expr, err := GetRegex("[")
14+
assert.Error(t, err)
15+
assert.Equal(t, "error parsing regexp: missing closing ]: `[`", err.Error())
16+
assert.Nil(t, expr)
17+
assert.Equal(t, 0, len(RegexCache.DumpForTest()))
18+
// success case
19+
expr, err = GetRegex("^[0-9]{4}-[0-9]{2}-[0-9]{2}$")
20+
assert.NoError(t, err)
21+
assert.NotNil(t, expr)
22+
assert.Equal(t, 1, len(RegexCache.DumpForTest()))
23+
// repeat success case shouldn't case any cache growth
24+
expr, err = GetRegex("^[0-9]{4}-[0-9]{2}-[0-9]{2}$")
25+
assert.NoError(t, err)
26+
assert.NotNil(t, expr)
27+
assert.Equal(t, 1, len(RegexCache.DumpForTest()))
28+
}

cache/xpathExprCache.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import (
1111
// be affected.
1212
var XPathExprCache = NewLoadingCache()
1313

14-
// GetXPathExpr compiles a given xpath expression string and returns a compile xpath.Expr
14+
// GetXPathExpr compiles a given xpath expression string and returns a compiled *xpath.Expr
1515
// or error.
1616
func GetXPathExpr(expr string) (*xpath.Expr, error) {
1717
exp, err := XPathExprCache.Get(expr, func(key interface{}) (interface{}, error) {
Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[
22
"avg",
33
"concat",
4+
"containsPattern",
45
"dateTimeLayoutToRFC3339",
56
"dateTimeToEpoch",
67
"dateTimeToRFC3339",
@@ -9,10 +10,13 @@
910
"eval",
1011
"external",
1112
"floor",
13+
"ifElse",
14+
"isEmpty",
1215
"javascript",
1316
"lower",
1417
"splitIntoJsonArray",
1518
"substring",
1619
"sum",
17-
"upper"
20+
"upper",
21+
"uuidv3"
1822
]

omniparser/customfuncs/aggregate_test.go

Lines changed: 62 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -8,116 +8,116 @@ import (
88

99
func TestAvg(t *testing.T) {
1010
for _, test := range []struct {
11-
name string
12-
inputs []string
13-
expectedErr string
14-
expectedAvg string
11+
name string
12+
inputs []string
13+
err string
14+
expected string
1515
}{
1616
{
17-
name: "nil",
18-
inputs: nil,
19-
expectedErr: "",
20-
expectedAvg: "0",
17+
name: "nil",
18+
inputs: nil,
19+
err: "",
20+
expected: "0",
2121
},
2222
{
23-
name: "empty",
24-
inputs: []string{},
25-
expectedErr: "",
26-
expectedAvg: "0",
23+
name: "empty",
24+
inputs: []string{},
25+
err: "",
26+
expected: "0",
2727
},
2828
{
29-
name: "single",
30-
inputs: []string{"3.14159265358"},
31-
expectedErr: "",
32-
expectedAvg: "3.14159265358",
29+
name: "single",
30+
inputs: []string{"3.14159265358"},
31+
err: "",
32+
expected: "3.14159265358",
3333
},
3434
{
35-
name: "multiple small ones",
36-
inputs: []string{"3.45", "5.38"},
37-
expectedErr: "",
38-
expectedAvg: "4.415",
35+
name: "multiple small ones",
36+
inputs: []string{"3.45", "5.38"},
37+
err: "",
38+
expected: "4.415",
3939
},
4040
{
41-
name: "multiple big ones",
42-
inputs: []string{"1.23e+9", "0.34E+10"},
43-
expectedErr: "",
44-
expectedAvg: "2.315e+09",
41+
name: "multiple big ones",
42+
inputs: []string{"1.23e+9", "0.34E+10"},
43+
err: "",
44+
expected: "2.315e+09",
4545
},
4646
{
47-
name: "invalid value",
48-
inputs: []string{"1", "two"},
49-
expectedErr: `strconv.ParseFloat: parsing "two": invalid syntax`,
50-
expectedAvg: "",
47+
name: "invalid value",
48+
inputs: []string{"1", "two"},
49+
err: `strconv.ParseFloat: parsing "two": invalid syntax`,
50+
expected: "",
5151
},
5252
} {
5353
t.Run(test.name, func(t *testing.T) {
5454
result, err := avg(nil, test.inputs...)
55-
if test.expectedErr != "" {
55+
if test.err != "" {
5656
assert.Error(t, err)
57-
assert.Equal(t, test.expectedErr, err.Error())
57+
assert.Equal(t, test.err, err.Error())
5858
assert.Equal(t, "", result)
5959
} else {
6060

6161
assert.NoError(t, err)
62-
assert.Equal(t, test.expectedAvg, result)
62+
assert.Equal(t, test.expected, result)
6363
}
6464
})
6565
}
6666
}
6767

6868
func TestSum(t *testing.T) {
6969
for _, test := range []struct {
70-
name string
71-
inputs []string
72-
expectedErr string
73-
expectedSum string
70+
name string
71+
inputs []string
72+
err string
73+
expected string
7474
}{
7575
{
76-
name: "nil",
77-
inputs: nil,
78-
expectedErr: "",
79-
expectedSum: "0",
76+
name: "nil",
77+
inputs: nil,
78+
err: "",
79+
expected: "0",
8080
},
8181
{
82-
name: "empty",
83-
inputs: []string{},
84-
expectedErr: "",
85-
expectedSum: "0",
82+
name: "empty",
83+
inputs: []string{},
84+
err: "",
85+
expected: "0",
8686
},
8787
{
88-
name: "single",
89-
inputs: []string{"3.14159265358"},
90-
expectedErr: "",
91-
expectedSum: "3.14159265358",
88+
name: "single",
89+
inputs: []string{"3.14159265358"},
90+
err: "",
91+
expected: "3.14159265358",
9292
},
9393
{
94-
name: "multiple small ones",
95-
inputs: []string{"3.45", "5.38"},
96-
expectedErr: "",
97-
expectedSum: "8.83",
94+
name: "multiple small ones",
95+
inputs: []string{"3.45", "5.38"},
96+
err: "",
97+
expected: "8.83",
9898
},
9999
{
100-
name: "multiple big ones",
101-
inputs: []string{"1.23e+9", "0.34E+10"},
102-
expectedErr: "",
103-
expectedSum: "4.63e+09",
100+
name: "multiple big ones",
101+
inputs: []string{"1.23e+9", "0.34E+10"},
102+
err: "",
103+
expected: "4.63e+09",
104104
},
105105
{
106-
name: "invalid value",
107-
inputs: []string{"1", "two"},
108-
expectedErr: `strconv.ParseFloat: parsing "two": invalid syntax`,
109-
expectedSum: "",
106+
name: "invalid value",
107+
inputs: []string{"1", "two"},
108+
err: `strconv.ParseFloat: parsing "two": invalid syntax`,
109+
expected: "",
110110
},
111111
} {
112112
t.Run(test.name, func(t *testing.T) {
113113
result, err := sum(nil, test.inputs...)
114-
if test.expectedErr != "" {
114+
if test.err != "" {
115115
assert.Error(t, err)
116-
assert.Equal(t, test.expectedErr, err.Error())
116+
assert.Equal(t, test.err, err.Error())
117117
assert.Equal(t, "", result)
118118
} else {
119119
assert.NoError(t, err)
120-
assert.Equal(t, test.expectedSum, result)
120+
assert.Equal(t, test.expected, result)
121121
}
122122
})
123123
}

omniparser/customfuncs/customFuncs.go

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ import (
88
"strconv"
99
"strings"
1010

11+
"github.com/google/uuid"
12+
13+
"github.com/jf-tech/omniparser/cache"
1114
"github.com/jf-tech/omniparser/omniparser/transformctx"
1215
"github.com/jf-tech/omniparser/strs"
1316
)
@@ -34,24 +37,28 @@ var builtinPublishedCustomFuncs = map[string]CustomFuncType{
3437
// keep these custom funcs lexically sorted
3538
"avg": avg,
3639
"concat": concat,
40+
"containsPattern": containsPattern,
3741
"dateTimeLayoutToRFC3339": dateTimeLayoutToRFC3339,
3842
"dateTimeToEpoch": dateTimeToEpoch,
3943
"dateTimeToRFC3339": dateTimeToRFC3339,
44+
"eval": eval,
4045
"floor": floor,
46+
"ifElse": ifElse,
47+
"isEmpty": isEmpty,
4148
"javascript": javascript,
4249
"lower": lower,
50+
"splitIntoJsonArray": splitIntoJsonArray,
4351
"substring": substring,
4452
"sum": sum,
4553
"upper": upper,
54+
"uuidv3": uuidv3,
4655
}
4756

4857
var builtinHiddenBackCompatCustomFuncs = map[string]CustomFuncType{
4958
// keep these custom funcs lexically sorted
5059
"dateTimeWithLayoutToRfc3339": dateTimeLayoutToRFC3339, // deprecated; use dateTimeLayoutToRFC3339.
5160
"dateTimeToRfc3339": dateTimeToRFC3339, // deprecated; use dateTimeToRFC3339.
52-
"eval": eval, // deprecated; use 'javascript'.
5361
"external": external, // deprecated; use "external" decl.
54-
"splitIntoJsonArray": splitIntoJsonArray, // deprecated; use 'javascript'.
5562
}
5663

5764
// BuiltinCustomFuncs contains all the built-in custom functions.
@@ -65,6 +72,19 @@ func concat(_ *transformctx.Ctx, strs ...string) (string, error) {
6572
return b.String(), nil
6673
}
6774

75+
func containsPattern(_ *transformctx.Ctx, regexPattern string, strs ...string) (string, error) {
76+
r, err := cache.GetRegex(regexPattern)
77+
if err != nil {
78+
return "", err
79+
}
80+
for _, str := range strs {
81+
if r.MatchString(str) {
82+
return "true", nil
83+
}
84+
}
85+
return "false", nil
86+
}
87+
6888
func external(ctx *transformctx.Ctx, name string) (string, error) {
6989
if v, found := ctx.ExternalProperty(name); found {
7090
return v, nil
@@ -88,6 +108,30 @@ func floor(_ *transformctx.Ctx, value, decimalPlaces string) (string, error) {
88108
return fmt.Sprintf("%v", math.Floor(v*p10)/p10), nil
89109
}
90110

111+
func ifElse(_ *transformctx.Ctx, conditionsAndValues ...string) (string, error) {
112+
if len(conditionsAndValues)%2 != 1 {
113+
return "", fmt.Errorf("arg number must be odd, but got: %d", len(conditionsAndValues))
114+
}
115+
for i := 0; i < len(conditionsAndValues)/2; i++ {
116+
condition, err := strconv.ParseBool(conditionsAndValues[2*i])
117+
if err != nil {
118+
return "", fmt.Errorf(
119+
`condition argument must be a boolean string, but got: %s`, conditionsAndValues[2*i])
120+
}
121+
if condition {
122+
return conditionsAndValues[(2*i)+1], nil
123+
}
124+
}
125+
return conditionsAndValues[len(conditionsAndValues)-1], nil
126+
}
127+
128+
func isEmpty(_ *transformctx.Ctx, str string) (string, error) {
129+
if str == "" {
130+
return "true", nil
131+
}
132+
return "false", nil
133+
}
134+
91135
// lower returns s converted to lower case via strings.ToLower. It never
// returns an error.
func lower(_ *transformctx.Ctx, s string) (string, error) {
	lowered := strings.ToLower(s)
	return lowered, nil
}
@@ -157,3 +201,7 @@ func substring(_ *transformctx.Ctx, str, startIndex, lengthStr string) (string,
157201
// upper returns s converted to upper case via strings.ToUpper. It never
// returns an error.
func upper(_ *transformctx.Ctx, s string) (string, error) {
	uppered := strings.ToUpper(s)
	return uppered, nil
}
204+
205+
func uuidv3(_ *transformctx.Ctx, s string) (string, error) {
206+
return uuid.NewMD5(uuid.Nil, []byte(s)).String(), nil
207+
}

0 commit comments

Comments
 (0)