Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions omniparser/schemaplugin/omni/v2/inputprocessor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package omniv2

import (
"encoding/json"
"errors"

"github.com/jf-tech/omniparser/omniparser/customfuncs"
"github.com/jf-tech/omniparser/omniparser/errs"
"github.com/jf-tech/omniparser/omniparser/schemaplugin/omni/v2/transform"
"github.com/jf-tech/omniparser/omniparser/transformctx"
)

// inputProcessor reads nodes from an InputReader and transforms each of them into a
// JSON-encoded output record.
type inputProcessor struct {
// finalOutputDecl is the declaration handed to ParseNode for every node that is read.
finalOutputDecl *transform.Decl
// customFuncs is passed to transform.NewParseCtx on each Read call.
customFuncs customfuncs.CustomFuncs
// ctx is passed to transform.NewParseCtx on each Read call.
ctx *transformctx.Ctx
// reader supplies the input nodes, the continuable-error classification for its own
// errors, and the context-aware error formatting.
reader InputReader
}

// Read pulls the next node from the underlying InputReader, transforms it according to the
// final output declaration, and returns the transformed record marshaled as JSON.
//
// Errors coming from the reader are passed through untouched. A transform failure is
// reported as an errs.ErrTransformFailed error, which IsContinuableError treats as
// continuable.
func (p *inputProcessor) Read() ([]byte, error) {
	n, readErr := p.reader.Read()
	if readErr != nil {
		// The reader is expected to have already attached context information to its
		// errors, so hand the error back as is.
		return nil, readErr
	}
	parseCtx := transform.NewParseCtx(p.ctx, p.customFuncs)
	record, parseErr := parseCtx.ParseNode(n, p.finalOutputDecl)
	if parseErr != nil {
		// ParseNode errors carry no reader context, so add it before wrapping.
		msg := p.fmtErrStr("fail to transform. err: %s", parseErr.Error())
		return nil, errs.ErrTransformFailed(msg)
	}
	return json.Marshal(record)
}

// IsContinuableError reports whether processing may carry on after err. A transform
// failure is always continuable; any other error is classified by the underlying reader.
func (p *inputProcessor) IsContinuableError(err error) bool {
	if errs.IsErrTransformFailed(err) {
		return true
	}
	return p.reader.IsContinuableError(err)
}

// FmtErr builds an error whose message is the given format/args rendered by fmtErrStr,
// i.e. with the reader's context information included.
func (p *inputProcessor) FmtErr(format string, args ...interface{}) error {
	msg := p.fmtErrStr(format, args...)
	return errors.New(msg)
}

// fmtErrStr asks the reader to format a context-aware error from format/args and returns
// just its message text.
func (p *inputProcessor) fmtErrStr(format string, args ...interface{}) string {
	ctxErr := p.reader.FmtErr(format, args...)
	return ctxErr.Error()
}
97 changes: 97 additions & 0 deletions omniparser/schemaplugin/omni/v2/inputprocessor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package omniv2

import (
"errors"
"fmt"
"testing"

node "github.com/antchfx/xmlquery"
"github.com/stretchr/testify/assert"

"github.com/jf-tech/omniparser/omniparser/errs"
"github.com/jf-tech/omniparser/omniparser/schemaplugin/omni/v2/transform"
)

// testContinuableErr is the sentinel error that testReader.IsContinuableError reports as
// continuable.
var testContinuableErr = errors.New("continuable error")

// testReader is a scripted reader used as the inputProcessor's reader in tests.
type testReader struct {
// result holds the nodes to hand out; each Read call returns and removes the first entry.
// Once it is empty, Read returns errs.ErrEOF.
result []*node.Node
// err holds the errors to hand out, consumed in lockstep with result: Read returns and
// removes the first entry of both slices together.
err []error
}

// Read returns the next scripted (node, error) pair and removes it from the queues. When no
// scripted results are left it returns errs.ErrEOF.
func (r *testReader) Read() (*node.Node, error) {
	if len(r.result) == 0 {
		return nil, errs.ErrEOF
	}
	nextNode, nextErr := r.result[0], r.err[0]
	r.result, r.err = r.result[1:], r.err[1:]
	return nextNode, nextErr
}

// IsContinuableError reports whether err is, or wraps, testContinuableErr. errors.Is is used
// instead of a direct comparison so that a wrapped sentinel is still recognized.
func (r *testReader) IsContinuableError(err error) bool {
	return errors.Is(err, testContinuableErr)
}

// FmtErr formats an error from format/args, prefixing the message with "ctx: " so tests can
// tell that the reader's formatting was applied.
func (r *testReader) FmtErr(format string, args ...interface{}) error {
	prefixed := "ctx: " + format
	return fmt.Errorf(prefixed, args...)
}

// TestInputProcessor_Read_ReadFailure verifies that an error returned by the reader is passed
// through Read unchanged and that no output bytes are produced.
func TestInputProcessor_Read_ReadFailure(t *testing.T) {
	p := &inputProcessor{
		reader: &testReader{result: []*node.Node{nil}, err: []error{errors.New("test failure")}},
	}
	b, err := p.Read()
	// EqualError checks both that err is non-nil and that its message matches, so a nil err
	// yields a test failure rather than a nil-dereference panic on err.Error().
	assert.EqualError(t, err, "test failure")
	assert.Nil(t, b)
}

// TestInputProcessor_Read_ParseNodeFailure verifies that a ParseNode failure is surfaced as a
// continuable transform-failed error whose message carries the reader's context prefix.
func TestInputProcessor_Read_ParseNodeFailure(t *testing.T) {
	finalOutputDecl, err := transform.ValidateTransformDeclarations(
		[]byte(` {
"transform_declarations": {
"FINAL_OUTPUT": { "const": "abc", "result_type": "int" }
}
}`), nil)
	assert.NoError(t, err)
	p := &inputProcessor{
		finalOutputDecl: finalOutputDecl,
		reader:          &testReader{result: []*node.Node{nil}, err: []error{nil}},
	}
	b, err := p.Read()
	assert.True(t, errs.IsErrTransformFailed(err))
	assert.True(t, p.IsContinuableError(err))
	// EqualError checks both that err is non-nil and that its message matches, so a nil err
	// yields a test failure rather than a nil-dereference panic on err.Error().
	assert.EqualError(t, err,
		`ctx: fail to transform. err: fail to convert value 'abc' to type 'int' on 'FINAL_OUTPUT', err: strconv.ParseFloat: parsing "abc": invalid syntax`)
	assert.Nil(t, b)
}

// TestInputProcessor_Read_Success verifies that a node which transforms cleanly comes back
// from Read as the JSON encoding of the transformed value.
func TestInputProcessor_Read_Success(t *testing.T) {
	schema := []byte(` {
"transform_declarations": {
"FINAL_OUTPUT": { "const": "123", "result_type": "int" }
}
}`)
	decl, err := transform.ValidateTransformDeclarations(schema, nil)
	assert.NoError(t, err)
	reader := &testReader{result: []*node.Node{nil}, err: []error{nil}}
	p := &inputProcessor{finalOutputDecl: decl, reader: reader}
	out, err := p.Read()
	assert.NoError(t, err)
	assert.Equal(t, "123", string(out))
}

// TestIsContinuableError verifies that the processor defers to the reader's classification
// for errors that are not transform failures.
func TestIsContinuableError(t *testing.T) {
	p := &inputProcessor{reader: &testReader{}}
	for _, tc := range []struct {
		err      error
		expected bool
	}{
		{err: errors.New("test failure"), expected: false},
		{err: testContinuableErr, expected: true},
	} {
		assert.Equal(t, tc.expected, p.IsContinuableError(tc.err))
	}
}

// TestFmtErr verifies that FmtErr renders its format/args through the reader, picking up the
// reader's "ctx: " prefix.
func TestFmtErr(t *testing.T) {
	p := &inputProcessor{reader: &testReader{}}
	err := p.FmtErr("some %d %s", 1, "fruit")
	assert.Equal(t, "ctx: some 1 fruit", err.Error())
}
23 changes: 23 additions & 0 deletions omniparser/schemaplugin/omni/v2/inputreader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package omniv2

import (
node "github.com/antchfx/xmlquery"

"github.com/jf-tech/omniparser/omniparser/errs"
)

// InputReader is the interface for reading an input stream in the omni plugin. There will be a
// number of file format specific readers. The omni plugin uses these readers to load input
// stream content before doing the xpath/node based parsing.
type InputReader interface {
// Read returns a *Node (together with its subtree) that will eventually be parsed and
// transformed into an output record.
Read() (*node.Node, error)
// IsContinuableError determines whether an error returned by the InputReader is continuable.
// For certain errors (like EOF or corruption) there is no point in trying further, while
// others can be safely ignored.
IsContinuableError(err error) bool
// CtxAwareErr requires an InputReader to be able to format an error with context
// information (such as the input file name and the approximate error location, e.g. a line
// number). inputProcessor relies on its FmtErr method.
errs.CtxAwareErr
}
2 changes: 1 addition & 1 deletion omniparser/schemaplugin/omni/v2/transform/decl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func verifyDeclDeepCopy(t *testing.T, d1, d2 *Decl) {
verifyPtrsInDeepCopy(d1.Template, d2.Template)

verifyPtrsInDeepCopy(d1.Object, d2.Object)
for name, _ := range d1.Object {
for name := range d1.Object {
verifyDeclDeepCopy(t, d1.Object[name], d2.Object[name])
}

Expand Down
6 changes: 3 additions & 3 deletions omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (p *parseCtx) prepCustomFuncArgValues(
}

for _, argDecl := range customFuncDecl.Args {
// We'd love to delegate all the value calculation to parseNode but here we have
// We'd love to delegate all the value calculation to ParseNode but here we have
// one special case, when we deal with a field.
// We have situations we need to support aggregation func such as sum/avg. In those cases
// the arg to the custom func can be a field with xpath/xpath_dynamic that we want it to
Expand Down Expand Up @@ -98,7 +98,7 @@ func (p *parseCtx) prepCustomFuncArgValues(
// fn is NOT variadic and xpath query returned at least one value, only use the first one.
appendArgValue(argDecl, argValueNodes[0].InnerText())
case KindArray:
argValue, err := p.parseNode(n, argDecl)
argValue, err := p.ParseNode(n, argDecl)
if err != nil {
return nil, err
}
Expand All @@ -110,7 +110,7 @@ func (p *parseCtx) prepCustomFuncArgValues(
}
default:
// Normal case not involving field (so const/external/nested custom_func)
v, err := p.parseNode(n, argDecl)
v, err := p.ParseNode(n, argDecl)
if err != nil {
return nil, err
}
Expand Down
12 changes: 6 additions & 6 deletions omniparser/schemaplugin/omni/v2/transform/nodeToObject_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,18 @@ func TestIsChildText(t *testing.T) {
name: "xml: child is array",
xpath: "a",
xmlStr: `<a>
<b>1</b>
<b>2</b>
</a>`,
<b>1</b>
<b>2</b>
</a>`,
isTextNode: false,
},
{
name: "xml: child is object",
xpath: "a",
xmlStr: `<a>
<b>1</b>
<c>2</c>
</a>`,
<b>1</b>
<c>2</c>
</a>`,
isTextNode: false,
},
} {
Expand Down
21 changes: 9 additions & 12 deletions omniparser/schemaplugin/omni/v2/transform/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ type parseCtx struct {
transformCache map[string]interface{}
}

func newParseCtx(opCtx *transformctx.Ctx, customFuncs customfuncs.CustomFuncs) *parseCtx {
// NewParseCtx creates new context for parsing a *Node (and its sub-tree) into an output record.
func NewParseCtx(opCtx *transformctx.Ctx, customFuncs customfuncs.CustomFuncs) *parseCtx {
return &parseCtx{
opCtx: opCtx,
customFuncs: customFuncs,
Expand All @@ -42,12 +43,10 @@ func resultTypeConversion(decl *Decl, value string) (interface{}, error) {
return value, nil
}
// after this point, result type isn't of string.

// Omit the field in final result if it is empty with non-string type.
if !strs.IsStrNonBlank(value) {
return nil, nil
}

switch decl.resultType() {
case ResultTypeInt:
f, err := strconv.ParseFloat(value, 64)
Expand Down Expand Up @@ -128,15 +127,14 @@ func normalizeAndReturnValue(decl *Decl, value interface{}) (interface{}, error)
return returnValue, nil
}

func (p *parseCtx) parseNode(n *node.Node, decl *Decl) (interface{}, error) {
func (p *parseCtx) ParseNode(n *node.Node, decl *Decl) (interface{}, error) {
var cacheKey string
if !p.disableTransformCache {
cacheKey = nodePtrAddrStr(n) + "/" + decl.hash
if cacheValue, found := p.transformCache[cacheKey]; found {
return cacheValue, nil
}
}

saveIntoCache := func(value interface{}, err error) (interface{}, error) {
if !p.disableTransformCache {
if err != nil {
Expand All @@ -146,7 +144,6 @@ func (p *parseCtx) parseNode(n *node.Node, decl *Decl) (interface{}, error) {
}
return value, err
}

switch decl.kind {
case KindConst:
return saveIntoCache(p.parseConst(decl))
Expand Down Expand Up @@ -204,14 +201,14 @@ func (p *parseCtx) computeXPath(n *node.Node, decl *Decl) (xpath string, dynamic
}

func (p *parseCtx) computeXPathDynamic(n *node.Node, xpathDynamicDecl *Decl) (string, error) {
v, err := p.parseNode(n, xpathDynamicDecl)
v, err := p.ParseNode(n, xpathDynamicDecl)
if err != nil {
return "", err
}
// if v is straight out nil, then we should fail out
// if v isn't nil, it could be an interface{} type whose value is nil; or it could be some valid values.
// note we need to guard the IsNil call as it would panic if v kind isn't interface/chan/func/map/slice/ptr.
// note we only need to ensure for kind == interface, because parseNode will never return
// note we only need to ensure for kind == interface, because ParseNode will never return
// chan/func/ptr. It's possible to return map/slice, but in earlier validation (validateXPath) we already
// ensured `xpath_dynamic` result type is string.
if v == nil || (reflect.ValueOf(v).Kind() == reflect.Interface && reflect.ValueOf(v).IsNil()) {
Expand Down Expand Up @@ -298,11 +295,11 @@ func (p *parseCtx) parseObject(n *node.Node, decl *Decl) (interface{}, error) {
}
object := map[string]interface{}{}
for _, childDecl := range decl.children {
childValue, err := p.parseNode(n, childDecl)
childValue, err := p.ParseNode(n, childDecl)
if err != nil {
return nil, err
}
// value returned by p.parseNode is already normalized, thus this
// value returned by p.ParseNode is already normalized, thus this
// normalizeAndSaveValue won't fail.
_ = normalizeAndSaveValue(childDecl, childValue, func(normalizedValue interface{}) {
object[strs.LastNameletOfFQDN(childDecl.fqdn)] = normalizedValue
Expand Down Expand Up @@ -330,11 +327,11 @@ func (p *parseCtx) parseArray(n *node.Node, decl *Decl) (interface{}, error) {
return nil, fmt.Errorf("xpath query '%s' on '%s' failed: %s", xpath, childDecl.fqdn, err.Error())
}
for _, nodeForChildDecl := range nodes {
childValue, err := p.parseNode(nodeForChildDecl, childDecl)
childValue, err := p.ParseNode(nodeForChildDecl, childDecl)
if err != nil {
return nil, err
}
// value returned by p.parseNode is already normalized, thus this
// value returned by p.ParseNode is already normalized, thus this
// normalizeAndSaveValue won't fail.
_ = normalizeAndSaveValue(childDecl, childValue, func(normalizedValue interface{}) {
array = append(array, normalizedValue)
Expand Down
4 changes: 2 additions & 2 deletions omniparser/schemaplugin/omni/v2/transform/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
)

func testParseCtx() *parseCtx {
ctx := newParseCtx(
ctx := NewParseCtx(
&transformctx.Ctx{
InputName: "test-input",
ExternalProperties: map[string]string{"abc": "efg"},
Expand Down Expand Up @@ -453,7 +453,7 @@ func TestParseCtx_ParseNode(t *testing.T) {
linkParent(test.decl)
ctx := testParseCtx()
ctx.disableTransformCache = false
value, err := ctx.parseNode(testNode(), test.decl)
value, err := ctx.ParseNode(testNode(), test.decl)
switch test.expectedErr {
case "":
assert.NoError(t, err)
Expand Down