|
| 1 | +package omniv2csv |
| 2 | + |
| 3 | +import ( |
| 4 | +"encoding/json" |
| 5 | +"fmt" |
| 6 | +"io" |
| 7 | +"strings" |
| 8 | + |
| 9 | +"github.com/jf-tech/go-corelib/caches" |
| 10 | +"github.com/jf-tech/go-corelib/strs" |
| 11 | + |
| 12 | +"github.com/jf-tech/omniparser/errs" |
| 13 | +omniv2fileformat "github.com/jf-tech/omniparser/handlers/omni/v2/fileformat" |
| 14 | +"github.com/jf-tech/omniparser/handlers/omni/v2/transform" |
| 15 | +"github.com/jf-tech/omniparser/validation" |
| 16 | +) |
| 17 | + |
// Format names that ValidateSchema accepts; any other format value is
// rejected as not supported by this handler.
const (
	fileFormatCSV       = "csv"
	fileFormatDelimited = "delimited"
)
| 22 | + |
// csvFileFormat is the concrete type handed out by NewCSVFileFormat.
type csvFileFormat struct {
	// schemaName is passed to schema validation and is embedded in every
	// error message built by FmtErr.
	schemaName string
}
| 26 | + |
| 27 | +// NewCSVFileFormat creates a FileFormat for CSV for omniv2 schema handler. |
| 28 | +func NewCSVFileFormat(schemaName string) omniv2fileformat.FileFormat { |
| 29 | +return &csvFileFormat{schemaName: schemaName} |
| 30 | +} |
| 31 | + |
| 32 | +type csvFormatRuntime struct { |
| 33 | +Decl *fileDecl `json:"file_declaration"` |
| 34 | +XPath string |
| 35 | +} |
| 36 | + |
| 37 | +func (f *csvFileFormat) ValidateSchema( |
| 38 | +format string, schemaContent []byte, finalOutputDecl *transform.Decl) (interface{}, error) { |
| 39 | +if format != fileFormatCSV && format != fileFormatDelimited { |
| 40 | +return nil, errs.ErrSchemaNotSupported |
| 41 | +} |
| 42 | +err := validation.SchemaValidate(f.schemaName, schemaContent, validation.JSONSchemaCSVFileDeclaration) |
| 43 | +if err != nil { |
| 44 | +// err is already context formatted. |
| 45 | +return nil, err |
| 46 | +} |
| 47 | +var runtime csvFormatRuntime |
| 48 | +_ = json.Unmarshal(schemaContent, &runtime) // JSON schema validation earlier guarantees Unmarshal success. |
| 49 | +err = f.validateFileDecl(runtime.Decl) |
| 50 | +if err != nil { |
| 51 | +// err is already context formatted. |
| 52 | +return nil, err |
| 53 | +} |
| 54 | +if finalOutputDecl == nil { |
| 55 | +return nil, f.FmtErr("'FINAL_OUTPUT' is missing") |
| 56 | +} |
| 57 | +runtime.XPath = strings.TrimSpace(strs.StrPtrOrElse(finalOutputDecl.XPath, "")) |
| 58 | +if runtime.XPath != "" { |
| 59 | +_, err := caches.GetXPathExpr(runtime.XPath) |
| 60 | +if err != nil { |
| 61 | +return nil, f.FmtErr("'FINAL_OUTPUT.xpath' (value: '%s') is invalid, err: %s", |
| 62 | +runtime.XPath, err.Error()) |
| 63 | +} |
| 64 | +} |
| 65 | +return &runtime, nil |
| 66 | +} |
| 67 | + |
| 68 | +func (f *csvFileFormat) validateFileDecl(decl *fileDecl) error { |
| 69 | +// If header_row_index is specified, then it must be < data_row_index |
| 70 | +if decl.HeaderRowIndex != nil && *decl.HeaderRowIndex >= decl.DataRowIndex { |
| 71 | +return f.FmtErr( |
| 72 | +"file_declaration.header_row_index(%d) must be smaller than file_declaration.data_row_index(%d)", |
| 73 | +*decl.HeaderRowIndex, decl.DataRowIndex) |
| 74 | +} |
| 75 | +if err := f.validateColumns(decl.Columns); err != nil { |
| 76 | +return err |
| 77 | +} |
| 78 | +return nil |
| 79 | +} |
| 80 | + |
| 81 | +func (f *csvFileFormat) validateColumns(columns []column) error { |
| 82 | +namesSeen := map[string]bool{} |
| 83 | +aliasesSeen := map[string]bool{} |
| 84 | +for _, column := range columns { |
| 85 | +if _, found := namesSeen[column.Name]; found { |
| 86 | +return f.FmtErr("file_declaration.columns contains duplicate name '%s'", column.Name) |
| 87 | +} |
| 88 | +namesSeen[column.Name] = true |
| 89 | +if column.Alias != nil { |
| 90 | +if _, found := aliasesSeen[*column.Alias]; found { |
| 91 | +return f.FmtErr("file_declaration.columns contains duplicate alias '%s'", *column.Alias) |
| 92 | +} |
| 93 | +aliasesSeen[*column.Alias] = true |
| 94 | +} |
| 95 | +} |
| 96 | +return nil |
| 97 | +} |
| 98 | + |
| 99 | +func (f *csvFileFormat) CreateFormatReader( |
| 100 | +name string, r io.Reader, runtime interface{}) (omniv2fileformat.FormatReader, error) { |
| 101 | +csv := runtime.(*csvFormatRuntime) |
| 102 | +return NewReader(name, r, csv.Decl, csv.XPath) |
| 103 | +} |
| 104 | + |
| 105 | +func (f *csvFileFormat) FmtErr(format string, args ...interface{}) error { |
| 106 | +return fmt.Errorf("schema '%s': %s", f.schemaName, fmt.Sprintf(format, args...)) |
| 107 | +} |
0 commit comments