Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/howto/pipeline_testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ Before sending log events to the ingest pipeline, a data transformation process
```yml
multiline:
first_line_pattern: "^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}"
exclude_lines:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should prepare a PR for the package-spec first.

- "^#.*"
fields:
"@timestamp": "2020-04-28T11:07:58.223Z"
ecs:
Expand All @@ -99,6 +101,8 @@ numeric_keyword_fields:

The `multiline` section ([raw files](#raw-files) only) configures the log file reader to correctly detect multiline log entries using the `first_line_pattern`. Use this property if your logs may be split into multiple lines, e.g. Java stack traces.

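The `exclude_lines` section allows filtering out lines from the sample log files. Each item is a regular expression, and every line matching at least one of them is dropped. If `multiline` is used, the patterns are checked after the lines have been merged.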
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the elastic/integrations#1145:

I'm wondering if it's possible to remove lines starting with # with ingest pipeline (during processing, just skip them).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, I will try that!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that something like #384 will be needed, otherwise errors like this happen:

ERROR: verifying test result failed: comparing test results failed: can't adjust test results: can't unmarshal event: unexpected end of JSON input 

The `fields` section allows for customizing extra fields to be added to every read log entry (e.g. `@timestamp`, `ecs`). Use this property to extend your logs with data that can't be extracted from the log content and for which it is fine to have the same value in every record (e.g. timezone, hostname).

The `dynamic_fields` section allows for marking fields as dynamic (every time they have different non-static values), so that pattern matching instead of strict value check is applied.
Expand Down
167 changes: 137 additions & 30 deletions internal/testrunner/runners/pipeline/test_case.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,46 +84,153 @@ func createTestCase(filename string, entries []json.RawMessage, config *testConf
}, nil
}

// lineReader is the iteration contract shared by the raw input readers in
// this file. *bufio.Scanner satisfies it, and the multiline and exclude
// readers both consume and implement it, so they can be stacked.
type lineReader interface {
// Scan advances to the next entry and reports whether one is available.
Scan() bool
// Text returns the entry made available by the most recent call to Scan.
Text() string
// Err returns the error, if any, met while reading.
Err() error
}

func readRawInputEntries(inputData []byte, c *testConfig) ([]string, error) {
var inputDataEntries []string
var err error
var scanner lineReader = bufio.NewScanner(bytes.NewReader(inputData))

// Setup multiline
if c.Multiline != nil && c.Multiline.FirstLinePattern != "" {
scanner, err = newMultilineReader(scanner, c.Multiline)
if err != nil {
return nil, errors.Wrap(err, "failed to read multiline")
}
}

// Setup exclude lines
if len(c.ExcludeLines) > 0 {
scanner, err = newExcludePatternsReader(scanner, c.ExcludeLines)
if err != nil {
return nil, errors.Wrap(err, "invalid expression in exclude_lines")
}
}

var builder strings.Builder
scanner := bufio.NewScanner(bytes.NewReader(inputData))
var inputDataEntries []string
for scanner.Scan() {
line := scanner.Text()
inputDataEntries = append(inputDataEntries, scanner.Text())
}
err = scanner.Err()
if err != nil {
return nil, errors.Wrap(err, "reading raw input test file failed")
}

var body string
if c.Multiline != nil && c.Multiline.FirstLinePattern != "" {
matched, err := regexp.MatchString(c.Multiline.FirstLinePattern, line)
if err != nil {
return nil, errors.Wrapf(err, "regexp matching failed (pattern: %s)", c.Multiline.FirstLinePattern)
}
return inputDataEntries, nil
}

if matched {
body = builder.String()
builder.Reset()
}
if builder.Len() > 0 {
builder.WriteByte('\n')
}
builder.WriteString(line)
if !matched || body == "" {
continue
}
} else {
body = line
// multilineReader is a lineReader that merges the lines of an underlying
// reader into multiline entries.
type multilineReader struct {
// reader is the underlying source of single lines.
reader lineReader
// firstLinePattern matches the lines that start a new entry.
firstLinePattern *regexp.Regexp

// current accumulates the entry returned by Text.
current strings.Builder
// next buffers the first line of the following entry, which Scan has to
// read in order to detect the end of the current one.
next strings.Builder
}

// newMultilineReader wraps reader in a multilineReader that uses
// config.FirstLinePattern to detect where entries begin. The pattern is
// compiled here; a compilation error is returned unwrapped.
func newMultilineReader(reader lineReader, config *multiline) (*multilineReader, error) {
	pattern, compileErr := regexp.Compile(config.FirstLinePattern)
	if compileErr != nil {
		return nil, compileErr
	}

	mr := &multilineReader{reader: reader}
	mr.firstLinePattern = pattern
	return mr, nil
}

// Scan assembles the next multiline entry and reports whether anything was
// read. Lines are taken from the underlying reader and joined with '\n' until
// a line matching firstLinePattern is found while the entry is non-empty;
// that line is kept in next and becomes the start of the following entry.
//
// NOTE: "non-empty" is decided with Len, so a buffered first line that is the
// empty string is not carried over to the following call.
func (r *multilineReader) Scan() bool {
	scanned := false
	r.current.Reset()

	// Begin with the line the previous call had to read ahead.
	if r.next.Len() > 0 {
		scanned = true
		r.current.WriteString(r.next.String())
		r.next.Reset()
	}

	for r.reader.Scan() {
		scanned = true
		text := r.reader.Text()

		// A first-line match only ends the entry when there is already
		// content; otherwise the line is the start of this very entry.
		if r.current.Len() > 0 && r.firstLinePattern.MatchString(text) {
			r.next.WriteString(text)
			break
		}

		if r.current.Len() > 0 {
			r.current.WriteByte('\n')
		}
		r.current.WriteString(text)
	}

	return scanned
}

// Text returns the entry accumulated by the most recent call to Scan.
func (r *multilineReader) Text() (body string) {
	body = r.current.String()
	return body
}

// Err forwards the error state of the underlying reader.
func (r *multilineReader) Err() error {
	underlying := r.reader
	return underlying.Err()
}

// excludePatternsReader is a lineReader that skips the entries of an
// underlying reader matching any of a list of patterns.
type excludePatternsReader struct {
// reader is the underlying source of entries.
reader lineReader
// patterns are the compiled exclusion expressions.
patterns []*regexp.Regexp

// text is the entry accepted by the most recent call to Scan.
text string
}

// newExcludePatternsReader wraps reader in an excludePatternsReader that
// drops every entry matching at least one of patterns. The patterns are
// compiled with compilePatterns; a compilation error is returned unwrapped so
// that the caller can add its own context.
func newExcludePatternsReader(reader lineReader, patterns []string) (*excludePatternsReader, error) {
	compiled, err := compilePatterns(patterns)
	if err != nil {
		return nil, err
	}
	return &excludePatternsReader{
		reader:   reader,
		patterns: compiled,
	}, nil
}

// Scan advances to the next entry of the underlying reader that matches none
// of the exclusion patterns and stores it for Text. It reports false, with
// the stored text cleared, once the underlying reader stops.
func (r *excludePatternsReader) Scan() (scanned bool) {
	r.text = ""
	for {
		if !r.reader.Scan() {
			return false
		}

		candidate := r.reader.Text()
		if !anyPatternMatch(r.patterns, candidate) {
			r.text = candidate
			return true
		}
	}
}

// Text returns the entry accepted by the most recent call to Scan.
func (r *excludePatternsReader) Text() (body string) {
	body = r.text
	return body
}

// Err forwards the error state of the underlying reader.
func (r *excludePatternsReader) Err() error {
	underlying := r.reader
	return underlying.Err()
}

// anyPatternMatch reports whether text is matched by at least one of
// patterns. It returns false when patterns is empty.
func anyPatternMatch(patterns []*regexp.Regexp, text string) bool {
	for i := 0; i < len(patterns); i++ {
		if patterns[i].MatchString(text) {
			return true
		}
	}
	return false
}

lastEntry := builder.String()
if len(lastEntry) > 0 {
inputDataEntries = append(inputDataEntries, lastEntry)
// compilePatterns compiles every item of patterns as a regular expression and
// returns the compiled expressions in the same order. It stops at the first
// pattern that fails to compile and returns that error with a nil slice. For
// empty input it returns a nil slice and no error.
func compilePatterns(patterns []string) ([]*regexp.Regexp, error) {
	if len(patterns) == 0 {
		return nil, nil
	}

	regexps := make([]*regexp.Regexp, 0, len(patterns))
	for _, pattern := range patterns {
		r, err := regexp.Compile(pattern)
		if err != nil {
			return nil, err
		}
		regexps = append(regexps, r)
	}
	return regexps, nil
}
12 changes: 12 additions & 0 deletions internal/testrunner/runners/pipeline/test_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,22 @@ type testConfig struct {
Fields map[string]interface{} `config:"fields"`
DynamicFields map[string]string `config:"dynamic_fields"`

// List of regular expressions to exclude lines from the test file.
ExcludeLines []string `config:"exclude_lines"`

// NumericKeywordFields holds a list of fields that have keyword
// type but can be ingested as numeric type.
NumericKeywordFields []string `config:"numeric_keyword_fields"`
}

// Validate checks that every item of ExcludeLines compiles as a regular
// expression. The compiled expressions are discarded; only the error matters.
func (c *testConfig) Validate() error {
	_, err := compilePatterns(c.ExcludeLines)
	if err == nil {
		return nil
	}
	return errors.Wrap(err, "invalid pattern in exclude_lines")
}

// multiline holds the multiline settings of a test configuration.
type multiline struct {
// FirstLinePattern is the regular expression, read from the
// "first_line_pattern" key, that matches the first line of an entry.
FirstLinePattern string `config:"first_line_pattern"`
}
Expand Down Expand Up @@ -64,6 +75,7 @@ func readConfigForTestCase(testCasePath string) (*testConfig, error) {
return nil, errors.Wrapf(err, "can't unpack test configuration: %s", configPath)
}
}

return &c, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
"expected": [
null
]
}
}