Skip to content

Commit 66d0ce8

Browse files
committed
Implement ScanLinesAndTruncateWhenLongerThanBuffer
1 parent 6bb8c18 commit 66d0ce8

File tree

2 files changed

+122
-1
lines changed

2 files changed

+122
-1
lines changed

pkg/utils/lines.go

Lines changed: 58 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
package utils
22

3-
import "strings"
3+
import (
4+
"bytes"
5+
"strings"
6+
)
47

58
// SplitLines takes a multiline string and splits it on newlines
69
// currently we are also stripping \r's which may have adverse effects for
@@ -43,3 +46,57 @@ func EscapeSpecialChars(str string) string {
4346
"\v", "\\v",
4447
).Replace(str)
4548
}
49+
50+
// dropCR returns data without its final byte when that byte is a carriage
// return; otherwise it returns data unchanged. The result is a sub-slice of
// data, not a copy.
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[0 : len(data)-1]
	}
	return data
}

// ScanLinesAndTruncateWhenLongerThanBuffer returns a split function that can be
// used with bufio.Scanner.Split(). It is very similar to bufio.ScanLines,
// except that it will truncate lines that are longer than the scanner's read
// buffer (whereas bufio.ScanLines will return an error in that case, which is
// often difficult to handle).
//
// If you are using your own buffer for the scanner, you must set maxBufferSize
// to the same value as the max parameter that you passed to scanner.Buffer().
// Otherwise, maxBufferSize must be set to bufio.MaxScanTokenSize.
//
// The returned function remembers between calls whether it is currently
// discarding the tail of a truncated line, so every scanner needs its own
// split function; do not share one between scanners.
func ScanLinesAndTruncateWhenLongerThanBuffer(maxBufferSize int) func(data []byte, atEOF bool) (int, []byte, error) {
	// True while we are still inside a line whose beginning has already been
	// returned as a truncated token; everything up to and including the next
	// newline must be consumed without producing another token.
	skipOverRemainderOfLongLine := false

	return func(data []byte, atEOF bool) (int, []byte, error) {
		if atEOF && len(data) == 0 {
			// Done
			return 0, nil, nil
		}
		if i := bytes.IndexByte(data, '\n'); i >= 0 {
			if skipOverRemainderOfLongLine {
				// This newline ends the long line; swallow the rest of it.
				skipOverRemainderOfLongLine = false
				return i + 1, nil, nil
			}
			return i + 1, dropCR(data[0:i]), nil
		}
		if atEOF {
			if skipOverRemainderOfLongLine {
				return len(data), nil, nil
			}

			// Final line without a terminating newline.
			return len(data), dropCR(data), nil
		}

		// Buffer is full, so we can't get more data
		if len(data) >= maxBufferSize {
			if skipOverRemainderOfLongLine {
				return len(data), nil, nil
			}

			skipOverRemainderOfLongLine = true
			// The cut may fall between the '\r' and the '\n' of a CRLF line
			// ending; strip a trailing '\r' so the token never carries half of
			// a line terminator.
			return len(data), dropCR(data), nil
		}

		// Request more data.
		return 0, nil, nil
	}
}

pkg/utils/lines_test.go

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
package utils
22

33
import (
4+
"bufio"
5+
"strings"
46
"testing"
57

68
"github.com/stretchr/testify/assert"
@@ -100,3 +102,65 @@ func TestNormalizeLinefeeds(t *testing.T) {
100102
assert.EqualValues(t, string(s.expected), NormalizeLinefeeds(string(s.byteArray)))
101103
}
102104
}
105+
106+
func TestScanLinesAndTruncateWhenLongerThanBuffer(t *testing.T) {
107+
type scenario struct {
108+
input string
109+
expectedLines []string
110+
}
111+
112+
scenarios := []scenario{
113+
{
114+
"",
115+
[]string{},
116+
},
117+
{
118+
"\n",
119+
[]string{""},
120+
},
121+
{
122+
"abc",
123+
[]string{"abc"},
124+
},
125+
{
126+
"abc\ndef",
127+
[]string{"abc", "def"},
128+
},
129+
{
130+
"abc\n\ndef",
131+
[]string{"abc", "", "def"},
132+
},
133+
{
134+
"abc\r\ndef\r",
135+
[]string{"abc", "def"},
136+
},
137+
{
138+
"abcdef",
139+
[]string{"abcde"},
140+
},
141+
{
142+
"abcdef\n",
143+
[]string{"abcde"},
144+
},
145+
{
146+
"abcdef\nghijkl\nx",
147+
[]string{"abcde", "ghijk", "x"},
148+
},
149+
{
150+
"abc\ndefghijklmnopqrstuvw\nx",
151+
[]string{"abc", "defgh", "x"},
152+
},
153+
}
154+
155+
for _, s := range scenarios {
156+
scanner := bufio.NewScanner(strings.NewReader(s.input))
157+
scanner.Buffer(make([]byte, 5), 5)
158+
scanner.Split(ScanLinesAndTruncateWhenLongerThanBuffer(5))
159+
result := []string{}
160+
for scanner.Scan() {
161+
result = append(result, scanner.Text())
162+
}
163+
assert.NoError(t, scanner.Err())
164+
assert.EqualValues(t, s.expectedLines, result)
165+
}
166+
}

0 commit comments

Comments
 (0)