summary | refs | log | tree | commit | diff | stats
path: root/pkg
diff options
context:
space:
mode:
author: Stefan Haller <stefan@haller-berlin.de> 2024-04-27 17:16:06 +0200
committer: Stefan Haller <stefan@haller-berlin.de> 2024-05-15 13:27:01 +0200
commit: 66d0ce841c9d89bbca3663e4c51b59120838d074 (patch)
tree: 7f8f401e059c08f6ddabf9fafae73572b7681016 /pkg
parent: 6bb8c180b2c44911775cd045b8c90a916de738d9 (diff)
Implement ScanLinesAndTruncateWhenLongerThanBuffer
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/utils/lines.go 59
-rw-r--r-- pkg/utils/lines_test.go 64
2 files changed, 122 insertions, 1 deletions
diff --git a/pkg/utils/lines.go b/pkg/utils/lines.go
index 662ba2f9b..c70d02ffc 100644
--- a/pkg/utils/lines.go
+++ b/pkg/utils/lines.go
@@ -1,6 +1,9 @@
package utils
-import "strings"
+import (
+ "bytes"
+ "strings"
+)
// SplitLines takes a multiline string and splits it on newlines
// currently we are also stripping \r's which may have adverse effects for
@@ -43,3 +46,57 @@ func EscapeSpecialChars(str string) string {
"\v", "\\v",
).Replace(str)
}
+
// dropCR returns data without its final byte when that byte is a carriage
// return, so that the "\r" of a "\r\n" line ending does not end up in the
// token. Any other input is returned unchanged.
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[0 : len(data)-1]
	}
	return data
}

// ScanLinesAndTruncateWhenLongerThanBuffer returns a split function that can be
// used with bufio.Scanner.Split(). It is very similar to bufio.ScanLines,
// except that it will truncate lines that are longer than the scanner's read
// buffer (whereas bufio.ScanLines will return an error in that case, which is
// often difficult to handle).
//
// If you are using your own buffer for the scanner, you must set maxBufferSize
// to the same value as the max parameter that you passed to scanner.Buffer().
// Otherwise, maxBufferSize must be set to bufio.MaxScanTokenSize.
//
// A truncated line is cut at a byte boundary, so its token may end in the
// middle of a multi-byte UTF-8 sequence. As with every other token, a trailing
// carriage return is removed from it.
//
// The returned function remembers between calls whether it is in the middle of
// discarding an over-long line, so every scanner needs its own instance.
func ScanLinesAndTruncateWhenLongerThanBuffer(maxBufferSize int) func(data []byte, atEOF bool) (int, []byte, error) {
	skipOverRemainderOfLongLine := false

	return func(data []byte, atEOF bool) (int, []byte, error) {
		if atEOF && len(data) == 0 {
			// Done
			return 0, nil, nil
		}

		// Number of leading bytes of the original data that belong to the
		// discarded tail of a previously truncated line.
		skipped := 0
		if skipOverRemainderOfLongLine {
			i := bytes.IndexByte(data, '\n')
			if i < 0 {
				// Everything we hold is still part of the long line; throw it
				// away and keep looking for its newline in the next chunk.
				return len(data), nil, nil
			}

			skipOverRemainderOfLongLine = false
			skipped = i + 1
			data = data[skipped:]
			if len(data) == 0 {
				// Nothing follows the newline yet; just consume the tail.
				return skipped, nil, nil
			}

			// Keep going with the bytes after the newline instead of returning
			// a nil token here: once the scanner has recorded EOF (or any
			// other read error) it stops as soon as the split function yields
			// no token, which would silently drop the remaining lines.
		}

		if i := bytes.IndexByte(data, '\n'); i >= 0 {
			return skipped + i + 1, dropCR(data[0:i]), nil
		}
		if atEOF {
			// Final line without a terminating newline.
			return skipped + len(data), dropCR(data), nil
		}

		// Buffer is full, so we can't get more data
		if len(data) >= maxBufferSize {
			skipOverRemainderOfLongLine = true
			// The carriage return of a "\r\n" terminator may be the last byte
			// that still fit into the buffer; it is not part of the line.
			return skipped + len(data), dropCR(data), nil
		}

		// Request more data (after consuming any skipped remainder).
		return skipped, nil, nil
	}
}
diff --git a/pkg/utils/lines_test.go b/pkg/utils/lines_test.go
index e7171022b..2192a3780 100644
--- a/pkg/utils/lines_test.go
+++ b/pkg/utils/lines_test.go
@@ -1,6 +1,8 @@
package utils
import (
+ "bufio"
+ "strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -100,3 +102,65 @@ func TestNormalizeLinefeeds(t *testing.T) {
assert.EqualValues(t, string(s.expected), NormalizeLinefeeds(string(s.byteArray)))
}
}
+
+func TestScanLinesAndTruncateWhenLongerThanBuffer(t *testing.T) {
+ type scenario struct {
+ input string
+ expectedLines []string
+ }
+
+ scenarios := []scenario{
+ {
+ "",
+ []string{},
+ },
+ {
+ "\n",
+ []string{""},
+ },
+ {
+ "abc",
+ []string{"abc"},
+ },
+ {
+ "abc\ndef",
+ []string{"abc", "def"},
+ },
+ {
+ "abc\n\ndef",
+ []string{"abc", "", "def"},
+ },
+ {
+ "abc\r\ndef\r",
+ []string{"abc", "def"},
+ },
+ {
+ "abcdef",
+ []string{"abcde"},
+ },
+ {
+ "abcdef\n",
+ []string{"abcde"},
+ },
+ {
+ "abcdef\nghijkl\nx",
+ []string{"abcde", "ghijk", "x"},
+ },
+ {
+ "abc\ndefghijklmnopqrstuvw\nx",
+ []string{"abc", "defgh", "x"},
+ },
+ }
+
+ for _, s := range scenarios {
+ scanner := bufio.NewScanner(strings.NewReader(s.input))
+ scanner.Buffer(make([]byte, 5), 5)
+ scanner.Split(ScanLinesAndTruncateWhenLongerThanBuffer(5))
+ result := []string{}
+ for scanner.Scan() {
+ result = append(result, scanner.Text())
+ }
+ assert.NoError(t, scanner.Err())
+ assert.EqualValues(t, s.expectedLines, result)
+ }
+}