summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCharlie Vieth <charlie.vieth@gmail.com>2021-03-11 05:34:50 -0500
committerGitHub <noreply@github.com>2021-03-11 19:34:50 +0900
commit5a874ae241af64368e65b667592918b3fdb17177 (patch)
treeba500615d50512a74ad9c59cd29b0dd0edde4e31
parentf4e1ed25f224d0ebb4e6bd27c4a012a42c1ccfcd (diff)
Speed up ANSI code processing (#2368)
This commit speeds up the parsing/processing of ANSI escape codes by roughly 7.5x. The speedup is mostly accomplished by replacing the regex with dedicated parsing logic (nextAnsiEscapeSequence()) and reducing the number of allocations in extractColor(). #### Benchmarks ``` name old time/op new time/op delta ExtractColor-16 4.89µs ± 5% 0.64µs ± 2% -86.87% (p=0.000 n=9+9) name old speed new speed delta ExtractColor-16 25.6MB/s ± 5% 194.6MB/s ± 2% +661.43% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ExtractColor-16 1.37kB ± 0% 0.31kB ± 0% -77.31% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ExtractColor-16 48.0 ± 0% 4.0 ± 0% -91.67% (p=0.000 n=10+10) ```
-rw-r--r--Makefile5
-rw-r--r--src/ansi.go263
-rw-r--r--src/ansi_test.go239
3 files changed, 431 insertions, 76 deletions
diff --git a/Makefile b/Makefile
index f246508f..8b37ef9c 100644
--- a/Makefile
+++ b/Makefile
@@ -73,6 +73,9 @@ test: $(SOURCES)
github.com/junegunn/fzf/src/tui \
github.com/junegunn/fzf/src/util
+bench:
+ cd src && SHELL=/bin/sh GOOS= $(GO) test -v -tags "$(TAGS)" -run=Bench -bench=. -benchmem
+
install: bin/fzf
build:
@@ -153,4 +156,4 @@ update:
$(GO) get -u
$(GO) mod tidy
-.PHONY: all build release test install clean docker docker-test update
+.PHONY: all build release test bench install clean docker docker-test update
diff --git a/src/ansi.go b/src/ansi.go
index b4f98f6d..a67ac0b9 100644
--- a/src/ansi.go
+++ b/src/ansi.go
@@ -1,8 +1,6 @@
package fzf
import (
- "bytes"
- "regexp"
"strconv"
"strings"
"unicode/utf8"
@@ -82,73 +80,154 @@ func toAnsiString(color tui.Color, offset int) string {
return ret + ";"
}
-var ansiRegex *regexp.Regexp
-
-func init() {
- /*
- References:
- - https://github.com/gnachman/iTerm2
- - http://ascii-table.com/ansi-escape-sequences.php
- - http://ascii-table.com/ansi-escape-sequences-vt-100.php
- - http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
- - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
- */
- // The following regular expression will include not all but most of the
- // frequently used ANSI sequences
- ansiRegex = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)")
+func isPrint(c uint8) bool {
+ return '\x20' <= c && c <= '\x7e'
}
-func findAnsiStart(str string) int {
- idx := 0
- for ; idx < len(str); idx++ {
- b := str[idx]
- if b == 0x1b || b == 0x0e || b == 0x0f {
- return idx
+func matchOperatingSystemCommand(s string) int {
+ // `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
+ // ^ match starting here
+ //
+ i := 5 // prefix matched in nextAnsiEscapeSequence()
+ for ; i < len(s) && isPrint(s[i]); i++ {
+ }
+ if i < len(s) {
+ if s[i] == '\x07' {
+ return i + 1
+ }
+ if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' {
+ return i + 2
}
- if b == 0x08 && idx > 0 {
- return idx - 1
+ }
+ return -1
+}
+
+func matchControlSequence(s string) int {
+ // `\x1b[\\[()][0-9;]*[a-zA-Z@]`
+ // ^ match starting here
+ //
+ i := 2 // prefix matched in nextAnsiEscapeSequence()
+ for ; i < len(s) && (isNumeric(s[i]) || s[i] == ';'); i++ {
+ }
+ if i < len(s) {
+ c := s[i]
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '@' {
+ return i + 1
}
}
- return idx
+ return -1
+}
+
+func isCtrlSeqStart(c uint8) bool {
+ return c == '\\' || c == '[' || c == '(' || c == ')'
+}
+
+// nextAnsiEscapeSequence returns the ANSI escape sequence and is equivalent to
+// calling FindStringIndex() on the below regex (which was originally used):
+//
+// "(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)"
+//
+func nextAnsiEscapeSequence(s string) (int, int) {
+ // fast check for ANSI escape sequences
+ i := 0
+ for ; i < len(s); i++ {
+ switch s[i] {
+ case '\x0e', '\x0f', '\x1b', '\x08':
+ // We ignore the fact that '\x08' cannot be the first char
+ // in the string and be an escape sequence for the sake of
+ // speed and simplicity.
+ goto Loop
+ }
+ }
+ return -1, -1
+
+Loop:
+ for ; i < len(s); i++ {
+ switch s[i] {
+ case '\x08':
+ // backtrack to match: `.\x08`
+ if i > 0 && s[i-1] != '\n' {
+ if s[i-1] < utf8.RuneSelf {
+ return i - 1, i + 1
+ }
+ _, n := utf8.DecodeLastRuneInString(s[:i])
+ return i - n, i + 1
+ }
+ case '\x1b':
+ // match: `\x1b[\\[()][0-9;]*[a-zA-Z@]`
+ if i+2 < len(s) && isCtrlSeqStart(s[i+1]) {
+ if j := matchControlSequence(s[i:]); j != -1 {
+ return i, i + j
+ }
+ }
+
+ // match: `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
+ if i+5 < len(s) && s[i+1] == ']' && isNumeric(s[i+2]) &&
+ s[i+3] == ';' && isPrint(s[i+4]) {
+
+ if j := matchOperatingSystemCommand(s[i:]); j != -1 {
+ return i, i + j
+ }
+ }
+
+ // match: `\x1b.`
+ if i+1 < len(s) && s[i+1] != '\n' {
+ if s[i+1] < utf8.RuneSelf {
+ return i, i + 2
+ }
+ _, n := utf8.DecodeRuneInString(s[i+1:])
+ return i, i + n + 1
+ }
+ case '\x0e', '\x0f':
+ // match: `[\x0e\x0f]`
+ return i, i + 1
+ }
+ }
+ return -1, -1
}
func extractColor(str string, state *ansiState, proc func(string, *ansiState) bool) (string, *[]ansiOffset, *ansiState) {
- var offsets []ansiOffset
- var output bytes.Buffer
+ // We append to a stack allocated variable that we'll
+ // later copy and return, to save on allocations.
+ offsets := make([]ansiOffset, 0, 32)
if state != nil {
offsets = append(offsets, ansiOffset{[2]int32{0, 0}, *state})
}
- prevIdx := 0
- runeCount := 0
+ var (
+ pstate *ansiState // lazily allocated
+ output strings.Builder
+ prevIdx int
+ runeCount int
+ )
for idx := 0; idx < len(str); {
- idx += findAnsiStart(str[idx:])
- if idx == len(str) {
- break
- }
-
// Make sure that we found an ANSI code
- offset := ansiRegex.FindStringIndex(str[idx:])
- if len(offset) < 2 {
- idx++
- continue
+ start, end := nextAnsiEscapeSequence(str[idx:])
+ if start == -1 {
+ break
}
- offset[0] += idx
- offset[1] += idx
- idx = offset[1]
+ start += idx
+ idx += end
// Check if we should continue
- prev := str[prevIdx:offset[0]]
+ prev := str[prevIdx:start]
if proc != nil && !proc(prev, state) {
return "", nil, nil
}
+ prevIdx = idx
- prevIdx = offset[1]
- runeCount += utf8.RuneCountInString(prev)
- output.WriteString(prev)
+ if len(prev) != 0 {
+ runeCount += utf8.RuneCountInString(prev)
+ // Grow the buffer size to the maximum possible length (string length
+ // containing ansi codes) to avoid repetitive allocation
+ if output.Cap() == 0 {
+ output.Grow(len(str))
+ }
+ output.WriteString(prev)
+ }
- newState := interpretCode(str[offset[0]:offset[1]], state)
+ newState := interpretCode(str[start:idx], state)
if !newState.equals(state) {
if state != nil {
// Update last offset
@@ -157,8 +236,15 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
if newState.colored() {
// Append new offset
- state = newState
- offsets = append(offsets, ansiOffset{[2]int32{int32(runeCount), int32(runeCount)}, *state})
+ if pstate == nil {
+ pstate = &ansiState{}
+ }
+ *pstate = newState
+ state = pstate
+ offsets = append(offsets, ansiOffset{
+ [2]int32{int32(runeCount), int32(runeCount)},
+ newState,
+ })
} else {
// Discard state
state = nil
@@ -168,7 +254,6 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
var rest string
var trimmed string
-
if prevIdx == 0 {
// No ANSI code found
rest = str
@@ -178,51 +263,75 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
output.WriteString(rest)
trimmed = output.String()
}
- if len(rest) > 0 && state != nil {
- // Update last offset
- runeCount += utf8.RuneCountInString(rest)
- (&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
- }
if proc != nil {
proc(rest, state)
}
- if len(offsets) == 0 {
- return trimmed, nil, state
+ if len(offsets) > 0 {
+ if len(rest) > 0 && state != nil {
+ // Update last offset
+ runeCount += utf8.RuneCountInString(rest)
+ (&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
+ }
+ // Return a copy of the offsets slice
+ a := make([]ansiOffset, len(offsets))
+ copy(a, offsets)
+ return trimmed, &a, state
}
- return trimmed, &offsets, state
+ return trimmed, nil, state
+}
+
+func parseAnsiCode(s string) (int, string) {
+ var remaining string
+ if i := strings.IndexByte(s, ';'); i >= 0 {
+ remaining = s[i+1:]
+ s = s[:i]
+ }
+
+ if len(s) > 0 {
+ // Inlined version of strconv.Atoi() that only handles positive
+ // integers and does not allocate on error.
+ code := 0
+ for _, ch := range []byte(s) {
+ ch -= '0'
+ if ch > 9 {
+ return -1, remaining
+ }
+ code = code*10 + int(ch)
+ }
+ return code, remaining
+ }
+
+ return -1, remaining
}
-func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
- // State
- var state *ansiState
+func interpretCode(ansiCode string, prevState *ansiState) ansiState {
+ var state ansiState
if prevState == nil {
- state = &ansiState{-1, -1, 0, -1}
+ state = ansiState{-1, -1, 0, -1}
} else {
- state = &ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
+ state = ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
}
if ansiCode[0] != '\x1b' || ansiCode[1] != '[' || ansiCode[len(ansiCode)-1] != 'm' {
- if strings.HasSuffix(ansiCode, "0K") && prevState != nil {
+ if prevState != nil && strings.HasSuffix(ansiCode, "0K") {
state.lbg = prevState.bg
}
return state
}
- ptr := &state.fg
- state256 := 0
-
- init := func() {
+ if len(ansiCode) <= 3 {
state.fg = -1
state.bg = -1
state.attr = 0
- state256 = 0
+ return state
}
-
ansiCode = ansiCode[2 : len(ansiCode)-1]
- if len(ansiCode) == 0 {
- init()
- }
- for _, code := range strings.Split(ansiCode, ";") {
- if num, err := strconv.Atoi(code); err == nil {
+
+ state256 := 0
+ ptr := &state.fg
+
+ for len(ansiCode) != 0 {
+ var num int
+ if num, ansiCode = parseAnsiCode(ansiCode); num != -1 {
switch state256 {
case 0:
switch num {
@@ -253,7 +362,10 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
case 24: // tput rmul
state.attr = state.attr &^ tui.Underline
case 0:
- init()
+ state.fg = -1
+ state.bg = -1
+ state.attr = 0
+ state256 = 0
default:
if num >= 30 && num <= 37 {
state.fg = tui.Color(num - 30)
@@ -289,6 +401,7 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
}
}
}
+
if state256 > 0 {
*ptr = -1
}
diff --git a/src/ansi_test.go b/src/ansi_test.go
index 497eca44..36ed0c7d 100644
--- a/src/ansi_test.go
+++ b/src/ansi_test.go
@@ -2,12 +2,190 @@ package fzf
import (
"fmt"
+ "math/rand"
+ "regexp"
"strings"
"testing"
+ "unicode/utf8"
"github.com/junegunn/fzf/src/tui"
)
+// The following regular expression will include not all but most of the
+// frequently used ANSI sequences. This regex is used as a reference for
+// testing nextAnsiEscapeSequence().
+//
+// References:
+// - https://github.com/gnachman/iTerm2
+// - http://ascii-table.com/ansi-escape-sequences.php
+// - http://ascii-table.com/ansi-escape-sequences-vt-100.php
+// - http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
+// - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
+var ansiRegexRefence = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)")
+
+func testParserReference(t testing.TB, str string) {
+ t.Helper()
+
+ toSlice := func(start, end int) []int {
+ if start == -1 {
+ return nil
+ }
+ return []int{start, end}
+ }
+
+ s := str
+ for i := 0; ; i++ {
+ got := toSlice(nextAnsiEscapeSequence(s))
+ exp := ansiRegexRefence.FindStringIndex(s)
+
+ equal := len(got) == len(exp)
+ if equal {
+ for i := 0; i < len(got); i++ {
+ if got[i] != exp[i] {
+ equal = false
+ break
+ }
+ }
+ }
+ if !equal {
+ var exps, gots []rune
+ if len(got) == 2 {
+ gots = []rune(s[got[0]:got[1]])
+ }
+ if len(exp) == 2 {
+ exps = []rune(s[exp[0]:exp[1]])
+ }
+ t.Errorf("%d: %q: got: %v (%q) want: %v (%q)", i, s, got, gots, exp, exps)
+ return
+ }
+ if len(exp) == 0 {
+ return
+ }
+ s = s[exp[1]:]
+ }
+}
+
+func TestNextAnsiEscapeSequence(t *testing.T) {
+ testStrs := []string{
+ "\x1b[0mhello world",
+ "\x1b[1mhello world",
+ "椙\x1b[1m椙",
+ "椙\x1b[1椙m椙",
+ "\x1b[1mhello \x1b[mw\x1b7o\x1b8r\x1b(Bl\x1b[2@d",
+ "\x1b[1mhello \x1b[Kworld",
+ "hello \x1b[34;45;1mworld",
+ "hello \x1b[34;45;1mwor\x1b[34;45;1mld",
+ "hello \x1b[34;45;1mwor\x1b[0mld",
+ "hello \x1b[34;48;5;233;1mwo\x1b[38;5;161mr\x1b[0ml\x1b[38;5;161md",
+ "hello \x1b[38;5;38;48;5;48;1mwor\x1b[38;5;48;48;5;38ml\x1b[0md",
+ "hello \x1b[32;1mworld",
+ "hello world",
+ "hello \x1b[0;38;5;200;48;5;100mworld",
+ "\x1b椙",
+ "椙\x08",
+ "\n\x08",
+ "X\x08",
+ "",
+ "\x1b]4;3;rgb:aa/bb/cc\x07 ",
+ "\x1b]4;3;rgb:aa/bb/cc\x1b\\ ",
+ ansiBenchmarkString,
+ }
+
+ for _, s := range testStrs {
+ testParserReference(t, s)
+ }
+}
+
+func TestNextAnsiEscapeSequence_Fuzz_Modified(t *testing.T) {
+ t.Parallel()
+ if testing.Short() {
+ t.Skip("short test")
+ }
+
+ testStrs := []string{
+ "\x1b[0mhello world",
+ "\x1b[1mhello world",
+ "椙\x1b[1m椙",
+ "椙\x1b[1椙m椙",
+ "\x1b[1mhello \x1b[mw\x1b7o\x1b8r\x1b(Bl\x1b[2@d",
+ "\x1b[1mhello \x1b[Kworld",
+ "hello \x1b[34;45;1mworld",
+ "hello \x1b[34;45;1mwor\x1b[34;45;1mld",
+ "hello \x1b[34;45;1mwor\x1b[0mld",
+ "hello \x1b[34;48;5;233;1mwo\x1b[38;5;161mr\x1b[0ml\x1b[38;5;161md",
+ "hello \x1b[38;5;38;48;5;48;1mwor\x1b[38;5;48;48;5;38ml\x1b[0md",
+ "hello \x1b[32;1mworld",
+ "hello world",
+ "hello \x1b[0;38;5;200;48;5;100mworld",
+ ansiBenchmarkString,
+ }
+
+ replacementBytes := [...]rune{'\x0e', '\x0f', '\x1b', '\x08'}
+
+ modifyString := func(s string, rr *rand.Rand) string {
+ n := rr.Intn(len(s))
+ b := []rune(s)
+ for ; n >= 0 && len(b) != 0; n-- {
+ i := rr.Intn(len(b))
+ switch x := rr.Intn(4); x {
+ case 0:
+ b = append(b[:i], b[i+1:]...)
+ case 1:
+ j := rr.Intn(len(replacementBytes) - 1)
+ b[i] = replacementBytes[j]
+ case 2:
+ x := rune(rr.Intn(utf8.MaxRune))
+ for !utf8.ValidRune(x) {
+ x = rune(rr.Intn(utf8.MaxRune))
+ }
+ b[i] = x
+ case 3:
+ b[i] = rune(rr.Intn(utf8.MaxRune)) // potentially invalid
+ default:
+ t.Fatalf("unsupported value: %d", x)
+ }
+ }
+ return string(b)
+ }
+
+ rr := rand.New(rand.NewSource(1))
+ for _, s := range testStrs {
+ for i := 1_000; i >= 0; i-- {
+ testParserReference(t, modifyString(s, rr))
+ }
+ }
+}
+
+func TestNextAnsiEscapeSequence_Fuzz_Random(t *testing.T) {
+ t.Parallel()
+
+ if testing.Short() {
+ t.Skip("short test")
+ }
+
+ randomString := func(rr *rand.Rand) string {
+ numChars := rand.Intn(50)
+ codePoints := make([]rune, numChars)
+ for i := 0; i < len(codePoints); i++ {
+ var r rune
+ for n := 0; n < 1000; n++ {
+ r = rune(rr.Intn(utf8.MaxRune))
+ // Allow 10% of runes to be invalid
+ if utf8.ValidRune(r) || rr.Float64() < 0.10 {
+ break
+ }
+ }
+ codePoints[i] = r
+ }
+ return string(codePoints)
+ }
+
+ rr := rand.New(rand.NewSource(1))
+ for i := 0; i < 100_000; i++ {
+ testParserReference(t, randomString(rr))
+ }
+}
+
func TestExtractColor(t *testing.T) {
assert := func(offset ansiOffset, b int32, e int32, fg tui.Color, bg tui.Color, bold bool) {
var attr tui.Attr
@@ -185,3 +363,64 @@ func TestAnsiCodeStringConversion(t *testing.T) {
&ansiState{attr: tui.Dim | tui.Italic, fg: 1, bg: 1},
"\x1b[2;3;7;38;2;10;20;30;48;5;100m")
}
+
+func TestParseAnsiCode(t *testing.T) {
+ tests := []struct {
+ In, Exp string
+ N int
+ }{
+ {"123", "", 123},
+ {"1a", "", -1},
+ {"1a;12", "12", -1},
+ {"12;a", "a", 12},
+ {"-2", "", -1},
+ }
+ for _, x := range tests {
+ n, s := parseAnsiCode(x.In)
+ if n != x.N || s != x.Exp {
+ t.Fatalf("%q: got: (%d %q) want: (%d %q)", x.In, n, s, x.N, x.Exp)
+ }
+ }
+}
+
+// kernel/bpf/preload/iterators/README
+const ansiBenchmarkString = "\x1b[38;5;81m\x1b[01;31m\x1b[Kkernel/\x1b[0m\x1b[38;5;81mbpf/" +
+ "\x1b[0m\x1b[38;5;81mpreload/\x1b[0m\x1b[38;5;81miterators/" +
+ "\x1b[0m\x1b[38;5;149mMakefile\x1b[m\x1b[K\x1b[0m"
+
+func BenchmarkNextAnsiEscapeSequence(b *testing.B) {
+ b.SetBytes(int64(len(ansiBenchmarkString)))
+ for i := 0; i < b.N; i++ {
+ s := ansiBenchmarkString
+ for {
+ _, o := nextAnsiEscapeSequence(s)
+ if o == -1 {
+ break
+ }
+ s = s[o:]
+ }
+ }
+}
+
+// Baseline test to compare the speed of nextAnsiEscapeSequence() to the
+// previously used regex based implementation.
+func BenchmarkNextAnsiEscapeSequence_Regex(b *testing.B) {
+ b.SetBytes(int64(len(ansiBenchmarkString)))
+ for i := 0; i < b.N; i++ {
+ s := ansiBenchmarkString
+ for {
+ a := ansiRegexRefence.FindStringIndex(s)
+ if len(a) == 0 {
+ break
+ }
+ s = s[a[1]:]
+ }
+ }
+}
+
+func BenchmarkExtractColor(b *testing.B) {
+ b.SetBytes(int64(len(ansiBenchmarkString)))
+ for i := 0; i < b.N; i++ {
+ extractColor(ansiBenchmarkString, nil, nil)
+ }
+}