diff options
author | Charlie Vieth <charlie.vieth@gmail.com> | 2021-03-11 05:34:50 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-11 19:34:50 +0900 |
commit | 5a874ae241af64368e65b667592918b3fdb17177 (patch) | |
tree | ba500615d50512a74ad9c59cd29b0dd0edde4e31 /src/ansi.go | |
parent | f4e1ed25f224d0ebb4e6bd27c4a012a42c1ccfcd (diff) |
Speed up ANSI code processing (#2368)
This commit speeds up the parsing/processing of ANSI escape codes by
roughly 7.5x. The speedup is mostly accomplished by replacing the regex
with dedicated parsing logic (nextAnsiEscapeSequence()) and reducing the
number of allocations in extractColor().
#### Benchmarks
```
name old time/op new time/op delta
ExtractColor-16 4.89µs ± 5% 0.64µs ± 2% -86.87% (p=0.000 n=9+9)
name old speed new speed delta
ExtractColor-16 25.6MB/s ± 5% 194.6MB/s ± 2% +661.43% (p=0.000 n=9+9)
name old alloc/op new alloc/op delta
ExtractColor-16 1.37kB ± 0% 0.31kB ± 0% -77.31% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
ExtractColor-16 48.0 ± 0% 4.0 ± 0% -91.67% (p=0.000 n=10+10)
```
Diffstat (limited to 'src/ansi.go')
-rw-r--r-- | src/ansi.go | 263 |
1 files changed, 188 insertions, 75 deletions
diff --git a/src/ansi.go b/src/ansi.go index b4f98f6d..a67ac0b9 100644 --- a/src/ansi.go +++ b/src/ansi.go @@ -1,8 +1,6 @@ package fzf import ( - "bytes" - "regexp" "strconv" "strings" "unicode/utf8" @@ -82,73 +80,154 @@ func toAnsiString(color tui.Color, offset int) string { return ret + ";" } -var ansiRegex *regexp.Regexp - -func init() { - /* - References: - - https://github.com/gnachman/iTerm2 - - http://ascii-table.com/ansi-escape-sequences.php - - http://ascii-table.com/ansi-escape-sequences-vt-100.php - - http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html - - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html - */ - // The following regular expression will include not all but most of the - // frequently used ANSI sequences - ansiRegex = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)") +func isPrint(c uint8) bool { + return '\x20' <= c && c <= '\x7e' } -func findAnsiStart(str string) int { - idx := 0 - for ; idx < len(str); idx++ { - b := str[idx] - if b == 0x1b || b == 0x0e || b == 0x0f { - return idx +func matchOperatingSystemCommand(s string) int { + // `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)` + // ^ match starting here + // + i := 5 // prefix matched in nextAnsiEscapeSequence() + for ; i < len(s) && isPrint(s[i]); i++ { + } + if i < len(s) { + if s[i] == '\x07' { + return i + 1 + } + if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' { + return i + 2 } - if b == 0x08 && idx > 0 { - return idx - 1 + } + return -1 +} + +func matchControlSequence(s string) int { + // `\x1b[\\[()][0-9;]*[a-zA-Z@]` + // ^ match starting here + // + i := 2 // prefix matched in nextAnsiEscapeSequence() + for ; i < len(s) && (isNumeric(s[i]) || s[i] == ';'); i++ { + } + if i < len(s) { + c := s[i] + if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '@' { + return i + 1 } } - return idx + return -1 +} + +func isCtrlSeqStart(c uint8) bool { + return c == '\\' || c == '[' || c == '(' || c == ')' +} + +// nextAnsiEscapeSequence returns the ANSI escape sequence and is equivalent to +// calling FindStringIndex() on the below regex (which was originally used): +// +// "(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)" +// +func nextAnsiEscapeSequence(s string) (int, int) { + // fast check for ANSI escape sequences + i := 0 + for ; i < len(s); i++ { + switch s[i] { + case '\x0e', '\x0f', '\x1b', '\x08': + // We ignore the fact that '\x08' cannot be the first char + // in the string and be an escape sequence for the sake of + // speed and simplicity. + goto Loop + } + } + return -1, -1 + +Loop: + for ; i < len(s); i++ { + switch s[i] { + case '\x08': + // backtrack to match: `.\x08` + if i > 0 && s[i-1] != '\n' { + if s[i-1] < utf8.RuneSelf { + return i - 1, i + 1 + } + _, n := utf8.DecodeLastRuneInString(s[:i]) + return i - n, i + 1 + } + case '\x1b': + // match: `\x1b[\\[()][0-9;]*[a-zA-Z@]` + if i+2 < len(s) && isCtrlSeqStart(s[i+1]) { + if j := matchControlSequence(s[i:]); j != -1 { + return i, i + j + } + } + + // match: `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)` + if i+5 < len(s) && s[i+1] == ']' && isNumeric(s[i+2]) && + s[i+3] == ';' && isPrint(s[i+4]) { + + if j := matchOperatingSystemCommand(s[i:]); j != -1 { + return i, i + j + } + } + + // match: `\x1b.` + if i+1 < len(s) && s[i+1] != '\n' { + if s[i+1] < utf8.RuneSelf { + return i, i + 2 + } + _, n := utf8.DecodeRuneInString(s[i+1:]) + return i, i + n + 1 + } + case '\x0e', '\x0f': + // match: `[\x0e\x0f]` + return i, i + 1 + } + } + return -1, -1 } func extractColor(str string, state *ansiState, proc func(string, *ansiState) bool) (string, *[]ansiOffset, *ansiState) { - var offsets []ansiOffset - var output bytes.Buffer + // We append to a stack allocated variable that we'll + // later copy and return, to save on allocations. + offsets := make([]ansiOffset, 0, 32) if state != nil { offsets = append(offsets, ansiOffset{[2]int32{0, 0}, *state}) } - prevIdx := 0 - runeCount := 0 + var ( + pstate *ansiState // lazily allocated + output strings.Builder + prevIdx int + runeCount int + ) for idx := 0; idx < len(str); { - idx += findAnsiStart(str[idx:]) - if idx == len(str) { - break - } - // Make sure that we found an ANSI code - offset := ansiRegex.FindStringIndex(str[idx:]) - if len(offset) < 2 { - idx++ - continue + start, end := nextAnsiEscapeSequence(str[idx:]) + if start == -1 { + break } - offset[0] += idx - offset[1] += idx - idx = offset[1] + start += idx + idx += end // Check if we should continue - prev := str[prevIdx:offset[0]] + prev := str[prevIdx:start] if proc != nil && !proc(prev, state) { return "", nil, nil } + prevIdx = idx - prevIdx = offset[1] - runeCount += utf8.RuneCountInString(prev) - output.WriteString(prev) + if len(prev) != 0 { + runeCount += utf8.RuneCountInString(prev) + // Grow the buffer size to the maximum possible length (string length + // containing ansi codes) to avoid repetitive allocation + if output.Cap() == 0 { + output.Grow(len(str)) + } + output.WriteString(prev) + } - newState := interpretCode(str[offset[0]:offset[1]], state) + newState := interpretCode(str[start:idx], state) if !newState.equals(state) { if state != nil { // Update last offset @@ -157,8 +236,15 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo if newState.colored() { // Append new offset - state = newState - offsets = append(offsets, ansiOffset{[2]int32{int32(runeCount), int32(runeCount)}, *state}) + if pstate == nil { + pstate = &ansiState{} + } + *pstate = newState + state = pstate + offsets = append(offsets, ansiOffset{ + [2]int32{int32(runeCount), int32(runeCount)}, + newState, + }) } else { // Discard state state = nil @@ -168,7 +254,6 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo var rest string var trimmed string - if prevIdx == 0 { // No ANSI code found rest = str @@ -178,51 +263,75 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo output.WriteString(rest) trimmed = output.String() } - if len(rest) > 0 && state != nil { - // Update last offset - runeCount += utf8.RuneCountInString(rest) - (&offsets[len(offsets)-1]).offset[1] = int32(runeCount) - } if proc != nil { proc(rest, state) } - if len(offsets) == 0 { - return trimmed, nil, state + if len(offsets) > 0 { + if len(rest) > 0 && state != nil { + // Update last offset + runeCount += utf8.RuneCountInString(rest) + (&offsets[len(offsets)-1]).offset[1] = int32(runeCount) + } + // Return a copy of the offsets slice + a := make([]ansiOffset, len(offsets)) + copy(a, offsets) + return trimmed, &a, state } - return trimmed, &offsets, state + return trimmed, nil, state +} + +func parseAnsiCode(s string) (int, string) { + var remaining string + if i := strings.IndexByte(s, ';'); i >= 0 { + remaining = s[i+1:] + s = s[:i] + } + + if len(s) > 0 { + // Inlined version of strconv.Atoi() that only handles positive + // integers and does not allocate on error. + code := 0 + for _, ch := range []byte(s) { + ch -= '0' + if ch > 9 { + return -1, remaining + } + code = code*10 + int(ch) + } + return code, remaining + } + + return -1, remaining } -func interpretCode(ansiCode string, prevState *ansiState) *ansiState { - // State - var state *ansiState +func interpretCode(ansiCode string, prevState *ansiState) ansiState { + var state ansiState if prevState == nil { - state = &ansiState{-1, -1, 0, -1} + state = ansiState{-1, -1, 0, -1} } else { - state = &ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg} + state = ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg} } if ansiCode[0] != '\x1b' || ansiCode[1] != '[' || ansiCode[len(ansiCode)-1] != 'm' { - if strings.HasSuffix(ansiCode, "0K") && prevState != nil { + if prevState != nil && strings.HasSuffix(ansiCode, "0K") { state.lbg = prevState.bg } return state } - ptr := &state.fg - state256 := 0 - - init := func() { + if len(ansiCode) <= 3 { state.fg = -1 state.bg = -1 state.attr = 0 - state256 = 0 + return state } - ansiCode = ansiCode[2 : len(ansiCode)-1] - if len(ansiCode) == 0 { - init() - } - for _, code := range strings.Split(ansiCode, ";") { - if num, err := strconv.Atoi(code); err == nil { + + state256 := 0 + ptr := &state.fg + + for len(ansiCode) != 0 { + var num int + if num, ansiCode = parseAnsiCode(ansiCode); num != -1 { switch state256 { case 0: switch num { @@ -253,7 +362,10 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState { case 24: // tput rmul state.attr = state.attr &^ tui.Underline case 0: - init() + state.fg = -1 + state.bg = -1 + state.attr = 0 + state256 = 0 default: if num >= 30 && num <= 37 { state.fg = tui.Color(num - 30) @@ -289,6 +401,7 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState { } } } + if state256 > 0 { *ptr = -1 } |