summaryrefslogtreecommitdiffstats
path: root/src/ansi.go
diff options
context:
space:
mode:
authorCharlie Vieth <charlie.vieth@gmail.com>2021-03-11 05:34:50 -0500
committerGitHub <noreply@github.com>2021-03-11 19:34:50 +0900
commit5a874ae241af64368e65b667592918b3fdb17177 (patch)
treeba500615d50512a74ad9c59cd29b0dd0edde4e31 /src/ansi.go
parentf4e1ed25f224d0ebb4e6bd27c4a012a42c1ccfcd (diff)
Speed up ANSI code processing (#2368)
This commit speeds up the parsing/processing of ANSI escape codes by roughly 7.5x. The speedup is mostly accomplished by replacing the regex with dedicated parsing logic (nextAnsiEscapeSequence()) and reducing the number of allocations in extractColor(). #### Benchmarks ``` name old time/op new time/op delta ExtractColor-16 4.89µs ± 5% 0.64µs ± 2% -86.87% (p=0.000 n=9+9) name old speed new speed delta ExtractColor-16 25.6MB/s ± 5% 194.6MB/s ± 2% +661.43% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ExtractColor-16 1.37kB ± 0% 0.31kB ± 0% -77.31% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ExtractColor-16 48.0 ± 0% 4.0 ± 0% -91.67% (p=0.000 n=10+10) ```
Diffstat (limited to 'src/ansi.go')
-rw-r--r--src/ansi.go263
1 files changed, 188 insertions, 75 deletions
diff --git a/src/ansi.go b/src/ansi.go
index b4f98f6d..a67ac0b9 100644
--- a/src/ansi.go
+++ b/src/ansi.go
@@ -1,8 +1,6 @@
package fzf
import (
- "bytes"
- "regexp"
"strconv"
"strings"
"unicode/utf8"
@@ -82,73 +80,154 @@ func toAnsiString(color tui.Color, offset int) string {
return ret + ";"
}
-var ansiRegex *regexp.Regexp
-
-func init() {
- /*
- References:
- - https://github.com/gnachman/iTerm2
- - http://ascii-table.com/ansi-escape-sequences.php
- - http://ascii-table.com/ansi-escape-sequences-vt-100.php
- - http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html
- - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html
- */
- // The following regular expression will include not all but most of the
- // frequently used ANSI sequences
- ansiRegex = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)")
+func isPrint(c uint8) bool {
+ return '\x20' <= c && c <= '\x7e'
}
-func findAnsiStart(str string) int {
- idx := 0
- for ; idx < len(str); idx++ {
- b := str[idx]
- if b == 0x1b || b == 0x0e || b == 0x0f {
- return idx
+func matchOperatingSystemCommand(s string) int {
+ // `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
+ // ^ match starting here
+ //
+ i := 5 // prefix matched in nextAnsiEscapeSequence()
+ for ; i < len(s) && isPrint(s[i]); i++ {
+ }
+ if i < len(s) {
+ if s[i] == '\x07' {
+ return i + 1
+ }
+ if s[i] == '\x1b' && i < len(s)-1 && s[i+1] == '\\' {
+ return i + 2
}
- if b == 0x08 && idx > 0 {
- return idx - 1
+ }
+ return -1
+}
+
+func matchControlSequence(s string) int {
+ // `\x1b[\\[()][0-9;]*[a-zA-Z@]`
+ // ^ match starting here
+ //
+ i := 2 // prefix matched in nextAnsiEscapeSequence()
+ for ; i < len(s) && (isNumeric(s[i]) || s[i] == ';'); i++ {
+ }
+ if i < len(s) {
+ c := s[i]
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '@' {
+ return i + 1
}
}
- return idx
+ return -1
+}
+
+func isCtrlSeqStart(c uint8) bool {
+ return c == '\\' || c == '[' || c == '(' || c == ')'
+}
+
+// nextAnsiEscapeSequence returns the ANSI escape sequence and is equivalent to
+// calling FindStringIndex() on the below regex (which was originally used):
+//
+// "(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)"
+//
+func nextAnsiEscapeSequence(s string) (int, int) {
+ // fast check for ANSI escape sequences
+ i := 0
+ for ; i < len(s); i++ {
+ switch s[i] {
+ case '\x0e', '\x0f', '\x1b', '\x08':
+ // We ignore the fact that '\x08' cannot be the first char
+ // in the string and be an escape sequence for the sake of
+ // speed and simplicity.
+ goto Loop
+ }
+ }
+ return -1, -1
+
+Loop:
+ for ; i < len(s); i++ {
+ switch s[i] {
+ case '\x08':
+ // backtrack to match: `.\x08`
+ if i > 0 && s[i-1] != '\n' {
+ if s[i-1] < utf8.RuneSelf {
+ return i - 1, i + 1
+ }
+ _, n := utf8.DecodeLastRuneInString(s[:i])
+ return i - n, i + 1
+ }
+ case '\x1b':
+ // match: `\x1b[\\[()][0-9;]*[a-zA-Z@]`
+ if i+2 < len(s) && isCtrlSeqStart(s[i+1]) {
+ if j := matchControlSequence(s[i:]); j != -1 {
+ return i, i + j
+ }
+ }
+
+ // match: `\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)`
+ if i+5 < len(s) && s[i+1] == ']' && isNumeric(s[i+2]) &&
+ s[i+3] == ';' && isPrint(s[i+4]) {
+
+ if j := matchOperatingSystemCommand(s[i:]); j != -1 {
+ return i, i + j
+ }
+ }
+
+ // match: `\x1b.`
+ if i+1 < len(s) && s[i+1] != '\n' {
+ if s[i+1] < utf8.RuneSelf {
+ return i, i + 2
+ }
+ _, n := utf8.DecodeRuneInString(s[i+1:])
+ return i, i + n + 1
+ }
+ case '\x0e', '\x0f':
+ // match: `[\x0e\x0f]`
+ return i, i + 1
+ }
+ }
+ return -1, -1
}
func extractColor(str string, state *ansiState, proc func(string, *ansiState) bool) (string, *[]ansiOffset, *ansiState) {
- var offsets []ansiOffset
- var output bytes.Buffer
+ // We append to a stack allocated variable that we'll
+ // later copy and return, to save on allocations.
+ offsets := make([]ansiOffset, 0, 32)
if state != nil {
offsets = append(offsets, ansiOffset{[2]int32{0, 0}, *state})
}
- prevIdx := 0
- runeCount := 0
+ var (
+ pstate *ansiState // lazily allocated
+ output strings.Builder
+ prevIdx int
+ runeCount int
+ )
for idx := 0; idx < len(str); {
- idx += findAnsiStart(str[idx:])
- if idx == len(str) {
- break
- }
-
// Make sure that we found an ANSI code
- offset := ansiRegex.FindStringIndex(str[idx:])
- if len(offset) < 2 {
- idx++
- continue
+ start, end := nextAnsiEscapeSequence(str[idx:])
+ if start == -1 {
+ break
}
- offset[0] += idx
- offset[1] += idx
- idx = offset[1]
+ start += idx
+ idx += end
// Check if we should continue
- prev := str[prevIdx:offset[0]]
+ prev := str[prevIdx:start]
if proc != nil && !proc(prev, state) {
return "", nil, nil
}
+ prevIdx = idx
- prevIdx = offset[1]
- runeCount += utf8.RuneCountInString(prev)
- output.WriteString(prev)
+ if len(prev) != 0 {
+ runeCount += utf8.RuneCountInString(prev)
+ // Grow the buffer size to the maximum possible length (string length
+ // containing ansi codes) to avoid repetitive allocation
+ if output.Cap() == 0 {
+ output.Grow(len(str))
+ }
+ output.WriteString(prev)
+ }
- newState := interpretCode(str[offset[0]:offset[1]], state)
+ newState := interpretCode(str[start:idx], state)
if !newState.equals(state) {
if state != nil {
// Update last offset
@@ -157,8 +236,15 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
if newState.colored() {
// Append new offset
- state = newState
- offsets = append(offsets, ansiOffset{[2]int32{int32(runeCount), int32(runeCount)}, *state})
+ if pstate == nil {
+ pstate = &ansiState{}
+ }
+ *pstate = newState
+ state = pstate
+ offsets = append(offsets, ansiOffset{
+ [2]int32{int32(runeCount), int32(runeCount)},
+ newState,
+ })
} else {
// Discard state
state = nil
@@ -168,7 +254,6 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
var rest string
var trimmed string
-
if prevIdx == 0 {
// No ANSI code found
rest = str
@@ -178,51 +263,75 @@ func extractColor(str string, state *ansiState, proc func(string, *ansiState) bo
output.WriteString(rest)
trimmed = output.String()
}
- if len(rest) > 0 && state != nil {
- // Update last offset
- runeCount += utf8.RuneCountInString(rest)
- (&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
- }
if proc != nil {
proc(rest, state)
}
- if len(offsets) == 0 {
- return trimmed, nil, state
+ if len(offsets) > 0 {
+ if len(rest) > 0 && state != nil {
+ // Update last offset
+ runeCount += utf8.RuneCountInString(rest)
+ (&offsets[len(offsets)-1]).offset[1] = int32(runeCount)
+ }
+ // Return a copy of the offsets slice
+ a := make([]ansiOffset, len(offsets))
+ copy(a, offsets)
+ return trimmed, &a, state
}
- return trimmed, &offsets, state
+ return trimmed, nil, state
+}
+
+func parseAnsiCode(s string) (int, string) {
+ var remaining string
+ if i := strings.IndexByte(s, ';'); i >= 0 {
+ remaining = s[i+1:]
+ s = s[:i]
+ }
+
+ if len(s) > 0 {
+ // Inlined version of strconv.Atoi() that only handles positive
+ // integers and does not allocate on error.
+ code := 0
+ for _, ch := range []byte(s) {
+ ch -= '0'
+ if ch > 9 {
+ return -1, remaining
+ }
+ code = code*10 + int(ch)
+ }
+ return code, remaining
+ }
+
+ return -1, remaining
}
-func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
- // State
- var state *ansiState
+func interpretCode(ansiCode string, prevState *ansiState) ansiState {
+ var state ansiState
if prevState == nil {
- state = &ansiState{-1, -1, 0, -1}
+ state = ansiState{-1, -1, 0, -1}
} else {
- state = &ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
+ state = ansiState{prevState.fg, prevState.bg, prevState.attr, prevState.lbg}
}
if ansiCode[0] != '\x1b' || ansiCode[1] != '[' || ansiCode[len(ansiCode)-1] != 'm' {
- if strings.HasSuffix(ansiCode, "0K") && prevState != nil {
+ if prevState != nil && strings.HasSuffix(ansiCode, "0K") {
state.lbg = prevState.bg
}
return state
}
- ptr := &state.fg
- state256 := 0
-
- init := func() {
+ if len(ansiCode) <= 3 {
state.fg = -1
state.bg = -1
state.attr = 0
- state256 = 0
+ return state
}
-
ansiCode = ansiCode[2 : len(ansiCode)-1]
- if len(ansiCode) == 0 {
- init()
- }
- for _, code := range strings.Split(ansiCode, ";") {
- if num, err := strconv.Atoi(code); err == nil {
+
+ state256 := 0
+ ptr := &state.fg
+
+ for len(ansiCode) != 0 {
+ var num int
+ if num, ansiCode = parseAnsiCode(ansiCode); num != -1 {
switch state256 {
case 0:
switch num {
@@ -253,7 +362,10 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
case 24: // tput rmul
state.attr = state.attr &^ tui.Underline
case 0:
- init()
+ state.fg = -1
+ state.bg = -1
+ state.attr = 0
+ state256 = 0
default:
if num >= 30 && num <= 37 {
state.fg = tui.Color(num - 30)
@@ -289,6 +401,7 @@ func interpretCode(ansiCode string, prevState *ansiState) *ansiState {
}
}
}
+
if state256 > 0 {
*ptr = -1
}