author    Junegunn Choi <junegunn.c@gmail.com>  2017-07-20 02:44:30 +0900
committer Junegunn Choi <junegunn.c@gmail.com>  2017-07-20 02:44:30 +0900
commit    c9f16b6430f3b9c9d12ee078e2218e8467c13340 (patch)
tree      b0d7e33da2d605696d98da98e1691bf8d89437de
parent    bc9d2abdb67639e06f7002b278341fb498b79456 (diff)
Avoid unconditionally storing input as runes
When --with-nth is used, fzf used to preprocess each line and store the result as a rune array, which was wasteful when the line contained only ASCII characters.
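
The idea behind the change: util.ToChars can keep ASCII-only input as a plain byte slice and only falls back to a rune slice for multibyte input, so tokenizing plain strings and converting the resulting tokens with ToChars is cheaper than eagerly converting every line to []rune. A minimal sketch of that fast-path decision follows; isAscii is an illustrative helper, not fzf's actual API:

package main

import (
	"fmt"
	"unicode/utf8"
)

// isAscii reports whether data contains only single-byte (ASCII) runes.
func isAscii(data []byte) bool {
	for _, b := range data {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}

func main() {
	for _, s := range []string{"plain ascii line", "naïve"} {
		if isAscii([]byte(s)) {
			fmt.Printf("%q: keep as bytes, no rune conversion\n", s)
		} else {
			fmt.Printf("%q: fall back to []rune (length %d)\n", s, len([]rune(s)))
		}
	}
}
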
-rw-r--r--  src/core.go            |  14
-rw-r--r--  src/options_test.go    |   5
-rw-r--r--  src/pattern.go         |   2
-rw-r--r--  src/pattern_test.go    |   2
-rw-r--r--  src/terminal.go        |   2
-rw-r--r--  src/tokenizer.go       |  63
-rw-r--r--  src/tokenizer_test.go  |  14
-rw-r--r--  src/util/chars.go      |  43
-rw-r--r--  src/util/chars_test.go |  26
9 files changed, 44 insertions(+), 127 deletions(-)
diff --git a/src/core.go b/src/core.go
index aa425109..3e60934a 100644
--- a/src/core.go
+++ b/src/core.go
@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
return util.ToChars(data), nil
}
- ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
- return util.RunesToChars(data), nil
- }
if opts.Ansi {
if opts.Theme != nil {
var state *ansiState
@@ -82,9 +79,6 @@ func Run(opts *Options, revision string) {
return util.RunesToChars([]rune(trimmed)), nil
}
}
- ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
- return ansiProcessor([]byte(string(data)))
- }
}
// Chunk list
@@ -103,15 +97,15 @@ func Run(opts *Options, revision string) {
})
} else {
chunkList = NewChunkList(func(data []byte, index int) Item {
- tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+ tokens := Tokenize(string(data), opts.Delimiter)
trans := Transform(tokens, opts.WithNth)
+ transformed := joinTokens(trans)
if len(header) < opts.HeaderLines {
- header = append(header, string(joinTokens(trans)))
+ header = append(header, transformed)
eventBox.Set(EvtHeader, header)
return nilItem
}
- textRunes := joinTokens(trans)
- trimmed, colors := ansiProcessorRunes(textRunes)
+ trimmed, colors := ansiProcessor([]byte(transformed))
trimmed.Index = int32(index)
return Item{text: trimmed, colors: colors, origText: &data}
})
diff --git a/src/options_test.go b/src/options_test.go
index 907faf03..d3c93450 100644
--- a/src/options_test.go
+++ b/src/options_test.go
@@ -6,7 +6,6 @@ import (
"testing"
"github.com/junegunn/fzf/src/tui"
- "github.com/junegunn/fzf/src/util"
)
func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@ func TestDelimiterRegex(t *testing.T) {
func TestDelimiterRegexString(t *testing.T) {
delim := delimiterRegexp("*")
- tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+ tokens := Tokenize("-*--*---**---", delim)
if delim.regex != nil ||
tokens[0].text.ToString() != "-*" ||
tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@ func TestDelimiterRegexString(t *testing.T) {
func TestDelimiterRegexRegex(t *testing.T) {
delim := delimiterRegexp("--\\*")
- tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+ tokens := Tokenize("-*--*---**---", delim)
if delim.str != nil ||
tokens[0].text.ToString() != "-*--*" ||
tokens[1].text.ToString() != "---*" ||
diff --git a/src/pattern.go b/src/pattern.go
index 97ee8fd6..64296d71 100644
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
return *item.transformed
}
- tokens := Tokenize(item.text, p.delimiter)
+ tokens := Tokenize(item.text.ToString(), p.delimiter)
ret := Transform(tokens, p.nth)
item.transformed = &ret
return ret
diff --git a/src/pattern_test.go b/src/pattern_test.go
index 31a127e3..85c174c9 100644
--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {
func TestOrigTextAndTransformed(t *testing.T) {
pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false, true, true, []Range{}, Delimiter{}, []rune("jg"))
- tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+ tokens := Tokenize("junegunn", Delimiter{})
trans := Transform(tokens, []Range{Range{1, 1}})
origBytes := []byte("junegunn.choi")
diff --git a/src/terminal.go b/src/terminal.go
index 81fb8808..8d0b6bf8 100644
--- a/src/terminal.go
+++ b/src/terminal.go
@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
for idx, item := range items {
chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
- tokens := Tokenize(chars, delimiter)
+ tokens := Tokenize(chars.ToString(), delimiter)
trans := Transform(tokens, ranges)
str := string(joinTokens(trans))
if delimiter.str != nil {
diff --git a/src/tokenizer.go b/src/tokenizer.go
index 0e216ac7..5b7a8b6f 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -1,6 +1,7 @@
package fzf
import (
+ "bytes"
"regexp"
"strconv"
"strings"
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
return newRange(n, n), true
}
-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
ret := make([]Token, len(tokens))
prefixLength := begin
- for idx, token := range tokens {
- // NOTE: &tokens[idx] instead of &tokens
- ret[idx] = Token{&tokens[idx], int32(prefixLength)}
- prefixLength += token.Length()
+ for idx := range tokens {
+ chars := util.ToChars([]byte(tokens[idx]))
+ ret[idx] = Token{&chars, int32(prefixLength)}
+ prefixLength += chars.Length()
}
return ret
}
@@ -92,16 +93,15 @@ const (
awkWhite
)
-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
// 9, 32
- ret := []util.Chars{}
+ ret := []string{}
prefixLength := 0
state := awkNil
- numChars := input.Length()
begin := 0
end := 0
- for idx := 0; idx < numChars; idx++ {
- r := input.Get(idx)
+ for idx := 0; idx < len(input); idx++ {
+ r := input[idx]
white := r == 9 || r == 32
switch state {
case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
if white {
end = idx + 1
} else {
- ret = append(ret, input.Slice(begin, end))
+ ret = append(ret, input[begin:end])
state, begin, end = awkBlack, idx, idx+1
}
}
}
if begin < end {
- ret = append(ret, input.Slice(begin, end))
+ ret = append(ret, input[begin:end])
}
return ret, prefixLength
}
// Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
if delimiter.str == nil && delimiter.regex == nil {
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(text)
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
}
if delimiter.str != nil {
- return withPrefixLengths(text.Split(*delimiter.str), 0)
+ return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
}
// FIXME performance
var tokens []string
if delimiter.regex != nil {
- str := text.ToString()
- for len(str) > 0 {
- loc := delimiter.regex.FindStringIndex(str)
+ for len(text) > 0 {
+ loc := delimiter.regex.FindStringIndex(text)
if loc == nil {
- loc = []int{0, len(str)}
+ loc = []int{0, len(text)}
}
last := util.Max(loc[1], 1)
- tokens = append(tokens, str[:last])
- str = str[last:]
+ tokens = append(tokens, text[:last])
+ text = text[last:]
}
}
- asRunes := make([]util.Chars, len(tokens))
- for i, token := range tokens {
- asRunes[i] = util.RunesToChars([]rune(token))
- }
- return withPrefixLengths(asRunes, 0)
+ return withPrefixLengths(tokens, 0)
}
-func joinTokens(tokens []Token) []rune {
- ret := []rune{}
+func joinTokens(tokens []Token) string {
+ var output bytes.Buffer
for _, token := range tokens {
- ret = append(ret, token.text.ToRunes()...)
+ output.WriteString(token.text.ToString())
}
- return ret
+ return output.String()
}
// Transform is used to transform the input when --with-nth option is given
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
- chars := util.RunesToChars(joinTokens(tokens))
+ chars := util.ToChars([]byte(joinTokens(tokens)))
parts = append(parts, &chars)
} else {
if idx < 0 {
@@ -224,15 +219,15 @@ func Transform(tokens []Token, withNth []Range) []Token {
var merged util.Chars
switch len(parts) {
case 0:
- merged = util.RunesToChars([]rune{})
+ merged = util.ToChars([]byte{})
case 1:
merged = *parts[0]
default:
- runes := []rune{}
+ var output bytes.Buffer
for _, part := range parts {
- runes = append(runes, part.ToRunes()...)
+ output.WriteString(part.ToString())
}
- merged = util.RunesToChars(runes)
+ merged = util.ToChars([]byte(output.String()))
}
var prefixLength int32
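
A note on the delimiter-string path above: the removed Chars.Split kept the delimiter attached to the end of each token, and strings.SplitAfter follows the same convention. One subtle difference is that SplitAfter also yields a trailing empty piece when the input ends with the delimiter. A quick standalone check:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Matches the removed Chars.Split for "abc:def::" -> "abc:", "def:", ":",
	// except for the trailing empty piece that SplitAfter appends.
	fmt.Printf("%q\n", strings.SplitAfter("abc:def::", ":"))
	// Prints: ["abc:" "def:" ":" ""]
}
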
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
index 59250906..110fd062 100644
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@@ -2,8 +2,6 @@ package fzf
import (
"testing"
-
- "github.com/junegunn/fzf/src/util"
)
func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
- tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+ tokens := Tokenize(input, Delimiter{})
if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 {
t.Errorf("%s", tokens)
}
// With delimiter
- tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+ tokens = Tokenize(input, delimiterRegexp(":"))
if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 {
- t.Errorf("%s", tokens)
+ t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
}
// With delimiter regex
- tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+ tokens = Tokenize(input, delimiterRegexp("\\s+"))
if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 ||
tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 ||
tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl"
{
- tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+ tokens := Tokenize(input, Delimiter{})
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@ func TestTransform(t *testing.T) {
}
}
{
- tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+ tokens := Tokenize(input, delimiterRegexp(":"))
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
diff --git a/src/util/chars.go b/src/util/chars.go
index 5e702004..61e93411 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
}
return
}
-
-func (chars *Chars) Slice(b int, e int) Chars {
- if runes := chars.optionalRunes(); runes != nil {
- return RunesToChars(runes[b:e])
- }
- return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
- delim := []rune(delimiter)
- numChars := chars.Length()
- numDelim := len(delim)
- begin := 0
- ret := make([]Chars, 0, 1)
-
- for index := 0; index < numChars; {
- if index+numDelim <= numChars {
- match := true
- for off, d := range delim {
- if chars.Get(index+off) != d {
- match = false
- break
- }
- }
- // Found the delimiter
- if match {
- incr := Max(numDelim, 1)
- ret = append(ret, chars.Slice(begin, index+incr))
- index += incr
- begin = index
- continue
- }
- } else {
- // Impossible to find the delimiter in the remaining substring
- break
- }
- index++
- }
- if begin < numChars || len(ret) == 0 {
- ret = append(ret, chars.Slice(begin, numChars))
- }
- return ret
-}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
index 07b8dea5..b7983f30 100644
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
check(" h o ", 5)
check(" ", 0)
}
-
-func TestSplit(t *testing.T) {
- check := func(str string, delim string, tokens ...string) {
- input := ToChars([]byte(str))
- result := input.Split(delim)
- if len(result) != len(tokens) {
- t.Errorf(
- "Invalid Split result for '%s': %d tokens found (expected %d): %s",
- str, len(result), len(tokens), result)
- }
- for idx, token := range tokens {
- if result[idx].ToString() != token {
- t.Errorf("Invalid Split result for '%s': %s (expected %s)",
- str, result[idx].ToString(), token)
- }
- }
- }
- check("abc:def::", ":", "abc:", "def:", ":")
- check("abc:def::", "-", "abc:def::")
- check("abc", "", "a", "b", "c")
- check("abc", "a", "a", "bc")
- check("abc", "ab", "ab", "c")
- check("abc", "abc", "abc")
- check("abc", "abcd", "abc")
- check("", "abcd", "")
-}
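
For reference, the concatenation pattern adopted in joinTokens and Transform above, building the result through a bytes.Buffer instead of accumulating runes, skips the byte-to-rune-to-byte round trip while producing the same string. A standalone sketch:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	parts := []string{" abc: ", "def: ", "ghi"}

	// Old approach: accumulate runes, forcing a rune conversion per part.
	runes := []rune{}
	for _, p := range parts {
		runes = append(runes, []rune(p)...)
	}

	// New approach: write each part into a bytes.Buffer, no rune conversion.
	var buf bytes.Buffer
	for _, p := range parts {
		buf.WriteString(p)
	}

	fmt.Println(string(runes) == buf.String()) // true
}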