From 1d4057c20907b7d263d6f2b8cb4350a024859dfe Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Sun, 14 Aug 2016 00:39:44 +0900 Subject: [perf] Avoid allocating rune array for ascii string In the best case (all ascii), this reduces the memory footprint by 60% and the response time by 15% to 20%. In the worst case (every line has non-ascii characters), 3 to 4% overhead is observed. --- src/algo/algo.go | 66 ++++++++++++++++------------- src/algo/algo_test.go | 6 ++- src/chunklist_test.go | 8 ++-- src/core.go | 35 +++++++-------- src/item.go | 15 ++++--- src/item_test.go | 13 +++--- src/merger_test.go | 4 +- src/options_test.go | 21 ++++----- src/pattern.go | 8 ++-- src/pattern_test.go | 15 ++++--- src/terminal.go | 6 +-- src/tokenizer.go | 42 +++++++++--------- src/tokenizer_test.go | 44 ++++++++++--------- src/util/chars.go | 113 +++++++++++++++++++++++++++++++++++++++++++++++++ src/util/chars_test.go | 36 ++++++++++++++++ src/util/util.go | 29 ------------- 16 files changed, 303 insertions(+), 158 deletions(-) create mode 100644 src/util/chars.go create mode 100644 src/util/chars_test.go diff --git a/src/algo/algo.go b/src/algo/algo.go index d8e2fec2..9bf476fd 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -15,11 +15,18 @@ import ( * In short: They try to do as little work as possible. */ -func runeAt(runes []rune, index int, max int, forward bool) rune { +func indexAt(index int, max int, forward bool) int { if forward { - return runes[index] + return index } - return runes[max-index-1] + return max - index - 1 +} + +func runeAt(text util.Chars, index int, max int, forward bool) rune { + if forward { + return text.Get(index) + } + return text.Get(max - index - 1) } // Result conatins the results of running a match function. @@ -42,14 +49,14 @@ const ( charNumber ) -func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, eidx int) int32 { +func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int32 { var bonus int32 pidx := 0 lenPattern := len(pattern) consecutive := false prevClass := charNonWord for index := 0; index < eidx; index++ { - char := runes[index] + char := text.Get(index) var class charClass if unicode.IsLower(char) { class = charLower @@ -107,7 +114,7 @@ func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, e } // FuzzyMatch performs fuzzy-match -func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { +func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { if len(pattern) == 0 { return Result{0, 0, 0} } @@ -125,11 +132,11 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) sidx := -1 eidx := -1 - lenRunes := len(runes) + lenRunes := text.Length() lenPattern := len(pattern) - for index := range runes { - char := runeAt(runes, index, lenRunes, forward) + for index := 0; index < lenRunes; index++ { + char := runeAt(text, index, lenRunes, forward) // This is considerably faster than blindly applying strings.ToLower to the // whole string if !caseSensitive { @@ -142,7 +149,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) char = unicode.To(unicode.LowerCase, char) } } - pchar := runeAt(pattern, pidx, lenPattern, forward) + pchar := pattern[indexAt(pidx, lenPattern, forward)] if char == pchar { if sidx < 0 { sidx = index @@ -157,7 +164,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) if sidx >= 0 && eidx >= 0 { pidx-- for index := eidx - 1; index >= sidx; index-- { - char := runeAt(runes, index, lenRunes, forward) + char := runeAt(text, index, lenRunes, forward) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 @@ -166,7 +173,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) } } - pchar := runeAt(pattern, pidx, lenPattern, forward) + pchar := pattern[indexAt(pidx, lenPattern, forward)] if char == pchar { if pidx--; pidx < 0 { sidx = index @@ -182,7 +189,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) } return Result{int32(sidx), int32(eidx), - evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} + evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} } return Result{-1, -1, 0} } @@ -194,12 +201,12 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) // // We might try to implement better algorithms in the future: // http://en.wikipedia.org/wiki/String_searching_algorithm -func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { +func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { if len(pattern) == 0 { return Result{0, 0, 0} } - lenRunes := len(runes) + lenRunes := text.Length() lenPattern := len(pattern) if lenRunes < lenPattern { @@ -208,7 +215,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r pidx := 0 for index := 0; index < lenRunes; index++ { - char := runeAt(runes, index, lenRunes, forward) + char := runeAt(text, index, lenRunes, forward) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 @@ -216,7 +223,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r char = unicode.To(unicode.LowerCase, char) } } - pchar := runeAt(pattern, pidx, lenPattern, forward) + pchar := pattern[indexAt(pidx, lenPattern, forward)] if pchar == char { pidx++ if pidx == lenPattern { @@ -229,7 +236,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r eidx = lenRunes - (index - lenPattern + 1) } return Result{int32(sidx), int32(eidx), - evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} + evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} } } else { index -= pidx @@ -240,13 +247,13 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r } // PrefixMatch performs prefix-match -func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { - if len(runes) < len(pattern) { +func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { + if text.Length() < len(pattern) { return Result{-1, -1, 0} } for index, r := range pattern { - char := runes[index] + char := text.Get(index) if !caseSensitive { char = unicode.ToLower(char) } @@ -256,20 +263,19 @@ func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) } lenPattern := len(pattern) return Result{0, int32(lenPattern), - evaluateBonus(caseSensitive, runes, pattern, 0, lenPattern)} + evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)} } // SuffixMatch performs suffix-match -func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result { - runes := util.TrimRight(input) - trimmedLen := len(runes) +func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { + trimmedLen := text.Length() - text.TrailingWhitespaces() diff := trimmedLen - len(pattern) if diff < 0 { return Result{-1, -1, 0} } for index, r := range pattern { - char := runes[index+diff] + char := text.Get(index + diff) if !caseSensitive { char = unicode.ToLower(char) } @@ -281,16 +287,16 @@ func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) sidx := trimmedLen - lenPattern eidx := trimmedLen return Result{int32(sidx), int32(eidx), - evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} + evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} } // EqualMatch performs equal-match -func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { +func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { // Note: EqualMatch always return a zero bonus. - if len(runes) != len(pattern) { + if text.Length() != len(pattern) { return Result{-1, -1, 0} } - runesStr := string(runes) + runesStr := text.ToString() if !caseSensitive { runesStr = strings.ToLower(runesStr) } diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go index 3c954588..d6a5d487 100644 --- a/src/algo/algo_test.go +++ b/src/algo/algo_test.go @@ -3,13 +3,15 @@ package algo import ( "strings" "testing" + + "github.com/junegunn/fzf/src/util" ) -func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) { +func assertMatch(t *testing.T, fun func(bool, bool, util.Chars, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) { if !caseSensitive { pattern = strings.ToLower(pattern) } - res := fun(caseSensitive, forward, []rune(input), []rune(pattern)) + res := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern)) if res.Start != sidx { t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern) } diff --git a/src/chunklist_test.go b/src/chunklist_test.go index 5f7481df..2523675a 100644 --- a/src/chunklist_test.go +++ b/src/chunklist_test.go @@ -3,6 +3,8 @@ package fzf import ( "fmt" "testing" + + "github.com/junegunn/fzf/src/util" ) func TestChunkList(t *testing.T) { @@ -10,7 +12,7 @@ func TestChunkList(t *testing.T) { sortCriteria = []criterion{byMatchLen, byLength} cl := NewChunkList(func(s []byte, i int) *Item { - return &Item{text: []rune(string(s)), rank: buildEmptyRank(int32(i * 2))} + return &Item{text: util.ToChars(s), rank: buildEmptyRank(int32(i * 2))} }) // Snapshot @@ -42,8 +44,8 @@ func TestChunkList(t *testing.T) { last := func(arr [5]int32) int32 { return arr[len(arr)-1] } - if string((*chunk1)[0].text) != "hello" || last((*chunk1)[0].rank) != 0 || - string((*chunk1)[1].text) != "world" || last((*chunk1)[1].rank) != 2 { + if (*chunk1)[0].text.ToString() != "hello" || last((*chunk1)[0].rank) != 0 || + (*chunk1)[1].text.ToString() != "world" || last((*chunk1)[1].rank) != 2 { t.Error("Invalid data") } if chunk1.IsFull() { diff --git a/src/core.go b/src/core.go index fd4bc6cd..3df15ed3 100644 --- a/src/core.go +++ b/src/core.go @@ -63,29 +63,29 @@ func Run(opts *Options) { eventBox := util.NewEventBox() // ANSI code processor - ansiProcessor := func(data []byte) ([]rune, []ansiOffset) { - return util.BytesToRunes(data), nil + ansiProcessor := func(data []byte) (util.Chars, []ansiOffset) { + return util.ToChars(data), nil } - ansiProcessorRunes := func(data []rune) ([]rune, []ansiOffset) { - return data, nil + ansiProcessorRunes := func(data []rune) (util.Chars, []ansiOffset) { + return util.RunesToChars(data), nil } if opts.Ansi { if opts.Theme != nil { var state *ansiState - ansiProcessor = func(data []byte) ([]rune, []ansiOffset) { + ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) { trimmed, offsets, newState := extractColor(string(data), state, nil) state = newState - return []rune(trimmed), offsets + return util.RunesToChars([]rune(trimmed)), offsets } } else { // When color is disabled but ansi option is given, // we simply strip out ANSI codes from the input - ansiProcessor = func(data []byte) ([]rune, []ansiOffset) { + ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) { trimmed, _, _ := extractColor(string(data), nil, nil) - return []rune(trimmed), nil + return util.RunesToChars([]rune(trimmed)), nil } } - ansiProcessorRunes = func(data []rune) ([]rune, []ansiOffset) { + ansiProcessorRunes = func(data []rune) (util.Chars, []ansiOffset) { return ansiProcessor([]byte(string(data))) } } @@ -100,29 +100,30 @@ func Run(opts *Options) { eventBox.Set(EvtHeader, header) return nil } - runes, colors := ansiProcessor(data) + chars, colors := ansiProcessor(data) return &Item{ - text: runes, + text: chars, colors: colors, rank: buildEmptyRank(int32(index))} }) } else { chunkList = NewChunkList(func(data []byte, index int) *Item { - runes := util.BytesToRunes(data) - tokens := Tokenize(runes, opts.Delimiter) + chars := util.ToChars(data) + tokens := Tokenize(chars, opts.Delimiter) trans := Transform(tokens, opts.WithNth) if len(header) < opts.HeaderLines { header = append(header, string(joinTokens(trans))) eventBox.Set(EvtHeader, header) return nil } + textRunes := joinTokens(trans) item := Item{ - text: joinTokens(trans), - origText: &runes, + text: util.RunesToChars(textRunes), + origText: &data, colors: nil, rank: buildEmptyRank(int32(index))} - trimmed, colors := ansiProcessorRunes(item.text) + trimmed, colors := ansiProcessorRunes(textRunes) item.text = trimmed item.colors = colors return &item @@ -170,7 +171,7 @@ func Run(opts *Options) { func(runes []byte) bool { item := chunkList.trans(runes, 0) if item != nil && pattern.MatchItem(item) { - fmt.Println(string(item.text)) + fmt.Println(item.text.ToString()) found = true } return false diff --git a/src/item.go b/src/item.go index 06413503..36f8c0ae 100644 --- a/src/item.go +++ b/src/item.go @@ -4,6 +4,7 @@ import ( "math" "github.com/junegunn/fzf/src/curses" + "github.com/junegunn/fzf/src/util" ) // Offset holds three 32-bit integers denoting the offsets of a matched substring @@ -17,8 +18,8 @@ type colorOffset struct { // Item represents each input line type Item struct { - text []rune - origText *[]rune + text util.Chars + origText *[]byte transformed []Token offsets []Offset colors []ansiOffset @@ -91,12 +92,14 @@ func (item *Item) Rank(cache bool) [5]int32 { // If offsets is empty, lenSum will be 0, but we don't care val = int32(lenSum) } else { - val = int32(len(item.text)) + val = int32(item.text.Length()) } case byBegin: // We can't just look at item.offsets[0][0] because it can be an inverse term whitePrefixLen := 0 - for idx, r := range item.text { + numChars := item.text.Length() + for idx := 0; idx < numChars; idx++ { + r := item.text.Get(idx) whitePrefixLen = idx if idx == minBegin || r != ' ' && r != '\t' { break @@ -105,7 +108,7 @@ func (item *Item) Rank(cache bool) [5]int32 { val = int32(minBegin - whitePrefixLen) case byEnd: if prevEnd > 0 { - val = int32(1 + len(item.text) - prevEnd) + val = int32(1 + item.text.Length() - prevEnd) } else { // Empty offsets due to inverse terms. val = 1 @@ -134,7 +137,7 @@ func (item *Item) StringPtr(stripAnsi bool) *string { orig := string(*item.origText) return &orig } - str := string(item.text) + str := item.text.ToString() return &str } diff --git a/src/item_test.go b/src/item_test.go index d1c30d77..36a436d3 100644 --- a/src/item_test.go +++ b/src/item_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/junegunn/fzf/src/curses" + "github.com/junegunn/fzf/src/util" ) func TestOffsetSort(t *testing.T) { @@ -44,13 +45,13 @@ func TestItemRank(t *testing.T) { sortCriteria = []criterion{byMatchLen, byLength} strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")} - item1 := Item{text: strs[0], offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}} + item1 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}} rank1 := item1.Rank(true) if rank1[0] != math.MaxInt32 || rank1[1] != 3 || rank1[4] != 1 { t.Error(item1.Rank(true)) } // Only differ in index - item2 := Item{text: strs[0], offsets: []Offset{}} + item2 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}} items := []*Item{&item1, &item2} sort.Sort(ByRelevance(items)) @@ -66,10 +67,10 @@ func TestItemRank(t *testing.T) { } // Sort by relevance - item3 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} - item4 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} - item5 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} - item6 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} + item3 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} + item4 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} + item5 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}} + item6 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}} items = []*Item{&item1, &item2, &item3, &item4, &item5, &item6} sort.Sort(ByRelevance(items)) if items[0] != &item6 || items[1] != &item4 || diff --git a/src/merger_test.go b/src/merger_test.go index 472e2048..f62f9759 100644 --- a/src/merger_test.go +++ b/src/merger_test.go @@ -5,6 +5,8 @@ import ( "math/rand" "sort" "testing" + + "github.com/junegunn/fzf/src/util" ) func assert(t *testing.T, cond bool, msg ...string) { @@ -22,7 +24,7 @@ func randItem() *Item { offsets[idx] = Offset{sidx, eidx} } return &Item{ - text: []rune(str), + text: util.RunesToChars([]rune(str)), rank: buildEmptyRank(rand.Int31()), offsets: offsets} } diff --git a/src/options_test.go b/src/options_test.go index eb1dfa9c..c1bc9147 100644 --- a/src/options_test.go +++ b/src/options_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/junegunn/fzf/src/curses" + "github.com/junegunn/fzf/src/util" ) func TestDelimiterRegex(t *testing.T) { @@ -42,24 +43,24 @@ func TestDelimiterRegex(t *testing.T) { func TestDelimiterRegexString(t *testing.T) { delim := delimiterRegexp("*") - tokens := Tokenize([]rune("-*--*---**---"), delim) + tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim) if delim.regex != nil || - string(tokens[0].text) != "-*" || - string(tokens[1].text) != "--*" || - string(tokens[2].text) != "---*" || - string(tokens[3].text) != "*" || - string(tokens[4].text) != "---" { + tokens[0].text.ToString() != "-*" || + tokens[1].text.ToString() != "--*" || + tokens[2].text.ToString() != "---*" || + tokens[3].text.ToString() != "*" || + tokens[4].text.ToString() != "---" { t.Errorf("%s %s %d", delim, tokens, len(tokens)) } } func TestDelimiterRegexRegex(t *testing.T) { delim := delimiterRegexp("--\\*") - tokens := Tokenize([]rune("-*--*---**---"), delim) + tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim) if delim.str != nil || - string(tokens[0].text) != "-*--*" || - string(tokens[1].text) != "---*" || - string(tokens[2].text) != "*---" { + tokens[0].text.ToString() != "-*--*" || + tokens[1].text.ToString() != "---*" || + tokens[2].text.ToString() != "*---" { t.Errorf("%s %d", tokens, len(tokens)) } } diff --git a/src/pattern.go b/src/pattern.go index 42a341b4..2df28795 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -49,7 +49,7 @@ type Pattern struct { cacheable bool delimiter Delimiter nth []Range - procFun map[termType]func(bool, bool, []rune, []rune) algo.Result + procFun map[termType]func(bool, bool, util.Chars, []rune) algo.Result } var ( @@ -125,7 +125,7 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool, cacheable: cacheable, nth: nth, delimiter: delimiter, - procFun: make(map[termType]func(bool, bool, []rune, []rune) algo.Result)} + procFun: make(map[termType]func(bool, bool, util.Chars, []rune) algo.Result)} ptr.procFun[termFuzzy] = algo.FuzzyMatch ptr.procFun[termEqual] = algo.EqualMatch @@ -361,13 +361,13 @@ func (p *Pattern) prepareInput(item *Item) []Token { tokens := Tokenize(item.text, p.delimiter) ret = Transform(tokens, p.nth) } else { - ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}} + ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: item.text.TrimLength()}} } item.transformed = ret return ret } -func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) algo.Result, +func (p *Pattern) iter(pfun func(bool, bool, util.Chars, []rune) algo.Result, tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) { for _, part := range tokens { prefixLength := int32(part.prefixLength) diff --git a/src/pattern_test.go b/src/pattern_test.go index 2f27fdab..26f92844 100644 --- a/src/pattern_test.go +++ b/src/pattern_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/junegunn/fzf/src/algo" + "github.com/junegunn/fzf/src/util" ) func TestParseTermsExtended(t *testing.T) { @@ -71,7 +72,7 @@ func TestExact(t *testing.T) { pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("'abc")) res := algo.ExactMatchNaive( - pattern.caseSensitive, pattern.forward, []rune("aabbcc abc"), pattern.termSets[0][0].text) + pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune("aabbcc abc")), pattern.termSets[0][0].text) if res.Start != 7 || res.End != 10 { t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) } @@ -84,7 +85,7 @@ func TestEqual(t *testing.T) { match := func(str string, sidxExpected int32, eidxExpected int32) { res := algo.EqualMatch( - pattern.caseSensitive, pattern.forward, []rune(str), pattern.termSets[0][0].text) + pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune(str)), pattern.termSets[0][0].text) if res.Start != sidxExpected || res.End != eidxExpected { t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) } @@ -120,20 +121,20 @@ func TestCaseSensitivity(t *testing.T) { func TestOrigTextAndTransformed(t *testing.T) { pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("jg")) - tokens := Tokenize([]rune("junegunn"), Delimiter{}) + tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{}) trans := Transform(tokens, []Range{Range{1, 1}}) - origRunes := []rune("junegunn.choi") + origBytes := []byte("junegunn.choi") for _, extended := range []bool{false, true} { chunk := Chunk{ &Item{ - text: []rune("junegunn"), - origText: &origRunes, + text: util.RunesToChars([]rune("junegunn")), + origText: &origBytes, transformed: trans}, } pattern.extended = extended matches := pattern.matchChunk(&chunk) - if string(matches[0].text) != "junegunn" || string(*matches[0].origText) != "junegunn.choi" || + if matches[0].text.ToString() != "junegunn" || string(*matches[0].origText) != "junegunn.choi" || matches[0].offsets[0][0] != 0 || matches[0].offsets[0][1] != 5 || !reflect.DeepEqual(matches[0].transformed, trans) { t.Error("Invalid match result", matches) diff --git a/src/terminal.go b/src/terminal.go index 213d3b06..ff1120a0 100644 --- a/src/terminal.go +++ b/src/terminal.go @@ -564,7 +564,7 @@ func (t *Terminal) printHeader() { trimmed, colors, newState := extractColor(lineStr, state, nil) state = newState item := &Item{ - text: []rune(trimmed), + text: util.RunesToChars([]rune(trimmed)), colors: colors, rank: buildEmptyRank(0)} @@ -663,8 +663,8 @@ func (t *Terminal) printHighlighted(item *Item, bold bool, col1 int, col2 int, c } // Overflow - text := make([]rune, len(item.text)) - copy(text, item.text) + text := make([]rune, item.text.Length()) + copy(text, item.text.ToRunes()) offsets := item.colorOffsets(col2, bold, current) maxWidth := t.window.Width - 3 maxe = util.Constrain(maxe+util.Min(maxWidth/2-2, t.hscrollOff), 0, len(text)) diff --git a/src/tokenizer.go b/src/tokenizer.go index 4b89b38e..05b890a2 100644 --- a/src/tokenizer.go +++ b/src/tokenizer.go @@ -18,7 +18,7 @@ type Range struct { // Token contains the tokenized part of the strings and its prefix length type Token struct { - text []rune + text util.Chars prefixLength int trimLength int } @@ -75,15 +75,15 @@ func ParseRange(str *string) (Range, bool) { return newRange(n, n), true } -func withPrefixLengths(tokens [][]rune, begin int) []Token { +func withPrefixLengths(tokens []util.Chars, begin int) []Token { ret := make([]Token, len(tokens)) prefixLength := begin for idx, token := range tokens { // Need to define a new local variable instead of the reused token to take // the pointer to it - ret[idx] = Token{token, prefixLength, util.TrimLen(token)} - prefixLength += len(token) + ret[idx] = Token{token, prefixLength, token.TrimLength()} + prefixLength += token.Length() } return ret } @@ -94,13 +94,15 @@ const ( awkWhite ) -func awkTokenizer(input []rune) ([][]rune, int) { +func awkTokenizer(input util.Chars) ([]util.Chars, int) { // 9, 32 - ret := [][]rune{} + ret := []util.Chars{} str := []rune{} prefixLength := 0 state := awkNil - for _, r := range input { + numChars := input.Length() + for idx := 0; idx < numChars; idx++ { + r := input.Get(idx) white := r == 9 || r == 32 switch state { case awkNil: @@ -119,34 +121,34 @@ func awkTokenizer(input []rune) ([][]rune, int) { if white { str = append(str, r) } else { - ret = append(ret, str) + ret = append(ret, util.RunesToChars(str)) state = awkBlack str = []rune{r} } } } if len(str) > 0 { - ret = append(ret, str) + ret = append(ret, util.RunesToChars(str)) } return ret, prefixLength } // Tokenize tokenizes the given string with the delimiter -func Tokenize(runes []rune, delimiter Delimiter) []Token { +func Tokenize(text util.Chars, delimiter Delimiter) []Token { if delimiter.str == nil && delimiter.regex == nil { // AWK-style (\S+\s*) - tokens, prefixLength := awkTokenizer(runes) + tokens, prefixLength := awkTokenizer(text) return withPrefixLengths(tokens, prefixLength) } var tokens []string if delimiter.str != nil { - tokens = strings.Split(string(runes), *delimiter.str) + tokens = strings.Split(text.ToString(), *delimiter.str) for i := 0; i < len(tokens)-1; i++ { tokens[i] = tokens[i] + *delimiter.str } } else if delimiter.regex != nil { - str := string(runes) + str := text.ToString() for len(str) > 0 { loc := delimiter.regex.FindStringIndex(str) if loc == nil { @@ -157,9 +159,9 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token { str = str[last:] } } - asRunes := make([][]rune, len(tokens)) + asRunes := make([]util.Chars, len(tokens)) for i, token := range tokens { - asRunes[i] = []rune(token) + asRunes[i] = util.RunesToChars([]rune(token)) } return withPrefixLengths(asRunes, 0) } @@ -167,7 +169,7 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token { func joinTokens(tokens []Token) []rune { ret := []rune{} for _, token := range tokens { - ret = append(ret, token.text...) + ret = append(ret, token.text.ToRunes()...) } return ret } @@ -175,7 +177,7 @@ func joinTokens(tokens []Token) []rune { func joinTokensAsRunes(tokens []Token) []rune { ret := []rune{} for _, token := range tokens { - ret = append(ret, token.text...) + ret = append(ret, token.text.ToRunes()...) } return ret } @@ -197,7 +199,7 @@ func Transform(tokens []Token, withNth []Range) []Token { } if idx >= 1 && idx <= numTokens { minIdx = idx - 1 - part = append(part, tokens[idx-1].text...) + part = append(part, tokens[idx-1].text.ToRunes()...) } } } else { @@ -224,7 +226,7 @@ func Transform(tokens []Token, withNth []Range) []Token { minIdx = util.Max(0, begin-1) for idx := begin; idx <= end; idx++ { if idx >= 1 && idx <= numTokens { - part = append(part, tokens[idx-1].text...) + part = append(part, tokens[idx-1].text.ToRunes()...) } } } @@ -234,7 +236,7 @@ func Transform(tokens []Token, withNth []Range) []Token { } else { prefixLength = 0 } - transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)} + transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)} } return transTokens } diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go index b0924402..1dd44144 100644 --- a/src/tokenizer_test.go +++ b/src/tokenizer_test.go @@ -1,6 +1,10 @@ package fzf -import "testing" +import ( + "testing" + + "github.com/junegunn/fzf/src/util" +) func TestParseRange(t *testing.T) { { @@ -43,23 +47,23 @@ func TestParseRange(t *testing.T) { func TestTokenize(t *testing.T) { // AWK-style input := " abc: def: ghi " - tokens := Tokenize([]rune(input), Delimiter{}) - if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 { + tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{}) + if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 { t.Errorf("%s", tokens) } // With delimiter - tokens = Tokenize([]rune(input), delimiterRegexp(":")) - if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 { + tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":")) + if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 { t.Errorf("%s", tokens) } // With delimiter regex - tokens = Tokenize([]rune(input), delimiterRegexp("\\s+")) - if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 || - string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 || - string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 || - string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 { + tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+")) + if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 || + tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 || + tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 || + tokens[3].text.ToString() != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 { t.Errorf("%s", tokens) } } @@ -67,7 +71,7 @@ func TestTokenize(t *testing.T) { func TestTransform(t *testing.T) { input := " abc: def: ghi: jkl" { - tokens := Tokenize([]rune(input), Delimiter{}) + tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{}) { ranges := splitNth("1,2,3") tx := Transform(tokens, ranges) @@ -80,25 +84,25 @@ func TestTransform(t *testing.T) { tx := Transform(tokens, ranges) if string(joinTokens(tx)) != "abc: def: ghi: def: ghi: jklabc: " || len(tx) != 4 || - string(tx[0].text) != "abc: def: " || tx[0].prefixLength != 2 || - string(tx[1].text) != "ghi: " || tx[1].prefixLength != 14 || - string(tx[2].text) != "def: ghi: jkl" || tx[2].prefixLength != 8 || - string(tx[3].text) != "abc: " || tx[3].prefixLength != 2 { + tx[0].text.ToString() != "abc: def: " || tx[0].prefixLength != 2 || + tx[1].text.ToString() != "ghi: " || tx[1].prefixLength != 14 || + tx[2].text.ToString() != "def: ghi: jkl" || tx[2].prefixLength != 8 || + tx[3].text.ToString() != "abc: " || tx[3].prefixLength != 2 { t.Errorf("%s", tx) } } } { - tokens := Tokenize([]rune(input), delimiterRegexp(":")) + tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":")) { ranges := splitNth("1..2,3,2..,1") tx := Transform(tokens, ranges) if string(joinTokens(tx)) != " abc: def: ghi: def: ghi: jkl abc:" || len(tx) != 4 || - string(tx[0].text) != " abc: def:" || tx[0].prefixLength != 0 || - string(tx[1].text) != " ghi:" || tx[1].prefixLength != 12 || - string(tx[2].text) != " def: ghi: jkl" || tx[2].prefixLength != 6 || - string(tx[3].text) != " abc:" || tx[3].prefixLength != 0 { + tx[0].text.ToString() != " abc: def:" || tx[0].prefixLength != 0 || + tx[1].text.ToString() != " ghi:" || tx[1].prefixLength != 12 || + tx[2].text.ToString() != " def: ghi: jkl" || tx[2].prefixLength != 6 || + tx[3].text.ToString() != " abc:" || tx[3].prefixLength != 0 { t.Errorf("%s", tx) } } diff --git a/src/util/chars.go b/src/util/chars.go new file mode 100644 index 00000000..25a15ddd --- /dev/null +++ b/src/util/chars.go @@ -0,0 +1,113 @@ +package util + +import ( + "unicode/utf8" +) + +type Chars struct { + runes []rune + bytes []byte +} + +// ToChars converts byte array into rune array +func ToChars(bytea []byte) Chars { + var runes []rune + ascii := true + numBytes := len(bytea) + for i := 0; i < numBytes; { + if bytea[i] < utf8.RuneSelf { + if !ascii { + runes = append(runes, rune(bytea[i])) + } + i++ + } else { + if ascii { + ascii = false + runes = make([]rune, i, numBytes) + for j := 0; j < i; j++ { + runes[j] = rune(bytea[j]) + } + } + r, sz := utf8.DecodeRune(bytea[i:]) + i += sz + runes = append(runes, r) + } + } + if ascii { + return Chars{bytes: bytea} + } + return Chars{runes: runes} +} + +func RunesToChars(runes []rune) Chars { + return Chars{runes: runes} +} + +func (chars *Chars) Get(i int) rune { + if chars.runes != nil { + return chars.runes[i] + } + return rune(chars.bytes[i]) +} + +func (chars *Chars) Length() int { + if chars.runes != nil { + return len(chars.runes) + } + return len(chars.bytes) +} + +// TrimLength returns the length after trimming leading and trailing whitespaces +func (chars *Chars) TrimLength() int { + var i int + len := chars.Length() + for i = len - 1; i >= 0; i-- { + char := chars.Get(i) + if char != ' ' && char != '\t' { + break + } + } + // Completely empty + if i < 0 { + return 0 + } + + var j int + for j = 0; j < len; j++ { + char := chars.Get(j) + if char != ' ' && char != '\t' { + break + } + } + return i - j + 1 +} + +func (chars *Chars) TrailingWhitespaces() int { + whitespaces := 0 + for i := chars.Length() - 1; i >= 0; i-- { + char := chars.Get(i) + if char != ' ' && char != '\t' { + break + } + whitespaces++ + } + return whitespaces +} + +func (chars *Chars) ToString() string { + if chars.runes != nil { + return string(chars.runes) + } + return string(chars.bytes) +} + +func (chars *Chars) ToRunes() []rune { + if chars.runes != nil { + return chars.runes + } + runes := make([]rune, len(chars.bytes)) + for idx, b := range chars.bytes { + runes[idx] = rune(b) + } + return runes +} diff --git a/src/util/chars_test.go b/src/util/chars_test.go new file mode 100644 index 00000000..e42cfb7c --- /dev/null +++ b/src/util/chars_test.go @@ -0,0 +1,36 @@ +package util + +import "testing" + +func TestToCharsNil(t *testing.T) { + bs := Chars{bytes: []byte{}} + if bs.bytes == nil || bs.runes != nil { + t.Error() + } + rs := RunesToChars([]rune{}) + if rs.bytes != nil || rs.runes == nil { + t.Error() + } +} + +func TestToCharsAscii(t *testing.T) { + chars := ToChars([]byte("foobar")) + if chars.ToString() != "foobar" || chars.runes != nil { + t.Error() + } +} + +func TestCharsLength(t *testing.T) { + chars := ToChars([]byte("\tabc한글 ")) + if chars.Length() != 8 || chars.TrimLength() != 5 { + t.Error() + } +} + +func TestCharsToString(t *testing.T) { + text := "\tabc한글 " + chars := ToChars([]byte(text)) + if chars.ToString() != text { + t.Error() + } +} diff --git a/src/util/util.go b/src/util/util.go index 4f3d409d..90cc28b4 100644 --- a/src/util/util.go +++ b/src/util/util.go @@ -7,7 +7,6 @@ import ( "os" "os/exec" "time" - "unicode/utf8" ) // Max returns the largest integer @@ -84,34 +83,6 @@ func IsTty() bool { return int(C.isatty(C.int(os.Stdin.Fd()))) != 0 } -// TrimRight returns rune array with trailing white spaces cut off -func TrimRight(runes []rune) []rune { - var i int - for i = len(runes) - 1; i >= 0; i-- { - char := runes[i] - if char != ' ' && char != '\t' { - break - } - } - return runes[0 : i+1] -} - -// BytesToRunes converts byte array into rune array -func BytesToRunes(bytea []byte) []rune { - runes := make([]rune, 0, len(bytea)) - for i := 0; i < len(bytea); { - if bytea[i] < utf8.RuneSelf { - runes = append(runes, rune(bytea[i])) - i++ - } else { - r, sz := utf8.DecodeRune(bytea[i:]) - i += sz - runes = append(runes, r) - } - } - return runes -} - // TrimLen returns the length of trimmed rune array func TrimLen(runes []rune) int { var i int -- cgit v1.2.3