author    Junegunn Choi <junegunn.c@gmail.com>  2015-04-17 22:23:52 +0900
committer Junegunn Choi <junegunn.c@gmail.com>  2015-04-17 22:23:52 +0900
commit    2fe1e28220c543ddbf4e12ee7396e44ee85ad8e0
tree      82c77b5e639cd66b941356788bcb8e0e053949be /src
parent    288131ac5a895ba335681339d85ee039557490da
Improvements in performance and memory usage
I profiled fzf and it turned out that it was spending a significant amount of time repeatedly converting character arrays into Unicode codepoints. This commit greatly improves search performance after the initial scan by memoizing the converted results.

This commit also addresses the problem of unbounded memory usage in fzf. fzf is a short-lived process that usually processes small input, so it was implemented to cache the intermediate results very aggressively, with no notion of cache expiration or eviction. I still think a proper caching scheme would be overkill. Instead, this commit introduces limits on the maximum size (or minimum selectivity) of the intermediate results that can be cached.
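The gist of the two ideas, as a minimal self-contained sketch in Go. The item type and add function here are hypothetical, simplified stand-ins for the real Item (src/item.go) and ChunkCache.Add (src/cache.go); only queryCacheMax = chunkSize / 5 is taken verbatim from src/constants.go below:

package main

import "fmt"

const (
	chunkSize     = 100
	queryCacheMax = chunkSize / 5 // results larger than this are not cached
)

// item memoizes the []rune conversion of its text (a simplified
// stand-in for the real Item, which caches its transformed tokens).
type item struct {
	text  string
	runes []rune
}

// runesOf converts text to runes once and reuses the result on every
// subsequent match, instead of re-converting the string per query.
func (it *item) runesOf() []rune {
	if it.runes == nil {
		it.runes = []rune(it.text)
	}
	return it.runes
}

// add stores a result list only when it is selective enough, bounding
// memory without a full expiration/eviction scheme.
func add(cache map[string][]*item, key string, list []*item) {
	if len(key) == 0 || len(list) > queryCacheMax {
		return
	}
	cache[key] = list
}

func main() {
	it := &item{text: "fzf"}
	fmt.Println(string(it.runesOf())) // converted once, memoized afterwards

	cache := make(map[string][]*item)
	add(cache, "f", make([]*item, 50)) // 50 > 20: too unselective, skipped
	add(cache, "fzf", []*item{it})     // small, selective result: cached
	fmt.Println(len(cache))            // 1
}

Skipping the cache for unselective queries costs little, since such results are cheap to recompute from the already-memoized runes.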
Diffstat (limited to 'src')
-rw-r--r--  src/algo/algo.go        54
-rw-r--r--  src/algo/algo_test.go    6
-rw-r--r--  src/cache.go            12
-rw-r--r--  src/cache_test.go        2
-rw-r--r--  src/chunklist.go        11
-rw-r--r--  src/chunklist_test.go    4
-rw-r--r--  src/constants.go        31
-rw-r--r--  src/core.go              6
-rw-r--r--  src/item.go              2
-rw-r--r--  src/matcher.go           8
-rw-r--r--  src/merger.go            8
-rw-r--r--  src/pattern.go          19
-rw-r--r--  src/pattern_test.go      4
-rw-r--r--  src/reader.go            2
-rw-r--r--  src/terminal.go         17
-rw-r--r--  src/tokenizer.go        44
-rw-r--r--  src/tokenizer_test.go   30
-rw-r--r--  src/util/util.go        11
18 files changed, 136 insertions(+), 135 deletions(-)
diff --git a/src/algo/algo.go b/src/algo/algo.go
index 36c8d873..c1c07f3e 100644
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@@ -1,8 +1,9 @@
package algo
import (
- "strings"
"unicode"
+
+ "github.com/junegunn/fzf/src/util"
)
/*
@@ -14,13 +15,11 @@ import (
*/
// FuzzyMatch performs fuzzy-match
-func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
+func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
if len(pattern) == 0 {
return 0, 0
}
- runes := []rune(*input)
-
// 0. (FIXME) How to find the shortest match?
// a_____b__c__abc
// ^^^^^^^^^^ ^^^
@@ -34,7 +33,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
sidx := -1
eidx := -1
- for index, char := range runes {
+ for index, char := range *runes {
// This is considerably faster than blindly applying strings.ToLower to the
// whole string
if !caseSensitive {
@@ -43,10 +42,10 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
// compiler as of now does not inline non-leaf functions.)
if char >= 'A' && char <= 'Z' {
char += 32
- runes[index] = char
+ (*runes)[index] = char
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
- runes[index] = char
+ (*runes)[index] = char
}
}
if char == pattern[pidx] {
@@ -63,7 +62,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
if sidx >= 0 && eidx >= 0 {
pidx--
for index := eidx - 1; index >= sidx; index-- {
- char := runes[index]
+ char := (*runes)[index]
if char == pattern[pidx] {
if pidx--; pidx < 0 {
sidx = index
@@ -76,27 +75,6 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
return -1, -1
}
-// ExactMatchStrings performs exact-match using strings package.
-// Currently not used.
-func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int, int) {
- if len(pattern) == 0 {
- return 0, 0
- }
-
- var str string
- if caseSensitive {
- str = *input
- } else {
- str = strings.ToLower(*input)
- }
-
- if idx := strings.Index(str, string(pattern)); idx >= 0 {
- prefixRuneLen := len([]rune((*input)[:idx]))
- return prefixRuneLen, prefixRuneLen + len(pattern)
- }
- return -1, -1
-}
-
// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
@@ -104,13 +82,12 @@ func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int,
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
-func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, int) {
+func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
if len(pattern) == 0 {
return 0, 0
}
- runes := []rune(*input)
- numRunes := len(runes)
+ numRunes := len(*runes)
plen := len(pattern)
if numRunes < plen {
return -1, -1
@@ -118,7 +95,7 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
pidx := 0
for index := 0; index < numRunes; index++ {
- char := runes[index]
+ char := (*runes)[index]
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@@ -140,14 +117,13 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
}
// PrefixMatch performs prefix-match
-func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
- runes := []rune(*input)
- if len(runes) < len(pattern) {
+func PrefixMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
+ if len(*runes) < len(pattern) {
return -1, -1
}
for index, r := range pattern {
- char := runes[index]
+ char := (*runes)[index]
if !caseSensitive {
char = unicode.ToLower(char)
}
@@ -159,8 +135,8 @@ func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
}
// SuffixMatch performs suffix-match
-func SuffixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
- runes := []rune(strings.TrimRight(*input, " "))
+func SuffixMatch(caseSensitive bool, input *[]rune, pattern []rune) (int, int) {
+ runes := util.TrimRight(input)
trimmedLen := len(runes)
diff := trimmedLen - len(pattern)
if diff < 0 {
diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go
index ac7aad5a..32056dfb 100644
--- a/src/algo/algo_test.go
+++ b/src/algo/algo_test.go
@@ -5,11 +5,12 @@ import (
"testing"
)
-func assertMatch(t *testing.T, fun func(bool, *string, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
+func assertMatch(t *testing.T, fun func(bool, *[]rune, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
if !caseSensitive {
pattern = strings.ToLower(pattern)
}
- s, e := fun(caseSensitive, &input, []rune(pattern))
+ runes := []rune(input)
+ s, e := fun(caseSensitive, &runes, []rune(pattern))
if s != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern)
}
@@ -45,7 +46,6 @@ func TestSuffixMatch(t *testing.T) {
func TestEmptyPattern(t *testing.T) {
assertMatch(t, FuzzyMatch, true, "foobar", "", 0, 0)
- assertMatch(t, ExactMatchStrings, true, "foobar", "", 0, 0)
assertMatch(t, ExactMatchNaive, true, "foobar", "", 0, 0)
assertMatch(t, PrefixMatch, true, "foobar", "", 0, 0)
assertMatch(t, SuffixMatch, true, "foobar", "", 6, 6)
diff --git a/src/cache.go b/src/cache.go
index f2f84a0a..d2ec00b4 100644
--- a/src/cache.go
+++ b/src/cache.go
@@ -2,23 +2,23 @@ package fzf
import "sync"
-// QueryCache associates strings to lists of items
-type QueryCache map[string][]*Item
+// queryCache associates strings to lists of items
+type queryCache map[string][]*Item
// ChunkCache associates Chunk and query string to lists of items
type ChunkCache struct {
mutex sync.Mutex
- cache map[*Chunk]*QueryCache
+ cache map[*Chunk]*queryCache
}
// NewChunkCache returns a new ChunkCache
func NewChunkCache() ChunkCache {
- return ChunkCache{sync.Mutex{}, make(map[*Chunk]*QueryCache)}
+ return ChunkCache{sync.Mutex{}, make(map[*Chunk]*queryCache)}
}
// Add adds the list to the cache
func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
- if len(key) == 0 || !chunk.IsFull() {
+ if len(key) == 0 || !chunk.IsFull() || len(list) > queryCacheMax {
return
}
@@ -27,7 +27,7 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
qc, ok := cc.cache[chunk]
if !ok {
- cc.cache[chunk] = &QueryCache{}
+ cc.cache[chunk] = &queryCache{}
qc = cc.cache[chunk]
}
(*qc)[key] = list
diff --git a/src/cache_test.go b/src/cache_test.go
index 3975eaa3..05f904c9 100644
--- a/src/cache_test.go
+++ b/src/cache_test.go
@@ -4,7 +4,7 @@ import "testing"
func TestChunkCache(t *testing.T) {
cache := NewChunkCache()
- chunk2 := make(Chunk, ChunkSize)
+ chunk2 := make(Chunk, chunkSize)
chunk1p := &Chunk{}
chunk2p := &chunk2
items1 := []*Item{&Item{}}
diff --git a/src/chunklist.go b/src/chunklist.go
index 571a59af..52084f2f 100644
--- a/src/chunklist.go
+++ b/src/chunklist.go
@@ -2,10 +2,7 @@ package fzf
import "sync"
-// Capacity of each chunk
-const ChunkSize int = 100
-
-// Chunk is a list of Item pointers whose size has the upper limit of ChunkSize
+// Chunk is a list of Item pointers whose size has the upper limit of chunkSize
type Chunk []*Item // >>> []Item
// ItemBuilder is a closure type that builds Item object from a pointer to a
@@ -35,7 +32,7 @@ func (c *Chunk) push(trans ItemBuilder, data *string, index int) {
// IsFull returns true if the Chunk is full
func (c *Chunk) IsFull() bool {
- return len(*c) == ChunkSize
+ return len(*c) == chunkSize
}
func (cl *ChunkList) lastChunk() *Chunk {
@@ -47,7 +44,7 @@ func CountItems(cs []*Chunk) int {
if len(cs) == 0 {
return 0
}
- return ChunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
+ return chunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
}
// Push adds the item to the list
@@ -56,7 +53,7 @@ func (cl *ChunkList) Push(data string) {
defer cl.mutex.Unlock()
if len(cl.chunks) == 0 || cl.lastChunk().IsFull() {
- newChunk := Chunk(make([]*Item, 0, ChunkSize))
+ newChunk := Chunk(make([]*Item, 0, chunkSize))
cl.chunks = append(cl.chunks, &newChunk)
}
diff --git a/src/chunklist_test.go b/src/chunklist_test.go
index 02288d9f..2f8ef7e5 100644
--- a/src/chunklist_test.go
+++ b/src/chunklist_test.go
@@ -45,7 +45,7 @@ func TestChunkList(t *testing.T) {
}
// Add more data
- for i := 0; i < ChunkSize*2; i++ {
+ for i := 0; i < chunkSize*2; i++ {
cl.Push(fmt.Sprintf("item %d", i))
}
@@ -57,7 +57,7 @@ func TestChunkList(t *testing.T) {
// New snapshot
snapshot, count = cl.Snapshot()
if len(snapshot) != 3 || !snapshot[0].IsFull() ||
- !snapshot[1].IsFull() || snapshot[2].IsFull() || count != ChunkSize*2+2 {
+ !snapshot[1].IsFull() || snapshot[2].IsFull() || count != chunkSize*2+2 {
t.Error("Expected two full chunks and one more chunk")
}
if len(*snapshot[2]) != 2 {
diff --git a/src/constants.go b/src/constants.go
index 3c7f76cb..07ac7526 100644
--- a/src/constants.go
+++ b/src/constants.go
@@ -1,11 +1,38 @@
package fzf
import (
+ "time"
+
"github.com/junegunn/fzf/src/util"
)
-// Current version
-const Version = "0.9.9"
+const (
+ // Current version
+ Version = "0.9.9"
+
+ // Core
+ coordinatorDelayMax time.Duration = 100 * time.Millisecond
+ coordinatorDelayStep time.Duration = 10 * time.Millisecond
+
+ // Reader
+ defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
+
+ // Terminal
+ initialDelay = 100 * time.Millisecond
+ spinnerDuration = 200 * time.Millisecond
+
+ // Matcher
+ progressMinDuration = 200 * time.Millisecond
+
+ // Capacity of each chunk
+ chunkSize int = 100
+
+ // Do not cache results of low selectivity queries
+ queryCacheMax int = chunkSize / 5
+
+ // Not to cache mergers with large lists
+ mergerCacheMax int = 100000
+)
// fzf events
const (
diff --git a/src/core.go b/src/core.go
index b56fbf64..7230cffd 100644
--- a/src/core.go
+++ b/src/core.go
@@ -34,9 +34,6 @@ import (
"github.com/junegunn/fzf/src/util"
)
-const coordinatorDelayMax time.Duration = 100 * time.Millisecond
-const coordinatorDelayStep time.Duration = 10 * time.Millisecond
-
func initProcs() {
runtime.GOMAXPROCS(runtime.NumCPU())
}
@@ -99,8 +96,9 @@ func Run(options *Options) {
} else {
chunkList = NewChunkList(func(data *string, index int) *Item {
tokens := Tokenize(data, opts.Delimiter)
+ trans := Transform(tokens, opts.WithNth)
item := Item{
- text: Transform(tokens, opts.WithNth).whole,
+ text: joinTokens(trans),
origText: data,
index: uint32(index),
colors: nil,
diff --git a/src/item.go b/src/item.go
index 996c5e19..711adbe2 100644
--- a/src/item.go
+++ b/src/item.go
@@ -19,7 +19,7 @@ type colorOffset struct {
type Item struct {
text *string
origText *string
- transformed *Transformed
+ transformed *[]Token
index uint32
offsets []Offset
colors []ansiOffset
diff --git a/src/matcher.go b/src/matcher.go
index 0f3b409e..d01ed23e 100644
--- a/src/matcher.go
+++ b/src/matcher.go
@@ -34,10 +34,6 @@ const (
reqReset
)
-const (
- progressMinDuration = 200 * time.Millisecond
-)
-
// NewMatcher returns a new Matcher
func NewMatcher(patternBuilder func([]rune) *Pattern,
sort bool, tac bool, eventBox *util.EventBox) *Matcher {
@@ -100,7 +96,9 @@ func (m *Matcher) Loop() {
}
if !cancelled {
- m.mergerCache[patternString] = merger
+ if merger.Cacheable() {
+ m.mergerCache[patternString] = merger
+ }
merger.final = request.final
m.eventBox.Set(EvtSearchFin, merger)
}
diff --git a/src/merger.go b/src/merger.go
index bc5d5de8..4c7966a6 100644
--- a/src/merger.go
+++ b/src/merger.go
@@ -61,8 +61,8 @@ func (mg *Merger) Get(idx int) *Item {
if mg.tac {
idx = mg.count - idx - 1
}
- chunk := (*mg.chunks)[idx/ChunkSize]
- return (*chunk)[idx%ChunkSize]
+ chunk := (*mg.chunks)[idx/chunkSize]
+ return (*chunk)[idx%chunkSize]
}
if mg.sorted {
@@ -82,6 +82,10 @@ func (mg *Merger) Get(idx int) *Item {
panic(fmt.Sprintf("Index out of bounds (unsorted, %d/%d)", idx, mg.count))
}
+func (mg *Merger) Cacheable() bool {
+ return mg.count < mergerCacheMax
+}
+
func (mg *Merger) mergedGet(idx int) *Item {
for i := len(mg.merged); i <= idx; i++ {
minRank := Rank{0, 0, 0}
diff --git a/src/pattern.go b/src/pattern.go
index e6bda5f3..75cc5f80 100644
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -43,7 +43,7 @@ type Pattern struct {
hasInvTerm bool
delimiter *regexp.Regexp
nth []Range
- procFun map[termType]func(bool, *string, []rune) (int, int)
+ procFun map[termType]func(bool, *[]rune, []rune) (int, int)
}
var (
@@ -122,7 +122,7 @@ func BuildPattern(mode Mode, caseMode Case,
hasInvTerm: hasInvTerm,
nth: nth,
delimiter: delimiter,
- procFun: make(map[termType]func(bool, *string, []rune) (int, int))}
+ procFun: make(map[termType]func(bool, *[]rune, []rune) (int, int))}
ptr.procFun[termFuzzy] = algo.FuzzyMatch
ptr.procFun[termExact] = algo.ExactMatchNaive
@@ -300,28 +300,27 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
return offsets
}
-func (p *Pattern) prepareInput(item *Item) *Transformed {
+func (p *Pattern) prepareInput(item *Item) *[]Token {
if item.transformed != nil {
return item.transformed
}
- var ret *Transformed
+ var ret *[]Token
if len(p.nth) > 0 {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
- trans := Transformed{
- whole: item.text,
- parts: []Token{Token{text: item.text, prefixLength: 0}}}
+ runes := []rune(*item.text)
+ trans := []Token{Token{text: &runes, prefixLength: 0}}
ret = &trans
}
item.transformed = ret
return ret
}
-func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int),
- inputs *Transformed, pattern []rune) (int, int) {
- for _, part := range inputs.parts {
+func (p *Pattern) iter(pfun func(bool, *[]rune, []rune) (int, int),
+ tokens *[]Token, pattern []rune) (int, int) {
+ for _, part := range *tokens {
prefixLength := part.prefixLength
if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 {
return sidx + prefixLength, eidx + prefixLength
diff --git a/src/pattern_test.go b/src/pattern_test.go
index 67542f21..c73e6500 100644
--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@@ -58,8 +58,8 @@ func TestExact(t *testing.T) {
clearPatternCache()
pattern := BuildPattern(ModeExtended, CaseSmart,
[]Range{}, nil, []rune("'abc"))
- str := "aabbcc abc"
- sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &str, pattern.terms[0].text)
+ runes := []rune("aabbcc abc")
+ sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &runes, pattern.terms[0].text)
if sidx != 7 || eidx != 10 {
t.Errorf("%s / %d / %d", pattern.terms, sidx, eidx)
}
diff --git a/src/reader.go b/src/reader.go
index 2c10b8aa..d4764119 100644
--- a/src/reader.go
+++ b/src/reader.go
@@ -9,8 +9,6 @@ import (
"github.com/junegunn/fzf/src/util"
)
-const defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
-
// Reader reads from command or standard input
type Reader struct {
pusher func(string)
diff --git a/src/terminal.go b/src/terminal.go
index 667d2067..eaec9a3f 100644
--- a/src/terminal.go
+++ b/src/terminal.go
@@ -38,7 +38,7 @@ type Terminal struct {
progress int
reading bool
merger *Merger
- selected map[*string]selectedItem
+ selected map[uint32]selectedItem
reqBox *util.EventBox
eventBox *util.EventBox
mutex sync.Mutex
@@ -79,11 +79,6 @@ const (
reqQuit
)
-const (
- initialDelay = 100 * time.Millisecond
- spinnerDuration = 200 * time.Millisecond
-)
-
// NewTerminal returns new Terminal object
func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
input := []rune(opts.Query)
@@ -103,7 +98,7 @@ func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
pressed: 0,
printQuery: opts.PrintQuery,
merger: EmptyMerger,
- selected: make(map[*string]selectedItem),
+ selected: make(map[uint32]selectedItem),
reqBox: util.NewEventBox(),
eventBox: eventBox,
mutex: sync.Mutex{},
@@ -273,7 +268,7 @@ func (t *Terminal) printList() {
}
func (t *Terminal) printItem(item *Item, current bool) {
- _, selected := t.selected[item.text]
+ _, selected := t.selected[item.index]
if current {
C.CPrint(C.ColCursor, true, ">")
if selected {
@@ -565,16 +560,16 @@ func (t *Terminal) Loop() {
toggle := func() {
if t.cy < t.merger.Length() {
item := t.merger.Get(t.cy)
- if _, found := t.selected[item.text]; !found {
+ if _, found := t.selected[item.index]; !found {
var strptr *string
if item.origText != nil {
strptr = item.origText
} else {
strptr = item.text
}
- t.selected[item.text] = selectedItem{time.Now(), strptr}
+ t.selected[item.index] = selectedItem{time.Now(), strptr}
} else {
- delete(t.selected, item.text)
+ delete(t.selected, item.index)
}
req(reqInfo)
}
diff --git a/src/tokenizer.go b/src/tokenizer.go
index d38f46f5..c61b2383 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -16,15 +16,9 @@ type Range struct {
end int
}
-// Transformed holds the result of tokenization and transformation
-type Transformed struct {
- whole *string
- parts []Token
-}
-
// Token contains the tokenized part of the strings and its prefix length
type Token struct {
- text *string
+ text *[]rune
prefixLength int
}
@@ -81,8 +75,8 @@ func withPrefixLengths(tokens []string, begin int) []Token {
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
- str := token
- ret[idx] = Token{text: &str, prefixLength: prefixLength}
+ runes := []rune(token)
+ ret[idx] = Token{text: &runes, prefixLength: prefixLength}
prefixLength += len([]rune(token))
}
return ret
@@ -142,33 +136,40 @@ func Tokenize(str *string, delimiter *regexp.Regexp) []Token {
return withPrefixLengths(tokens, 0)
}
-func joinTokens(tokens []Token) string {
+func joinTokens(tokens *[]Token) *string {
ret := ""
- for _, token := range tokens {
- ret += *token.text
+ for _, token := range *tokens {
+ ret += string(*token.text)
}
- return ret
+ return &ret
+}
+
+func joinTokensAsRunes(tokens *[]Token) *[]rune {
+ ret := []rune{}
+ for _, token := range *tokens {
+ ret = append(ret, *token.text...)
+ }
+ return &ret
}
// Transform is used to transform the input when --with-nth option is given
-func Transform(tokens []Token, withNth []Range) *Transformed {
+func Transform(tokens []Token, withNth []Range) *[]Token {
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
- whole := ""
for idx, r := range withNth {
- part := ""
+ part := []rune{}
minIdx := 0
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
- part += joinTokens(tokens)
+ part = append(part, *joinTokensAsRunes(&tokens)...)
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
- part += *tokens[idx-1].text
+ part = append(part, *tokens[idx-1].text...)
}
}
} else {
@@ -195,11 +196,10 @@ func Transform(tokens []Token, withNth []Range) *Transformed {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
- part += *tokens[idx-1].text
+ part = append(part, *tokens[idx-1].text...)
}
}
}
- whole += part
var prefixLength int
if minIdx < numTokens {
prefixLength = tokens[minIdx].prefixLength
@@ -208,7 +208,5 @@ func Transform(tokens []Token, withNth []Range) *Transformed {
}
transTokens[idx] = Token{&part, prefixLength}
}
- return &Transformed{
- whole: &whole,
- parts: transTokens}
+ return &transTokens
}
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
index 5195a1b6..0362b5ab 100644
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@@ -44,13 +44,13 @@ func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize(&input, nil)
- if *tokens[0].text != "abc: " || tokens[0].prefixLength != 2 {
+ if string(*tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
t.Errorf("%s", tokens)
}
// With delimiter
tokens = Tokenize(&input, delimiterRegexp(":"))
- if *tokens[0].text != " abc:" || tokens[0].prefixLength != 0 {
+ if string(*tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
t.Errorf("%s", tokens)
}
}
@@ -62,19 +62,19 @@ func TestTransform(t *testing.T) {
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)
- if *tx.whole != "abc: def: ghi: " {
+ if *joinTokens(tx) != "abc: def: ghi: " {
t.Errorf("%s", *tx)
}
}
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
- if *tx.whole != "abc: def: ghi: def: ghi: jklabc: " ||
- len(tx.parts) != 4 ||
- *tx.parts[0].text != "abc: def: " || tx.parts[0].prefixLength != 2 ||
- *tx.parts[1].text != "ghi: " || tx.parts[1].prefixLength != 14 ||
- *tx.parts[2].text != "def: ghi: jkl" || tx.parts[2].prefixLength != 8 ||
- *tx.parts[3].text != "abc: " || tx.parts[3].prefixLength != 2 {
+ if *joinTokens(tx) != "abc: def: ghi: def: ghi: jklabc: " ||
+ len(*tx) != 4 ||
+ string(*(*tx)[0].text) != "abc: def: " || (*tx)[0].prefixLength != 2 ||
+ string(*(*tx)[1].text) != "ghi: " || (*tx)[1].prefixLength != 14 ||
+ string(*(*tx)[2].text) != "def: ghi: jkl" || (*tx)[2].prefixLength != 8 ||
+ string(*(*tx)[3].text) != "abc: " || (*tx)[3].prefixLength != 2 {
t.Errorf("%s", *tx)
}
}
@@ -84,12 +84,12 @@ func TestTransform(t *testing.T) {
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
- if *tx.whole != " abc: def: ghi: def: ghi: jkl abc:" ||
- len(tx.parts) != 4 ||
- *tx.parts[0].text != " abc: def:" || tx.parts[0].prefixLength != 0 ||
- *tx.parts[1].text != " ghi:" || tx.parts[1].prefixLength != 12 ||
- *tx.parts[2].text != " def: ghi: jkl" || tx.parts[2].prefixLength != 6 ||
- *tx.parts[3].text != " abc:" || tx.parts[3].prefixLength != 0 {
+ if *joinTokens(tx) != " abc: def: ghi: def: ghi: jkl abc:" ||
+ len(*tx) != 4 ||
+ string(*(*tx)[0].text) != " abc: def:" || (*tx)[0].prefixLength != 0 ||
+ string(*(*tx)[1].text) != " ghi:" || (*tx)[1].prefixLength != 12 ||
+ string(*(*tx)[2].text) != " def: ghi: jkl" || (*tx)[2].prefixLength != 6 ||
+ string(*(*tx)[3].text) != " abc:" || (*tx)[3].prefixLength != 0 {
t.Errorf("%s", *tx)
}
}
diff --git a/src/util/util.go b/src/util/util.go
index 931b14a8..511de1e5 100644
--- a/src/util/util.go
+++ b/src/util/util.go
@@ -77,3 +77,14 @@ func Between(val int, min int, max int) bool {
func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
+
+func TrimRight(runes *[]rune) []rune {
+ var i int
+ for i = len(*runes) - 1; i >= 0; i-- {
+ char := (*runes)[i]
+ if char != ' ' && char != '\t' {
+ break
+ }
+ }
+ return (*runes)[0 : i+1]
+}