author    Junegunn Choi <junegunn.c@gmail.com>  2015-08-02 14:00:18 +0900
committer Junegunn Choi <junegunn.c@gmail.com>  2015-08-02 14:00:18 +0900
commit    0ea66329b84cc6e4f8ff61ee99c00bb238070247 (patch)
tree      72c3bc62ec491246390b56b2aac5b33645839503 /src/tokenizer.go
parent    634670e3ea51a2fa1498a3de0c074b819828e2d8 (diff)
Performance tuning - eager rune array conversion
> wc -l /tmp/list2
  2594098 /tmp/list2

> time cat /tmp/list2 | fzf-0.10.1-darwin_amd64 -fqwerty > /dev/null

real    0m5.418s
user    0m10.990s
sys     0m1.302s

> time cat /tmp/list2 | fzf-head -fqwerty > /dev/null

real    0m4.862s
user    0m6.619s
sys     0m0.982s
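The gist of the change, as a minimal standalone sketch (the two struct shapes mirror the diff; the main function and its sample input are illustrative assumptions, not part of the commit): storing []rune directly in the Token removes a pointer indirection on every access, and the string-to-rune conversion happens eagerly, once, instead of being hidden behind a *[]rune.

    package main

    import "fmt"

    // Old shape: the rune slice sits behind a pointer, so building a
    // token requires a named local whose address can be taken.
    type tokenOld struct {
        text         *[]rune
        prefixLength int
    }

    // New shape: the rune slice is stored directly; the conversion
    // from string happens eagerly, at construction time.
    type tokenNew struct {
        text         []rune
        prefixLength int
    }

    func main() {
        word := "qwerty" // illustrative input

        runes := []rune(word) // extra local, needed only for &runes
        before := tokenOld{text: &runes, prefixLength: 0}

        after := tokenNew{text: []rune(word), prefixLength: 0}

        fmt.Println(string(*before.text), string(after.text))
    }

With the rune slice converted once per input line and then passed by value, downstream consumers no longer repeat the conversion in the hot matching path, which lines up with the drop in user time in the timings above.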
Diffstat (limited to 'src/tokenizer.go')
-rw-r--r--  src/tokenizer.go  |  45
1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/src/tokenizer.go b/src/tokenizer.go
index c61b2383..a616c6ba 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -18,7 +18,7 @@ type Range struct {
// Token contains the tokenized part of the strings and its prefix length
type Token struct {
- text *[]rune
+ text []rune
prefixLength int
}
@@ -75,8 +75,7 @@ func withPrefixLengths(tokens []string, begin int) []Token {
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
- runes := []rune(token)
- ret[idx] = Token{text: &runes, prefixLength: prefixLength}
+ ret[idx] = Token{text: []rune(token), prefixLength: prefixLength}
prefixLength += len([]rune(token))
}
return ret
@@ -88,13 +87,13 @@ const (
awkWhite
)
-func awkTokenizer(input *string) ([]string, int) {
+func awkTokenizer(input []rune) ([]string, int) {
// 9, 32
ret := []string{}
str := []rune{}
prefixLength := 0
state := awkNil
- for _, r := range []rune(*input) {
+ for _, r := range input {
white := r == 9 || r == 32
switch state {
case awkNil:
@@ -126,34 +125,34 @@ func awkTokenizer(input *string) ([]string, int) {
}
// Tokenize tokenizes the given string with the delimiter
-func Tokenize(str *string, delimiter *regexp.Regexp) []Token {
+func Tokenize(runes []rune, delimiter *regexp.Regexp) []Token {
if delimiter == nil {
// AWK-style (\S+\s*)
- tokens, prefixLength := awkTokenizer(str)
+ tokens, prefixLength := awkTokenizer(runes)
return withPrefixLengths(tokens, prefixLength)
}
- tokens := delimiter.FindAllString(*str, -1)
+ tokens := delimiter.FindAllString(string(runes), -1)
return withPrefixLengths(tokens, 0)
}
-func joinTokens(tokens *[]Token) *string {
- ret := ""
- for _, token := range *tokens {
- ret += string(*token.text)
+func joinTokens(tokens []Token) []rune {
+ ret := []rune{}
+ for _, token := range tokens {
+ ret = append(ret, token.text...)
}
- return &ret
+ return ret
}
-func joinTokensAsRunes(tokens *[]Token) *[]rune {
+func joinTokensAsRunes(tokens []Token) []rune {
ret := []rune{}
- for _, token := range *tokens {
- ret = append(ret, *token.text...)
+ for _, token := range tokens {
+ ret = append(ret, token.text...)
}
- return &ret
+ return ret
}
// Transform is used to transform the input when --with-nth option is given
-func Transform(tokens []Token, withNth []Range) *[]Token {
+func Transform(tokens []Token, withNth []Range) []Token {
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
for idx, r := range withNth {
@@ -162,14 +161,14 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
- part = append(part, *joinTokensAsRunes(&tokens)...)
+ part = append(part, joinTokensAsRunes(tokens)...)
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
- part = append(part, *tokens[idx-1].text...)
+ part = append(part, tokens[idx-1].text...)
}
}
} else {
@@ -196,7 +195,7 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
- part = append(part, *tokens[idx-1].text...)
+ part = append(part, tokens[idx-1].text...)
}
}
}
@@ -206,7 +205,7 @@ func Transform(tokens []Token, withNth []Range) *[]Token {
} else {
prefixLength = 0
}
- transTokens[idx] = Token{&part, prefixLength}
+ transTokens[idx] = Token{part, prefixLength}
}
- return &transTokens
+ return transTokens
}