summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunegunn Choi <junegunn.c@gmail.com>2016-08-14 01:53:06 +0900
committerJunegunn Choi <junegunn.c@gmail.com>2016-08-14 02:19:29 +0900
commitddc7bb9064042a0d5da9546eaf6ff888dca63f0c (patch)
tree0eb63cd7c0cdc221332496ca2ddf3dcfe3cbb876
parent1d4057c20907b7d263d6f2b8cb4350a024859dfe (diff)
[perf] Optimize AWK-style tokenizer for --nth
Approx. 50% less memory footprint and 40% improvement in query time
-rw-r--r--src/tokenizer.go44
-rw-r--r--src/util/chars.go7
-rw-r--r--src/util/chars_test.go21
-rw-r--r--src/util/util.go24
-rw-r--r--src/util/util_test.go20
5 files changed, 57 insertions, 59 deletions
diff --git a/src/tokenizer.go b/src/tokenizer.go
index 05b890a2..eec19898 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -97,10 +97,11 @@ const (
func awkTokenizer(input util.Chars) ([]util.Chars, int) {
// 9, 32
ret := []util.Chars{}
- str := []rune{}
prefixLength := 0
state := awkNil
numChars := input.Length()
+ begin := 0
+ end := 0
for idx := 0; idx < numChars; idx++ {
r := input.Get(idx)
white := r == 9 || r == 32
@@ -109,26 +110,24 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
if white {
prefixLength++
} else {
- state = awkBlack
- str = append(str, r)
+ state, begin, end = awkBlack, idx, idx+1
}
case awkBlack:
- str = append(str, r)
+ end = idx + 1
if white {
state = awkWhite
}
case awkWhite:
if white {
- str = append(str, r)
+ end = idx + 1
} else {
- ret = append(ret, util.RunesToChars(str))
- state = awkBlack
- str = []rune{r}
+ ret = append(ret, input.Slice(begin, end))
+ state, begin, end = awkBlack, idx, idx+1
}
}
}
- if len(str) > 0 {
- ret = append(ret, util.RunesToChars(str))
+ if begin < end {
+ ret = append(ret, input.Slice(begin, end))
}
return ret, prefixLength
}
@@ -187,19 +186,19 @@ func Transform(tokens []Token, withNth []Range) []Token {
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
for idx, r := range withNth {
- part := []rune{}
+ parts := []util.Chars{}
minIdx := 0
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
- part = append(part, joinTokensAsRunes(tokens)...)
+ parts = append(parts, util.RunesToChars(joinTokensAsRunes(tokens)))
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
- part = append(part, tokens[idx-1].text.ToRunes()...)
+ parts = append(parts, tokens[idx-1].text)
}
}
} else {
@@ -226,17 +225,32 @@ func Transform(tokens []Token, withNth []Range) []Token {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
- part = append(part, tokens[idx-1].text.ToRunes()...)
+ parts = append(parts, tokens[idx-1].text)
}
}
}
+ // Merge multiple parts
+ var merged util.Chars
+ switch len(parts) {
+ case 0:
+ merged = util.RunesToChars([]rune{})
+ case 1:
+ merged = parts[0]
+ default:
+ runes := []rune{}
+ for _, part := range parts {
+ runes = append(runes, part.ToRunes()...)
+ }
+ merged = util.RunesToChars(runes)
+ }
+
var prefixLength int
if minIdx < numTokens {
prefixLength = tokens[minIdx].prefixLength
} else {
prefixLength = 0
}
- transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)}
+ transTokens[idx] = Token{merged, prefixLength, merged.TrimLength()}
}
return transTokens
}
diff --git a/src/util/chars.go b/src/util/chars.go
index 25a15ddd..6034ee53 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -111,3 +111,10 @@ func (chars *Chars) ToRunes() []rune {
}
return runes
}
+
+func (chars *Chars) Slice(b int, e int) Chars {
+ if chars.runes != nil {
+ return Chars{runes: chars.runes[b:e]}
+ }
+ return Chars{bytes: chars.bytes[b:e]}
+}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
index e42cfb7c..2cb6fc76 100644
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@@ -34,3 +34,24 @@ func TestCharsToString(t *testing.T) {
t.Error()
}
}
+
+func TestTrimLength(t *testing.T) {
+ check := func(str string, exp int) {
+ chars := ToChars([]byte(str))
+ trimmed := chars.TrimLength()
+ if trimmed != exp {
+ t.Errorf("Invalid TrimLength result for '%s': %d (expected %d)",
+ str, trimmed, exp)
+ }
+ }
+ check("hello", 5)
+ check("hello ", 5)
+ check("hello ", 5)
+ check(" hello", 5)
+ check(" hello", 5)
+ check(" hello ", 5)
+ check(" hello ", 5)
+ check("h o", 5)
+ check(" h o ", 5)
+ check(" ", 0)
+}
diff --git a/src/util/util.go b/src/util/util.go
index 90cc28b4..a95340e7 100644
--- a/src/util/util.go
+++ b/src/util/util.go
@@ -83,30 +83,6 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
-// TrimLen returns the length of trimmed rune array
-func TrimLen(runes []rune) int {
- var i int
- for i = len(runes) - 1; i >= 0; i-- {
- char := runes[i]
- if char != ' ' && char != '\t' {
- break
- }
- }
- // Completely empty
- if i < 0 {
- return 0
- }
-
- var j int
- for j = 0; j < len(runes); j++ {
- char := runes[j]
- if char != ' ' && char != '\t' {
- break
- }
- }
- return i - j + 1
-}
-
// ExecCommand executes the given command with $SHELL
func ExecCommand(command string) *exec.Cmd {
shell := os.Getenv("SHELL")
diff --git a/src/util/util_test.go b/src/util/util_test.go
index 8aeaeac5..06cfd4f2 100644
--- a/src/util/util_test.go
+++ b/src/util/util_test.go
@@ -20,23 +20,3 @@ func TestContrain(t *testing.T) {
t.Error("Expected", 3)
}
}
-
-func TestTrimLen(t *testing.T) {
- check := func(str string, exp int) {
- trimmed := TrimLen([]rune(str))
- if trimmed != exp {
- t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)",
- str, trimmed, exp)
- }
- }
- check("hello", 5)
- check("hello ", 5)
- check("hello ", 5)
- check(" hello", 5)
- check(" hello", 5)
- check(" hello ", 5)
- check(" hello ", 5)
- check("h o", 5)
- check(" h o ", 5)
- check(" ", 0)
-}