summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Junegunn Choi <junegunn.c@gmail.com> 2016-08-14 04:23:37 +0900
committer Junegunn Choi <junegunn.c@gmail.com> 2016-08-14 04:30:55 +0900
commit d9c8a9a880dd9fc28c9fb466bd1c81cd6a8a159c (patch)
tree 98a2018200734947aa6fa40552f3ccc7463404a2 /src
parent ddc7bb9064042a0d5da9546eaf6ff888dca63f0c (diff)
[perf] Remove memory copy when using string delimiter
Diffstat (limited to 'src')
-rw-r--r-- src/tokenizer.go 12
-rw-r--r-- src/util/chars.go 36
-rw-r--r-- src/util/chars_test.go 25
3 files changed, 67 insertions, 6 deletions
diff --git a/src/tokenizer.go b/src/tokenizer.go
index eec19898..e48f48bf 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -140,13 +140,13 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
return withPrefixLengths(tokens, prefixLength)
}
- var tokens []string
if delimiter.str != nil {
- tokens = strings.Split(text.ToString(), *delimiter.str)
- for i := 0; i < len(tokens)-1; i++ {
- tokens[i] = tokens[i] + *delimiter.str
- }
- } else if delimiter.regex != nil {
+ return withPrefixLengths(text.Split(*delimiter.str), 0)
+ }
+
+ // FIXME performance
+ var tokens []string
+ if delimiter.regex != nil {
str := text.ToString()
for len(str) > 0 {
loc := delimiter.regex.FindStringIndex(str)
diff --git a/src/util/chars.go b/src/util/chars.go
index 6034ee53..12417c66 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -118,3 +118,39 @@ func (chars *Chars) Slice(b int, e int) Chars {
}
return Chars{bytes: chars.bytes[b:e]}
}
+
+// Split cuts chars into consecutive tokens at every occurrence of delimiter.
+// Each token keeps the delimiter that terminated it, so the tokens cover the
+// input from start to end. An empty delimiter yields one token per character,
+// and an input without any match (including an empty input) yields a single
+// token spanning the whole input. Tokens are produced with chars.Slice.
+func (chars *Chars) Split(delimiter string) []Chars {
+	sep := []rune(delimiter)
+	sepLen := len(sep)
+	total := chars.Length()
+
+	tokens := make([]Chars, 0, 1)
+	start := 0
+	pos := 0
+	// Once fewer than sepLen characters remain, no further match is possible.
+	for pos < total && pos+sepLen <= total {
+		found := true
+		for i := 0; i < sepLen; i++ {
+			if chars.Get(pos+i) != sep[i] {
+				found = false
+				break
+			}
+		}
+		if !found {
+			pos++
+			continue
+		}
+		// An empty delimiter matches at every position; advance by at least
+		// one character so the scan always makes progress.
+		pos += Max(sepLen, 1)
+		tokens = append(tokens, chars.Slice(start, pos))
+		start = pos
+	}
+	// Emit the trailing remainder, or one (possibly empty) token when nothing
+	// has been emitted at all.
+	if start < total || len(tokens) == 0 {
+		tokens = append(tokens, chars.Slice(start, total))
+	}
+	return tokens
+}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
index 2cb6fc76..12c629d5 100644
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@@ -55,3 +55,28 @@ func TestTrimLength(t *testing.T) {
check(" h o ", 5)
check(" ", 0)
}
+
+// TestSplit verifies Chars.Split against a table of inputs: plain delimiters,
+// a delimiter that never occurs, the empty delimiter, delimiters as long as or
+// longer than the input, and the empty input.
+func TestSplit(t *testing.T) {
+	// check splits str by delim and compares the result with the expected
+	// tokens, position by position.
+	check := func(str string, delim string, tokens ...string) {
+		input := ToChars([]byte(str))
+		result := input.Split(delim)
+		if len(result) != len(tokens) {
+			t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s",
+				str, len(result), len(tokens), result)
+			// The counts differ, so indexing result by the expected positions
+			// below could run out of range; report and move to the next case.
+			return
+		}
+		for idx, token := range tokens {
+			if result[idx].ToString() != token {
+				t.Errorf("Invalid Split result for '%s': %s (expected %s)",
+					str, result[idx].ToString(), token)
+			}
+		}
+	}
+	check("abc:def::", ":", "abc:", "def:", ":")
+	check("abc:def::", "-", "abc:def::")
+	check("abc", "", "a", "b", "c")
+	check("abc", "a", "a", "bc")
+	check("abc", "ab", "ab", "c")
+	check("abc", "abc", "abc")
+	check("abc", "abcd", "abc")
+	check("", "abcd", "")
+}