summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Junegunn Choi <junegunn.c@gmail.com>, 2017-07-18 02:17:05 +0900
committer: Junegunn Choi <junegunn.c@gmail.com>, 2017-07-18 02:17:05 +0900
commit: 5e72709613b816531c1e0aed6a710257e08bb5d8 (patch)
tree: 5298c2840ed64e7be620494edd8a63830a6d4213
parent: 9e85cba0d06025983a1a747bfc06c9955388d9c0 (diff)
Speed up initial scanning with bitwise AND operation
-rw-r--r-- src/util/chars.go | 58
1 files changed, 36 insertions, 22 deletions
diff --git a/src/util/chars.go b/src/util/chars.go
index 8325cf44..5e702004 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -6,6 +6,11 @@ import (
"unsafe"
)
+const (
+ overflow64 uint64 = 0x8080808080808080 // high bit (0x80) of each byte in an 8-byte word; a non-zero AND means some byte is >= 0x80
+ overflow32 uint32 = 0x80808080 // the same per-byte high-bit mask for a 4-byte word
+)
+
type Chars struct {
slice []byte // or []rune
inBytes bool
@@ -17,33 +22,42 @@ type Chars struct {
Index int32
}
-// ToChars converts byte array into rune array
-func ToChars(bytes []byte) Chars {
- var runes []rune
- inBytes := true
- numBytes := len(bytes)
- for i := 0; i < numBytes; {
- if bytes[i] < utf8.RuneSelf {
- if !inBytes {
- runes = append(runes, rune(bytes[i]))
- }
- i++
- } else {
- if inBytes {
- inBytes = false
- runes = make([]rune, i, numBytes)
- for j := 0; j < i; j++ {
- runes[j] = rune(bytes[j])
- }
- }
- r, sz := utf8.DecodeRune(bytes[i:])
- i += sz
- runes = append(runes, r)
+// checkAscii reports whether bytes consists solely of ASCII characters,
+// i.e. every byte is below utf8.RuneSelf.
+//
+// When a non-ASCII byte is found it returns false together with an index i
+// such that bytes[:i] is known to be pure ASCII. In the word-sized loops i is
+// the start of the offending word, not the exact position of the byte, so
+// callers must treat it as a lower bound only. On success it returns
+// (true, 0).
+func checkAscii(bytes []byte) (bool, int) {
+	i := 0
+	// Test 8 bytes per iteration: ANDing with overflow64 keeps only the high
+	// bit of each byte. The mask is identical for every byte, so the result
+	// does not depend on byte order. The bound is inclusive so that a final
+	// full 8-byte word is handled here instead of by the slower loops below.
+	// NOTE(review): &bytes[i] is not necessarily 8-byte aligned; confirm that
+	// every target architecture tolerates unaligned loads.
+	for ; i <= len(bytes)-8; i += 8 {
+		if (overflow64 & *(*uint64)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
+		}
+	}
+	// At most 7 bytes remain; take one 4-byte word if there is room for it.
+	for ; i <= len(bytes)-4; i += 4 {
+		if (overflow32 & *(*uint32)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
 		}
 	}
+	// Check the trailing bytes (fewer than 4) one at a time.
+	for ; i < len(bytes); i++ {
+		if bytes[i] >= utf8.RuneSelf {
+			return false, i
+		}
+	}
+	return true, 0
+}
+
+// ToChars wraps bytes in a Chars: pure-ASCII input is kept as the original byte slice (no copy), anything else is decoded to runes and passed to RunesToChars
+func ToChars(bytes []byte) Chars {
+ inBytes, bytesUntil := checkAscii(bytes) // bytesUntil: length of a prefix known to be ASCII; only used when inBytes is false
if inBytes {
return Chars{slice: bytes, inBytes: inBytes}
}
+
+ runes := make([]rune, bytesUntil, len(bytes)) // capacity len(bytes): each decoded rune consumes at least one byte, so the appends below never reallocate
+ for i := 0; i < bytesUntil; i++ { // widen the ASCII prefix, one byte per rune
+ runes[i] = rune(bytes[i])
+ }
+ for i := bytesUntil; i < len(bytes); { // decode the remainder as UTF-8; i advances by each rune's encoded size
+ r, sz := utf8.DecodeRune(bytes[i:])
+ i += sz
+ runes = append(runes, r)
+ }
return RunesToChars(runes)
}