summaryrefslogtreecommitdiffstats
path: root/src/algo
diff options
context:
space:
mode:
authorJunegunn Choi <junegunn.c@gmail.com>2017-07-30 17:31:50 +0900
committerJunegunn Choi <junegunn.c@gmail.com>2017-07-30 17:31:50 +0900
commit69aa2fea686b6e26418fa352abebd81e0a1ecc7b (patch)
tree12a49eac222198bb7814da1703a190781996e3e8 /src/algo
parent298749bfcd0190745aba83addd9f504363d36924 (diff)
Optimize fuzzy search performance for ASCII strings
Diffstat (limited to 'src/algo')
-rw-r--r--src/algo/algo.go68
1 files changed, 59 insertions, 9 deletions
diff --git a/src/algo/algo.go b/src/algo/algo.go
index c4930c11..a2f0f2b4 100644
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@@ -78,9 +78,11 @@ Scoring criteria
*/
import (
+ "bytes"
"fmt"
"strings"
"unicode"
+ "unicode/utf8"
"github.com/junegunn/fzf/src/util"
)
@@ -251,19 +253,37 @@ func normalizeRune(r rune) rune {
// 2. "pattern" is already normalized if "normalize" is true
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
+func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
+ byteArray := input.Bytes()[from:]
+ idx := bytes.IndexByte(byteArray, b)
+ if idx == 0 {
+ // Can't skip any further
+ return from
+ }
+ // We may need to search for the uppercase letter again. We don't have to
+ // consider normalization as we can be sure that this is an ASCII string.
+ if !caseSensitive && b >= 'a' && b <= 'z' {
+ uidx := bytes.IndexByte(byteArray, b-32)
+ if idx < 0 || uidx >= 0 && uidx < idx {
+ idx = uidx
+ }
+ }
+ if idx < 0 {
+ return -1
+ }
+ return from + idx
+}
+
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
// Assume that pattern is given in lowercase if case-insensitive.
// First check if there's a match and calculate bonus for each position.
// If the input string is too long, consider finding the matching chars in
// this phase as well (non-optimal alignment).
- N := input.Length()
M := len(pattern)
- switch M {
- case 0:
+ if M == 0 {
return Result{0, 0, 0}, posArray(withPos, M)
- case 1:
- return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
}
+ N := input.Length()
// Since O(nm) algorithm can be prohibitively expensive for large input,
// we fall back to the greedy algorithm.
@@ -281,10 +301,31 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
// Rune array
offset32, T := alloc32(offset32, slab, N, false)
- // Phase 1. Check if there's a match and calculate bonus for each point
+ // Phase 1. Optimized search for ASCII string
+ firstIdx := 0
+ if input.IsBytes() {
+ idx := 0
+ for pidx := 0; pidx < M; pidx++ {
+ // Not possible
+ if pattern[pidx] >= utf8.RuneSelf {
+ return Result{-1, -1, 0}, nil
+ }
+ idx = trySkip(&input, caseSensitive, byte(pattern[pidx]), idx)
+ if idx < 0 {
+ return Result{-1, -1, 0}, nil
+ }
+ if pidx == 0 && idx > 0 {
+ // Step back to find the right bonus point
+ firstIdx = idx - 1
+ }
+ idx++
+ }
+ }
+
+ // Phase 2. Calculate bonus for each point
pidx, lastIdx, prevClass := 0, 0, charNonWord
input.CopyRunes(T)
- for idx := 0; idx < N; idx++ {
+ for idx := firstIdx; idx < N; idx++ {
char := T[idx]
var class charClass
if char <= unicode.MaxASCII {
@@ -324,8 +365,17 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
if pidx != M {
return Result{-1, -1, 0}, nil
}
+ if M == 1 && B[F[0]] == bonusBoundary {
+ p := int(F[0])
+ result := Result{p, p + 1, scoreMatch + bonusBoundary*bonusFirstCharMultiplier}
+ if !withPos {
+ return result, nil
+ }
+ pos := []int{p}
+ return result, &pos
+ }
- // Phase 2. Fill in score matrix (H)
+ // Phase 3. Fill in score matrix (H)
// Unlike the original algorithm, we do not allow omission.
width := lastIdx - int(F[0]) + 1
offset16, H := alloc16(offset16, slab, width*M, false)
@@ -414,7 +464,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
}
}
- // Phase 3. (Optional) Backtrace to find character positions
+ // Phase 4. (Optional) Backtrace to find character positions
pos := posArray(withPos, M)
j := int(F[0])
if withPos {