summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJunegunn Choi <junegunn.c@gmail.com>2016-09-07 09:58:18 +0900
committerJunegunn Choi <junegunn.c@gmail.com>2016-09-18 14:34:46 +0900
commit2fc7c18747250ebf8adf68d2057ec22af6976f29 (patch)
treeaab013c4492a82dd613866a35b98fc9de42f533d
parent8ef2420677abf5cca27b47bead6e70e42220c7aa (diff)
Revise ranking algorithm
-rw-r--r--CHANGELOG.md7
-rwxr-xr-xinstall4
-rw-r--r--man/man1/fzf-tmux.12
-rw-r--r--man/man1/fzf.112
-rw-r--r--src/README.md17
-rw-r--r--src/algo/algo.go621
-rw-r--r--src/algo/algo_test.go145
-rw-r--r--src/chunklist_test.go2
-rw-r--r--src/constants.go10
-rw-r--r--src/core.go7
-rw-r--r--src/matcher.go14
-rw-r--r--src/options.go29
-rw-r--r--src/pattern.go83
-rw-r--r--src/pattern_test.go61
-rw-r--r--src/result.go70
-rw-r--r--src/result_test.go26
-rw-r--r--src/terminal.go23
-rw-r--r--src/util/slab.go12
-rw-r--r--src/util/util.go24
-rw-r--r--test/test_go.rb224
20 files changed, 947 insertions, 446 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6feff3e4..dee72641 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,13 @@
CHANGELOG
=========
+0.15.0
+------
+- Improved fuzzy search algorithm
+ - Added `--algo=[v1|v2]` option so one can still choose the old algorithm
+ which values the search performance over the quality of the result
+- Advanced scoring criteria
+
0.13.5
------
- Memory and performance optimization
diff --git a/install b/install
index 67447874..4d9e44ac 100755
--- a/install
+++ b/install
@@ -2,8 +2,8 @@
set -u
-[[ "$@" =~ --pre ]] && version=0.13.5 pre=1 ||
- version=0.13.5 pre=0
+[[ "$@" =~ --pre ]] && version=0.15.0 pre=1 ||
+ version=0.15.0 pre=0
auto_completion=
key_bindings=
diff --git a/man/man1/fzf-tmux.1 b/man/man1/fzf-tmux.1
index 7be73171..92d25e9f 100644
--- a/man/man1/fzf-tmux.1
+++ b/man/man1/fzf-tmux.1
@@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
..
-.TH fzf-tmux 1 "Aug 2016" "fzf 0.13.5" "fzf-tmux - open fzf in tmux split pane"
+.TH fzf-tmux 1 "Sep 2016" "fzf 0.15.0" "fzf-tmux - open fzf in tmux split pane"
.SH NAME
fzf-tmux - open fzf in tmux split pane
diff --git a/man/man1/fzf.1 b/man/man1/fzf.1
index 69c360f3..93882e3d 100644
--- a/man/man1/fzf.1
+++ b/man/man1/fzf.1
@@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
..
-.TH fzf 1 "Aug 2016" "fzf 0.13.5" "fzf - a command-line fuzzy finder"
+.TH fzf 1 "Sep 2016" "fzf 0.15.0" "fzf - a command-line fuzzy finder"
.SH NAME
fzf - a command-line fuzzy finder
@@ -48,6 +48,16 @@ Case-insensitive match (default: smart-case match)
.B "+i"
Case-sensitive match
.TP
+.BI "--algo=" TYPE
+Fuzzy matching algorithm (default: v2)
+
+.br
+.BR v2 " Optimal scoring algorithm (quality)"
+.br
+.BR v1 " Faster but not guaranteed to find the optimal result (performance)"
+.br
+
+.TP
.BI "-n, --nth=" "N[,..]"
Comma-separated list of field index expressions for limiting search scope.
See \fBFIELD INDEX EXPRESSION\fR for the details.
diff --git a/src/README.md b/src/README.md
index ced4296b..272c7554 100644
--- a/src/README.md
+++ b/src/README.md
@@ -61,6 +61,23 @@ make install
make linux
```
+Test
+----
+
+Unit tests can be run with `make test`. Integration tests are written in Ruby
+script that should be run on tmux.
+
+```sh
+# Unit tests
+make test
+
+# Install the executable to ../bin directory
+make install
+
+# Integration tests
+ruby ../test/test_go.rb
+```
+
Third-party libraries used
--------------------------
diff --git a/src/algo/algo.go b/src/algo/algo.go
index 89723964..622c9607 100644
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@@ -1,19 +1,91 @@
package algo
+/*
+
+Algorithm
+---------
+
+FuzzyMatchV1 finds the first "fuzzy" occurrence of the pattern within the given
+text in O(n) time where n is the length of the text. Once the position of the
+last character is located, it traverses backwards to see if there's a shorter
+substring that matches the pattern.
+
+ a_____b___abc__ To find "abc"
+ *-----*-----*> 1. Forward scan
+ <*** 2. Backward scan
+
+The algorithm is simple and fast, but as it only sees the first occurrence,
+it is not guaranteed to find the occurrence with the highest score.
+
+ a_____b__c__abc
+ *-----*--* ***
+
+FuzzyMatchV2 implements a modified version of Smith-Waterman algorithm to find
+the optimal solution (highest score) according to the scoring criteria. Unlike
+the original algorithm, omission or mismatch of a character in the pattern is
+not allowed.
+
+Performance
+-----------
+
+The new V2 algorithm is slower than V1 as it examines all occurrences of the
+pattern instead of stopping immediately after finding the first one. The time
+complexity of the algorithm is O(nm) if a match is found and O(n) otherwise
+where n is the length of the item and m is the length of the pattern. Thus, the
+performance overhead may not be noticeable for a query with high selectivity.
+However, if the performance is more important than the quality of the result,
+you can still choose v1 algorithm with --algo=v1.
+
+Scoring criteria
+----------------
+
+- We prefer matches at special positions, such as the start of a word, or
+ uppercase character in camelCase words.
+
+- That is, we prefer an occurrence of the pattern with more characters
+ matching at special positions, even if the total match length is longer.
+ e.g. "fuzzyfinder" vs. "fuzzy-finder" on "ff"
+ ````````````
+- Also, if the first character in the pattern appears at one of the special
+ positions, the bonus point for the position is multiplied by a constant
+ as it is extremely likely that the first character in the typed pattern
+ has more significance than the rest.
+ e.g. "fo-bar" vs. "foob-r" on "br"
+ ``````
+- But since fzf is still a fuzzy finder, not an acronym finder, we should also
+ consider the total length of the matched substring. This is why we have the
+ gap penalty. The gap penalty increases as the length of the gap (distance
+ between the matching characters) increases, so the effect of the bonus is
+ eventually cancelled at some point.
+ e.g. "fuzzyfinder" vs. "fuzzy-blurry-finder" on "ff"
+ ```````````
+- Consequently, it is crucial to find the right balance between the bonus
+ and the gap penalty. The parameters were chosen that the bonus is cancelled
+ when the gap size increases beyond 8 characters.
+
+- The bonus mechanism can have the undesirable side effect where consecutive
+ matches are ranked lower than the ones with gaps.
+ e.g. "foobar" vs. "foo-bar" on "foob"
+ ```````
+- To correct this anomaly, we also give extra bonus point to each character
+ in a consecutive matching chunk.
+ e.g. "foobar" vs. "foo-bar" on "foob"
+ ``````
+- The amount of consecutive bonus is primarily determined by the bonus of the
+ first character in the chunk.
+ e.g. "foobar" vs. "out-of-bound" on "oob"
+ ````````````
+*/
+
import (
+ "fmt"
"strings"
"unicode"
"github.com/junegunn/fzf/src/util"
)
-/*
- * String matching algorithms here do not use strings.ToLower to avoid
- * performance penalty. And they assume pattern runes are given in lowercase
- * letters when caseSensitive is false.
- *
- * In short: They try to do as little work as possible.
- */
+var DEBUG bool
func indexAt(index int, max int, forward bool) int {
if forward {
@@ -24,14 +96,48 @@ func indexAt(index int, max int, forward bool) int {
// Result contains the results of running a match function.
type Result struct {
+ // TODO int32 should suffice
Start int
End int
-
- // Items are basically sorted by the lengths of matched substrings.
- // But we slightly adjust the score with bonus for better results.
- Bonus int
+ Score int
}
+const (
+ scoreMatch = 16
+ scoreGapStart = -3
+ scoreGapExtention = -1
+
+ // We prefer matches at the beginning of a word, but the bonus should not be
+ // too great to prevent the longer acronym matches from always winning over
+ // shorter fuzzy matches. The bonus point here was specifically chosen that
+ // the bonus is cancelled when the gap between the acronyms grows over
+ // 8 characters, which is approximately the average length of the words found
+ // in web2 dictionary and my file system.
+ bonusBoundary = scoreMatch / 2
+
+ // Although bonus point for non-word characters is non-contextual, we need it
+ // for computing bonus points for consecutive chunks starting with a non-word
+ // character.
+ bonusNonWord = scoreMatch / 2
+
+ // Edge-triggered bonus for matches in camelCase words.
+ // Compared to word-boundary case, they don't accompany single-character gaps
+ // (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
+ bonusCamel123 = bonusBoundary + scoreGapExtention
+
+ // Minimum bonus point given to characters in consecutive chunks.
+ // Note that bonus points for consecutive matches shouldn't have needed if we
+ // used fixed match score as in the original algorithm.
+ bonusConsecutive = -(scoreGapStart + scoreGapExtention)
+
+ // The first character in the typed pattern usually has more significance
+ // than the rest so it's important that it appears at special positions where
+ // bonus points are given. e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
+ // The amount of the extra bonus should be limited so that the gap penalty is
+ // still respected.
+ bonusFirstCharMultiplier = 2
+)
+
type charClass int
const (
@@ -42,85 +148,350 @@ const (
charNumber
)
-func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int {
- var bonus int
- pidx := 0
- lenPattern := len(pattern)
- consecutive := false
- prevClass := charNonWord
- for index := util.Max(0, sidx-1); index < eidx; index++ {
- char := text.Get(index)
+func posArray(withPos bool, len int) *[]int {
+ if withPos {
+ pos := make([]int, 0, len)
+ return &pos
+ }
+ return nil
+}
+
+func alloc16(offset int, slab *util.Slab, size int, clear bool) (int, []int16) {
+ if slab != nil && cap(slab.I16) > offset+size {
+ slice := slab.I16[offset : offset+size]
+ if clear {
+ for idx := range slice {
+ slice[idx] = 0
+ }
+ }
+ return offset + size, slice
+ }
+ return offset, make([]int16, size)
+}
+
+func alloc32(offset int, slab *util.Slab, size int, clear bool) (int, []int32) {
+ if slab != nil && cap(slab.I32) > offset+size {
+ slice := slab.I32[offset : offset+size]
+ if clear {
+ for idx := range slice {
+ slice[idx] = 0
+ }
+ }
+ return offset + size, slice
+ }
+ return offset, make([]int32, size)
+}
+
+func charClassOfAscii(char rune) charClass {
+ if char >= 'a' && char <= 'z' {
+ return charLower
+ } else if char >= 'A' && char <= 'Z' {
+ return charUpper
+ } else if char >= '0' && char <= '9' {
+ return charNumber
+ }
+ return charNonWord
+}
+
+func charClassOfNonAscii(char rune) charClass {
+ if unicode.IsLower(char) {
+ return charLower
+ } else if unicode.IsUpper(char) {
+ return charUpper
+ } else if unicode.IsNumber(char) {
+ return charNumber
+ } else if unicode.IsLetter(char) {
+ return charLetter
+ }
+ return charNonWord
+}
+
+func charClassOf(char rune) charClass {
+ if char <= unicode.MaxASCII {
+ return charClassOfAscii(char)
+ }
+ return charClassOfNonAscii(char)
+}
+
+func bonusFor(prevClass charClass, class charClass) int16 {
+ if prevClass == charNonWord && class != charNonWord {
+ // Word boundary
+ return bonusBoundary
+ } else if prevClass == charLower && class == charUpper ||
+ prevClass != charNumber && class == charNumber {
+ // camelCase letter123
+ return bonusCamel123
+ } else if class == charNonWord {
+ return bonusNonWord
+ }
+ return 0
+}
+
+func bonusAt(input util.Chars, idx int) int16 {
+ if idx == 0 {
+ return bonusBoundary
+ }
+ return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx)))
+}
+
+type Algo func(caseSensitive bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
+
+func FuzzyMatchV2(caseSensitive bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
+ // Assume that pattern is given in lowercase if case-insensitive.
+ // First check if there's a match and calculate bonus for each position.
+ // If the input string is too long, consider finding the matching chars in
+ // this phase as well (non-optimal alignment).
+ N := input.Length()
+ M := len(pattern)
+ switch M {
+ case 0:
+ return Result{0, 0, 0}, posArray(withPos, M)
+ case 1:
+ return ExactMatchNaive(caseSensitive, forward, input, pattern[0:1], withPos, slab)
+ }
+
+ // Since O(nm) algorithm can be prohibitively expensive for large input,
+ // we fall back to the greedy algorithm.
+ if slab != nil && N*M > cap(slab.I16) {
+ return FuzzyMatchV1(caseSensitive, forward, input, pattern, withPos, slab)
+ }
+
+ // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
+ offset := 0
+ // Bonus point for each position
+ offset, B := alloc16(offset, slab, N, false)
+ // The first occurrence of each character in the pattern
+ offset, F := alloc16(offset, slab, M, false)
+ // Rune array
+ _, T := alloc32(0, slab, N, false)
+
+ // Phase 1. Check if there's a match and calculate bonus for each point
+ pidx, lastIdx, prevClass := 0, 0, charNonWord
+ for idx := 0; idx < N; idx++ {
+ char := input.Get(idx)
var class charClass
- if unicode.IsLower(char) {
- class = charLower
- } else if unicode.IsUpper(char) {
- class = charUpper
- } else if unicode.IsLetter(char) {
- class = charLetter
- } else if unicode.IsNumber(char) {
- class = charNumber
+ if char <= unicode.MaxASCII {
+ class = charClassOfAscii(char)
} else {
- class = charNonWord
+ class = charClassOfNonAscii(char)
}
- var point int
- if prevClass == charNonWord && class != charNonWord {
- // Word boundary
- point = 2
- } else if prevClass == charLower && class == charUpper ||
- prevClass != charNumber && class == charNumber {
- // camelCase letter123
- point = 1
+ if !caseSensitive && class == charUpper {
+ if char <= unicode.MaxASCII {
+ char += 32
+ } else {
+ char = unicode.To(unicode.LowerCase, char)
+ }
}
+
+ T[idx] = char
+ B[idx] = bonusFor(prevClass, class)
prevClass = class
- if index >= sidx {
- if !caseSensitive {
- if char >= 'A' && char <= 'Z' {
- char += 32
- } else if char > unicode.MaxASCII {
- char = unicode.To(unicode.LowerCase, char)
+ if pidx < M {
+ if char == pattern[pidx] {
+ lastIdx = idx
+ F[pidx] = int16(idx)
+ pidx++
+ }
+ } else {
+ if char == pattern[M-1] {
+ lastIdx = idx
+ }
+ }
+ }
+ if pidx != M {
+ return Result{-1, -1, 0}, nil
+ }
+
+ // Phase 2. Fill in score matrix (H)
+ // Unlike the original algorithm, we do not allow omission.
+ width := lastIdx - int(F[0]) + 1
+ offset, H := alloc16(offset, slab, width*M, false)
+
+ // Possible length of consecutive chunk at each position.
+ offset, C := alloc16(offset, slab, width*M, false)
+
+ maxScore, maxScorePos := int16(0), 0
+ for i := 0; i < M; i++ {
+ I := i * width
+ inGap := false
+ for j := int(F[i]); j <= lastIdx; j++ {
+ j0 := j - int(F[0])
+ var s1, s2, consecutive int16
+
+ if j > int(F[i]) {
+ if inGap {
+ s2 = H[I+j0-1] + scoreGapExtention
+ } else {
+ s2 = H[I+j0-1] + scoreGapStart
}
}
- pchar := pattern[pidx]
- if pchar == char {
- // Boost bonus for the first character in the pattern
- if pidx == 0 {
- point *= 2
+
+ if pattern[i] == T[j] {
+ var diag int16
+ if i > 0 && j0 > 0 {
+ diag = H[I-width+j0-1]
}
- // Bonus to consecutive matching chars
- if consecutive {
- point++
+ s1 = diag + scoreMatch
+ b := B[j]
+ if i > 0 {
+ // j > 0 if i > 0
+ consecutive = C[I-width+j0-1] + 1
+ // Break consecutive chunk
+ if b == bonusBoundary {
+ consecutive = 1
+ } else if consecutive > 1 {
+ b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
+ }
+ } else {
+ consecutive = 1
+ b *= bonusFirstCharMultiplier
}
- bonus += point
+ if s1+b < s2 {
+ s1 += B[j]
+ consecutive = 0
+ } else {
+ s1 += b
+ }
+ }
+ C[I+j0] = consecutive
- if pidx++; pidx == lenPattern {
+ inGap = s1 < s2
+ score := util.Max16(util.Max16(s1, s2), 0)
+ if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
+ maxScore, maxScorePos = score, j
+ }
+ H[I+j0] = score
+ }
+
+ if DEBUG {
+ if i == 0 {
+ fmt.Print(" ")
+ for j := int(F[i]); j <= lastIdx; j++ {
+ fmt.Printf(" " + string(input.Get(j)) + " ")
+ }
+ fmt.Println()
+ }
+ fmt.Print(string(pattern[i]) + " ")
+ for idx := int(F[0]); idx < int(F[i]); idx++ {
+ fmt.Print(" 0 ")
+ }
+ for idx := int(F[i]); idx <= lastIdx; idx++ {
+ fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
+ }
+ fmt.Println()
+
+ fmt.Print(" ")
+ for idx, p := range C[I : I+width] {
+ if idx+int(F[0]) < int(F[i]) {
+ p = 0
+ }
+ fmt.Printf("%2d ", p)
+ }
+ fmt.Println()
+ }
+ }
+
+ // Phase 3. (Optional) Backtrace to find character positions
+ pos := posArray(withPos, M)
+ j := int(F[0])
+ if withPos {
+ i := M - 1
+ j = maxScorePos
+ preferMatch := true
+ for {
+ I := i * width
+ j0 := j - int(F[0])
+ s := H[I+j0]
+
+ var s1, s2 int16
+ if i > 0 && j >= int(F[i]) {
+ s1 = H[I-width+j0-1]
+ }
+ if j > int(F[i]) {
+ s2 = H[I+j0-1]
+ }
+
+ if s > s1 && (s > s2 || s == s2 && preferMatch) {
+ *pos = append(*pos, j)
+ if i == 0 {
break
}
- consecutive = true
+ i--
+ }
+ preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
+ j--
+ }
+ }
+ // Start offset we return here is only relevant when begin tiebreak is used.
+ // However finding the accurate offset requires backtracking, and we don't
+ // want to pay extra cost for the option that has lost its importance.
+ return Result{j, maxScorePos + 1, int(maxScore)}, pos
+}
+
+// Implement the same sorting criteria as V2
+func calculateScore(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) {
+ pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0)
+ pos := posArray(withPos, len(pattern))
+ prevClass := charNonWord
+ if sidx > 0 {
+ prevClass = charClassOf(text.Get(sidx - 1))
+ }
+ for idx := sidx; idx < eidx; idx++ {
+ char := text.Get(idx)
+ class := charClassOf(char)
+ if !caseSensitive {
+ if char >= 'A' && char <= 'Z' {
+ char += 32
+ } else if char > unicode.MaxASCII {
+ char = unicode.To(unicode.LowerCase, char)
+ }
+ }
+ if char == pattern[pidx] {
+ if withPos {
+ *pos = append(*pos, idx)
+ }
+ score += scoreMatch
+ bonus := bonusFor(prevClass, class)
+ if consecutive == 0 {
+ firstBonus = bonus
} else {
- consecutive = false
+ // Break consecutive chunk
+ if bonus == bonusBoundary {
+ firstBonus = bonus
+ }
+ bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive)
}
+ if pidx == 0 {
+ score += int(bonus * bonusFirstCharMultiplier)
+ } else {
+ score += int(bonus)
+ }
+ inGap = false
+ consecutive++
+ pidx++
+ } else {
+ if inGap {
+ score += scoreGapExtention
+ } else {
+ score += scoreGapStart
+ }
+ inGap = true
+ consecutive = 0
+ firstBonus = 0
}
+ prevClass = class
}
- return bonus
+ return score, pos
}
-// FuzzyMatch performs fuzzy-match
-func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
+// FuzzyMatchV1 performs fuzzy-match
+func FuzzyMatchV1(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
if len(pattern) == 0 {
- return Result{0, 0, 0}
- }
-
- // 0. (FIXME) How to find the shortest match?
- // a_____b__c__abc
- // ^^^^^^^^^^ ^^^
- // 1. forward scan (abc)
- // *-----*-----*>
- // a_____b___abc__
- // 2. reverse scan (cba)
- // a_____b___abc__
- // <***
+ return Result{0, 0, 0}, nil
+ }
+
pidx := 0
sidx := -1
eidx := -1
@@ -157,7 +528,8 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run
if sidx >= 0 && eidx >= 0 {
pidx--
for index := eidx - 1; index >= sidx; index-- {
- char := text.Get(indexAt(index, lenRunes, forward))
+ tidx := indexAt(index, lenRunes, forward)
+ char := text.Get(tidx)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@@ -166,7 +538,8 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run
}
}
- pchar := pattern[indexAt(pidx, lenPattern, forward)]
+ pidx_ := indexAt(pidx, lenPattern, forward)
+ pchar := pattern[pidx_]
if char == pchar {
if pidx--; pidx < 0 {
sidx = index
@@ -175,16 +548,14 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run
}
}
- // Calculate the bonus. This can't be done at the same time as the
- // pattern scan above because 'forward' may be false.
if !forward {
sidx, eidx = lenRunes-eidx, lenRunes-sidx
}
- return Result{sidx, eidx,
- evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
+ score, pos := calculateScore(caseSensitive, text, pattern, sidx, eidx, withPos)
+ return Result{sidx, eidx, score}, pos
}
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
// ExactMatchNaive is a basic string searching algorithm that handles case
@@ -192,23 +563,28 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
-// We might try to implement better algorithms in the future:
-// http://en.wikipedia.org/wiki/String_searching_algorithm
-func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
+// Since 0.15.0, this function searches for the match with the highest
+// bonus point, instead of stopping immediately after finding the first match.
+// The solution is much cheaper since there is only one possible alignment of
+// the pattern.
+func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
if len(pattern) == 0 {
- return Result{0, 0, 0}
+ return Result{0, 0, 0}, nil
}
lenRunes := text.Length()
lenPattern := len(pattern)
if lenRunes < lenPattern {
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
+ // For simplicity, only look at the bonus at the first character position
pidx := 0
+ bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
for index := 0; index < lenRunes; index++ {
- char := text.Get(indexAt(index, lenRunes, forward))
+ index_ := indexAt(index, lenRunes, forward)
+ char := text.Get(index_)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@@ -216,33 +592,51 @@ func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern
char = unicode.To(unicode.LowerCase, char)
}
}
- pchar := pattern[indexAt(pidx, lenPattern, forward)]
+ pidx_ := indexAt(pidx, lenPattern, forward)
+ pchar := pattern[pidx_]
if pchar == char {
+ if pidx_ == 0 {
+ bonus = bonusAt(text, index_)
+ }
pidx++
if pidx == lenPattern {
- var sidx, eidx int
- if forward {
- sidx = index - lenPattern + 1
- eidx = index + 1
- } else {
- sidx = lenRunes - (index + 1)
- eidx = lenRunes - (index - lenPattern + 1)
+ if bonus > bestBonus {
+ bestPos, bestBonus = index, bonus
+ }
+ if bonus == bonusBoundary {
+ break
}
- return Result{sidx, eidx,
- evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
+ index -= pidx - 1
+ pidx, bonus = 0, 0
}
} else {
index -= pidx
- pidx = 0
+ pidx, bonus = 0, 0
+ }
+ }
+ if bestPos >= 0 {
+ var sidx, eidx int
+ if forward {
+ sidx = bestPos - lenPattern + 1
+ eidx = bestPos + 1
+ } else {
+ sidx = lenRunes - (bestPos + 1)
+ eidx = lenRunes - (bestPos - lenPattern + 1)
}
+ score, _ := calculateScore(caseSensitive, text, pattern, sidx, eidx, false)
+ return Result{sidx, eidx, score}, nil
}
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
// PrefixMatch performs prefix-match
-func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
+func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
+ if len(pattern) == 0 {
+ return Result{0, 0, 0}, nil
+ }
+
if text.Length() < len(pattern) {
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
for index, r := range pattern {
@@ -251,20 +645,24 @@ func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []ru
char = unicode.ToLower(char)
}
if char != r {
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
}
lenPattern := len(pattern)
- return Result{0, lenPattern,
- evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)}
+ score, _ := calculateScore(caseSensitive, text, pattern, 0, lenPattern, false)
+ return Result{0, lenPattern, score}, nil
}
// SuffixMatch performs suffix-match
-func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
- trimmedLen := text.Length() - text.TrailingWhitespaces()
+func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
+ lenRunes := text.Length()
+ trimmedLen := lenRunes - text.TrailingWhitespaces()
+ if len(pattern) == 0 {
+ return Result{trimmedLen, trimmedLen, 0}, nil
+ }
diff := trimmedLen - len(pattern)
if diff < 0 {
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
for index, r := range pattern {
@@ -273,28 +671,29 @@ func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []ru
char = unicode.ToLower(char)
}
if char != r {
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
}
lenPattern := len(pattern)
sidx := trimmedLen - lenPattern
eidx := trimmedLen
- return Result{sidx, eidx,
- evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
+ score, _ := calculateScore(caseSensitive, text, pattern, sidx, eidx, false)
+ return Result{sidx, eidx, score}, nil
}
// EqualMatch performs equal-match
-func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
- // Note: EqualMatch always return a zero bonus.
- if text.Length() != len(pattern) {
- return Result{-1, -1, 0}
+func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
+ lenPattern := len(pattern)
+ if text.Length() != lenPattern {
+ return Result{-1, -1, 0}, nil
}
runesStr := text.ToString()
if !caseSensitive {
runesStr = strings.ToLower(runesStr)
}
if runesStr == string(pattern) {
- return Result{0, len(pattern), 0}
+ return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern +
+ (bonusFirstCharMultiplier-1)*bonusBoundary}, nil
}
- return Result{-1, -1, 0}
+ return Result{-1, -1, 0}, nil
}
diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go
index 7034dcef..7317eb18 100644
--- a/src/algo/algo_test.go
+++ b/src/algo/algo_test.go
@@ -1,95 +1,154 @@
package algo
import (
+ "sort"
"strings"
"testing"
"github.com/junegunn/fzf/src/util"
)
-func assertMatch(t *testing.T, fun func(bool, bool, util.Chars, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int, eidx int, bonus int) {
+func assertMatch(t *testing.T, fun Algo, caseSensitive, forward bool, input, pattern string, sidx int, eidx int, score int) {
if !caseSensitive {
pattern = strings.ToLower(pattern)
}
- res := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern))
- if res.Start != sidx {
- t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern)
+ res, pos := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern), true, nil)
+ var start, end int
+ if pos == nil || len(*pos) == 0 {
+ start = res.Start
+ end = res.End
+ } else {
+ sort.Ints(*pos)
+ start = (*pos)[0]
+ end = (*pos)[len(*pos)-1] + 1
}
- if res.End != eidx {
- t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", res.End, eidx, input, pattern)
+ if start != sidx {
+ t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", start, sidx, input, pattern)
}
- if res.Bonus != bonus {
- t.Errorf("Invalid bonus: %d (expected: %d, %s / %s)", res.Bonus, bonus, input, pattern)
+ if end != eidx {
+ t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", end, eidx, input, pattern)
+ }
+ if res.Score != score {
+ t.Errorf("Invalid score: %d (expected: %d, %s / %s)", res.Score, score, input, pattern)
}
}
func TestFuzzyMatch(t *testing.T) {
- assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9, 2)
- assertMatch(t, FuzzyMatch, false, true, "foo bar baz", "fbb", 0, 9, 8)
- assertMatch(t, FuzzyMatch, false, true, "/AutomatorDocument.icns", "rdoc", 9, 13, 4)
- assertMatch(t, FuzzyMatch, false, true, "/man1/zshcompctl.1", "zshc", 6, 10, 7)
- assertMatch(t, FuzzyMatch, false, true, "/.oh-my-zsh/cache", "zshc", 8, 13, 8)
- assertMatch(t, FuzzyMatch, false, true, "ab0123 456", "12356", 3, 10, 3)
- assertMatch(t, FuzzyMatch, false, true, "abc123 456", "12356", 3, 10, 5)
-
- assertMatch(t, FuzzyMatch, false, true, "foo/bar/baz", "fbb", 0, 9, 8)
- assertMatch(t, FuzzyMatch, false, true, "fooBarBaz", "fbb", 0, 7, 6)
- assertMatch(t, FuzzyMatch, false, true, "foo barbaz", "fbb", 0, 8, 6)
- assertMatch(t, FuzzyMatch, false, true, "fooBar Baz", "foob", 0, 4, 8)
- assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1, 0)
- assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9, 2)
- assertMatch(t, FuzzyMatch, true, true, "Foo Bar Baz", "fbb", -1, -1, 0)