diff options
author | Junegunn Choi <junegunn.c@gmail.com> | 2016-09-07 09:58:18 +0900 |
---|---|---|
committer | Junegunn Choi <junegunn.c@gmail.com> | 2016-09-18 14:34:46 +0900 |
commit | 2fc7c18747250ebf8adf68d2057ec22af6976f29 (patch) | |
tree | aab013c4492a82dd613866a35b98fc9de42f533d /src | |
parent | 8ef2420677abf5cca27b47bead6e70e42220c7aa (diff) |
Revise ranking algorithm
Diffstat (limited to 'src')
-rw-r--r-- | src/README.md | 17 | ||||
-rw-r--r-- | src/algo/algo.go | 621 | ||||
-rw-r--r-- | src/algo/algo_test.go | 145 | ||||
-rw-r--r-- | src/chunklist_test.go | 2 | ||||
-rw-r--r-- | src/constants.go | 10 | ||||
-rw-r--r-- | src/core.go | 7 | ||||
-rw-r--r-- | src/matcher.go | 14 | ||||
-rw-r--r-- | src/options.go | 29 | ||||
-rw-r--r-- | src/pattern.go | 83 | ||||
-rw-r--r-- | src/pattern_test.go | 61 | ||||
-rw-r--r-- | src/result.go | 70 | ||||
-rw-r--r-- | src/result_test.go | 26 | ||||
-rw-r--r-- | src/terminal.go | 23 | ||||
-rw-r--r-- | src/util/slab.go | 12 | ||||
-rw-r--r-- | src/util/util.go | 24 |
15 files changed, 855 insertions, 289 deletions
diff --git a/src/README.md b/src/README.md index ced4296b..272c7554 100644 --- a/src/README.md +++ b/src/README.md @@ -61,6 +61,23 @@ make install make linux ``` +Test +---- + +Unit tests can be run with `make test`. Integration tests are written in Ruby +script that should be run on tmux. + +```sh +# Unit tests +make test + +# Install the executable to ../bin directory +make install + +# Integration tests +ruby ../test/test_go.rb +``` + Third-party libraries used -------------------------- diff --git a/src/algo/algo.go b/src/algo/algo.go index 89723964..622c9607 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -1,19 +1,91 @@ package algo +/* + +Algorithm +--------- + +FuzzyMatchV1 finds the first "fuzzy" occurrence of the pattern within the given +text in O(n) time where n is the length of the text. Once the position of the +last character is located, it traverses backwards to see if there's a shorter +substring that matches the pattern. + + a_____b___abc__ To find "abc" + *-----*-----*> 1. Forward scan + <*** 2. Backward scan + +The algorithm is simple and fast, but as it only sees the first occurrence, +it is not guaranteed to find the occurrence with the highest score. + + a_____b__c__abc + *-----*--* *** + +FuzzyMatchV2 implements a modified version of Smith-Waterman algorithm to find +the optimal solution (highest score) according to the scoring criteria. Unlike +the original algorithm, omission or mismatch of a character in the pattern is +not allowed. + +Performance +----------- + +The new V2 algorithm is slower than V1 as it examines all occurrences of the +pattern instead of stopping immediately after finding the first one. The time +complexity of the algorithm is O(nm) if a match is found and O(n) otherwise +where n is the length of the item and m is the length of the pattern. Thus, the +performance overhead may not be noticeable for a query with high selectivity. +However, if the performance is more important than the quality of the result, +you can still choose v1 algorithm with --algo=v1. + +Scoring criteria +---------------- + +- We prefer matches at special positions, such as the start of a word, or + uppercase character in camelCase words. + +- That is, we prefer an occurrence of the pattern with more characters + matching at special positions, even if the total match length is longer. + e.g. "fuzzyfinder" vs. "fuzzy-finder" on "ff" + ```````````` +- Also, if the first character in the pattern appears at one of the special + positions, the bonus point for the position is multiplied by a constant + as it is extremely likely that the first character in the typed pattern + has more significance than the rest. + e.g. "fo-bar" vs. "foob-r" on "br" + `````` +- But since fzf is still a fuzzy finder, not an acronym finder, we should also + consider the total length of the matched substring. This is why we have the + gap penalty. The gap penalty increases as the length of the gap (distance + between the matching characters) increases, so the effect of the bonus is + eventually cancelled at some point. + e.g. "fuzzyfinder" vs. "fuzzy-blurry-finder" on "ff" + ``````````` +- Consequently, it is crucial to find the right balance between the bonus + and the gap penalty. The parameters were chosen that the bonus is cancelled + when the gap size increases beyond 8 characters. + +- The bonus mechanism can have the undesirable side effect where consecutive + matches are ranked lower than the ones with gaps. + e.g. "foobar" vs. "foo-bar" on "foob" + ``````` +- To correct this anomaly, we also give extra bonus point to each character + in a consecutive matching chunk. + e.g. "foobar" vs. "foo-bar" on "foob" + `````` +- The amount of consecutive bonus is primarily determined by the bonus of the + first character in the chunk. + e.g. "foobar" vs. "out-of-bound" on "oob" + ```````````` +*/ + import ( + "fmt" "strings" "unicode" "github.com/junegunn/fzf/src/util" ) -/* - * String matching algorithms here do not use strings.ToLower to avoid - * performance penalty. And they assume pattern runes are given in lowercase - * letters when caseSensitive is false. - * - * In short: They try to do as little work as possible. - */ +var DEBUG bool func indexAt(index int, max int, forward bool) int { if forward { @@ -24,14 +96,48 @@ func indexAt(index int, max int, forward bool) int { // Result contains the results of running a match function. type Result struct { + // TODO int32 should suffice Start int End int - - // Items are basically sorted by the lengths of matched substrings. - // But we slightly adjust the score with bonus for better results. - Bonus int + Score int } +const ( + scoreMatch = 16 + scoreGapStart = -3 + scoreGapExtention = -1 + + // We prefer matches at the beginning of a word, but the bonus should not be + // too great to prevent the longer acronym matches from always winning over + // shorter fuzzy matches. The bonus point here was specifically chosen that + // the bonus is cancelled when the gap between the acronyms grows over + // 8 characters, which is approximately the average length of the words found + // in web2 dictionary and my file system. + bonusBoundary = scoreMatch / 2 + + // Although bonus point for non-word characters is non-contextual, we need it + // for computing bonus points for consecutive chunks starting with a non-word + // character. + bonusNonWord = scoreMatch / 2 + + // Edge-triggered bonus for matches in camelCase words. + // Compared to word-boundary case, they don't accompany single-character gaps + // (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly. + bonusCamel123 = bonusBoundary + scoreGapExtention + + // Minimum bonus point given to characters in consecutive chunks. + // Note that bonus points for consecutive matches shouldn't have needed if we + // used fixed match score as in the original algorithm. + bonusConsecutive = -(scoreGapStart + scoreGapExtention) + + // The first character in the typed pattern usually has more significance + // than the rest so it's important that it appears at special positions where + // bonus points are given. e.g. "to-go" vs. "ongoing" on "og" or on "ogo". + // The amount of the extra bonus should be limited so that the gap penalty is + // still respected. + bonusFirstCharMultiplier = 2 +) + type charClass int const ( @@ -42,85 +148,350 @@ const ( charNumber ) -func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int { - var bonus int - pidx := 0 - lenPattern := len(pattern) - consecutive := false - prevClass := charNonWord - for index := util.Max(0, sidx-1); index < eidx; index++ { - char := text.Get(index) +func posArray(withPos bool, len int) *[]int { + if withPos { + pos := make([]int, 0, len) + return &pos + } + return nil +} + +func alloc16(offset int, slab *util.Slab, size int, clear bool) (int, []int16) { + if slab != nil && cap(slab.I16) > offset+size { + slice := slab.I16[offset : offset+size] + if clear { + for idx := range slice { + slice[idx] = 0 + } + } + return offset + size, slice + } + return offset, make([]int16, size) +} + +func alloc32(offset int, slab *util.Slab, size int, clear bool) (int, []int32) { + if slab != nil && cap(slab.I32) > offset+size { + slice := slab.I32[offset : offset+size] + if clear { + for idx := range slice { + slice[idx] = 0 + } + } + return offset + size, slice + } + return offset, make([]int32, size) +} + +func charClassOfAscii(char rune) charClass { + if char >= 'a' && char <= 'z' { + return charLower + } else if char >= 'A' && char <= 'Z' { + return charUpper + } else if char >= '0' && char <= '9' { + return charNumber + } + return charNonWord +} + +func charClassOfNonAscii(char rune) charClass { + if unicode.IsLower(char) { + return charLower + } else if unicode.IsUpper(char) { + return charUpper + } else if unicode.IsNumber(char) { + return charNumber + } else if unicode.IsLetter(char) { + return charLetter + } + return charNonWord +} + +func charClassOf(char rune) charClass { + if char <= unicode.MaxASCII { + return charClassOfAscii(char) + } + return charClassOfNonAscii(char) +} + +func bonusFor(prevClass charClass, class charClass) int16 { + if prevClass == charNonWord && class != charNonWord { + // Word boundary + return bonusBoundary + } else if prevClass == charLower && class == charUpper || + prevClass != charNumber && class == charNumber { + // camelCase letter123 + return bonusCamel123 + } else if class == charNonWord { + return bonusNonWord + } + return 0 +} + +func bonusAt(input util.Chars, idx int) int16 { + if idx == 0 { + return bonusBoundary + } + return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx))) +} + +type Algo func(caseSensitive bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) + +func FuzzyMatchV2(caseSensitive bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { + // Assume that pattern is given in lowercase if case-insensitive. + // First check if there's a match and calculate bonus for each position. + // If the input string is too long, consider finding the matching chars in + // this phase as well (non-optimal alignment). + N := input.Length() + M := len(pattern) + switch M { + case 0: + return Result{0, 0, 0}, posArray(withPos, M) + case 1: + return ExactMatchNaive(caseSensitive, forward, input, pattern[0:1], withPos, slab) + } + + // Since O(nm) algorithm can be prohibitively expensive for large input, + // we fall back to the greedy algorithm. + if slab != nil && N*M > cap(slab.I16) { + return FuzzyMatchV1(caseSensitive, forward, input, pattern, withPos, slab) + } + + // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages + offset := 0 + // Bonus point for each position + offset, B := alloc16(offset, slab, N, false) + // The first occurrence of each character in the pattern + offset, F := alloc16(offset, slab, M, false) + // Rune array + _, T := alloc32(0, slab, N, false) + + // Phase 1. Check if there's a match and calculate bonus for each point + pidx, lastIdx, prevClass := 0, 0, charNonWord + for idx := 0; idx < N; idx++ { + char := input.Get(idx) var class charClass - if unicode.IsLower(char) { - class = charLower - } else if unicode.IsUpper(char) { - class = charUpper - } else if unicode.IsLetter(char) { - class = charLetter - } else if unicode.IsNumber(char) { - class = charNumber + if char <= unicode.MaxASCII { + class = charClassOfAscii(char) } else { - class = charNonWord + class = charClassOfNonAscii(char) } - var point int - if prevClass == charNonWord && class != charNonWord { - // Word boundary - point = 2 - } else if prevClass == charLower && class == charUpper || - prevClass != charNumber && class == charNumber { - // camelCase letter123 - point = 1 + if !caseSensitive && class == charUpper { + if char <= unicode.MaxASCII { + char += 32 + } else { + char = unicode.To(unicode.LowerCase, char) + } } + + T[idx] = char + B[idx] = bonusFor(prevClass, class) prevClass = class - if index >= sidx { - if !caseSensitive { - if char >= 'A' && char <= 'Z' { - char += 32 - } else if char > unicode.MaxASCII { - char = unicode.To(unicode.LowerCase, char) + if pidx < M { + if char == pattern[pidx] { + lastIdx = idx + F[pidx] = int16(idx) + pidx++ + } + } else { + if char == pattern[M-1] { + lastIdx = idx + } + } + } + if pidx != M { + return Result{-1, -1, 0}, nil + } + + // Phase 2. Fill in score matrix (H) + // Unlike the original algorithm, we do not allow omission. + width := lastIdx - int(F[0]) + 1 + offset, H := alloc16(offset, slab, width*M, false) + + // Possible length of consecutive chunk at each position. + offset, C := alloc16(offset, slab, width*M, false) + + maxScore, maxScorePos := int16(0), 0 + for i := 0; i < M; i++ { + I := i * width + inGap := false + for j := int(F[i]); j <= lastIdx; j++ { + j0 := j - int(F[0]) + var s1, s2, consecutive int16 + + if j > int(F[i]) { + if inGap { + s2 = H[I+j0-1] + scoreGapExtention + } else { + s2 = H[I+j0-1] + scoreGapStart } } - pchar := pattern[pidx] - if pchar == char { - // Boost bonus for the first character in the pattern - if pidx == 0 { - point *= 2 + + if pattern[i] == T[j] { + var diag int16 + if i > 0 && j0 > 0 { + diag = H[I-width+j0-1] } - // Bonus to consecutive matching chars - if consecutive { - point++ + s1 = diag + scoreMatch + b := B[j] + if i > 0 { + // j > 0 if i > 0 + consecutive = C[I-width+j0-1] + 1 + // Break consecutive chunk + if b == bonusBoundary { + consecutive = 1 + } else if consecutive > 1 { + b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1])) + } + } else { + consecutive = 1 + b *= bonusFirstCharMultiplier } - bonus += point + if s1+b < s2 { + s1 += B[j] + consecutive = 0 + } else { + s1 += b + } + } + C[I+j0] = consecutive - if pidx++; pidx == lenPattern { + inGap = s1 < s2 + score := util.Max16(util.Max16(s1, s2), 0) + if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) { + maxScore, maxScorePos = score, j + } + H[I+j0] = score + } + + if DEBUG { + if i == 0 { + fmt.Print(" ") + for j := int(F[i]); j <= lastIdx; j++ { + fmt.Printf(" " + string(input.Get(j)) + " ") + } + fmt.Println() + } + fmt.Print(string(pattern[i]) + " ") + for idx := int(F[0]); idx < int(F[i]); idx++ { + fmt.Print(" 0 ") + } + for idx := int(F[i]); idx <= lastIdx; idx++ { + fmt.Printf("%2d ", H[i*width+idx-int(F[0])]) + } + fmt.Println() + + fmt.Print(" ") + for idx, p := range C[I : I+width] { + if idx+int(F[0]) < int(F[i]) { + p = 0 + } + fmt.Printf("%2d ", p) + } + fmt.Println() + } + } + + // Phase 3. (Optional) Backtrace to find character positions + pos := posArray(withPos, M) + j := int(F[0]) + if withPos { + i := M - 1 + j = maxScorePos + preferMatch := true + for { + I := i * width + j0 := j - int(F[0]) + s := H[I+j0] + + var s1, s2 int16 + if i > 0 && j >= int(F[i]) { + s1 = H[I-width+j0-1] + } + if j > int(F[i]) { + s2 = H[I+j0-1] + } + + if s > s1 && (s > s2 || s == s2 && preferMatch) { + *pos = append(*pos, j) + if i == 0 { break } - consecutive = true + i-- + } + preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0 + j-- + } + } + // Start offset we return here is only relevant when begin tiebreak is used. + // However finding the accurate offset requires backtracking, and we don't + // want to pay extra cost for the option that has lost its importance. + return Result{j, maxScorePos + 1, int(maxScore)}, pos +} + +// Implement the same sorting criteria as V2 +func calculateScore(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { + pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) + pos := posArray(withPos, len(pattern)) + prevClass := charNonWord + if sidx > 0 { + prevClass = charClassOf(text.Get(sidx - 1)) + } + for idx := sidx; idx < eidx; idx++ { + char := text.Get(idx) + class := charClassOf(char) + if !caseSensitive { + if char >= 'A' && char <= 'Z' { + char += 32 + } else if char > unicode.MaxASCII { + char = unicode.To(unicode.LowerCase, char) + } + } + if char == pattern[pidx] { + if withPos { + *pos = append(*pos, idx) + } + score += scoreMatch + bonus := bonusFor(prevClass, class) + if consecutive == 0 { + firstBonus = bonus } else { - consecutive = false + // Break consecutive chunk + if bonus == bonusBoundary { + firstBonus = bonus + } + bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) } + if pidx == 0 { + score += int(bonus * bonusFirstCharMultiplier) + } else { + score += int(bonus) + } + inGap = false + consecutive++ + pidx++ + } else { + if inGap { + score += scoreGapExtention + } else { + score += scoreGapStart + } + inGap = true + consecutive = 0 + firstBonus = 0 } + prevClass = class } - return bonus + return score, pos } -// FuzzyMatch performs fuzzy-match -func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { +// FuzzyMatchV1 performs fuzzy-match +func FuzzyMatchV1(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { - return Result{0, 0, 0} - } - - // 0. (FIXME) How to find the shortest match? - // a_____b__c__abc - // ^^^^^^^^^^ ^^^ - // 1. forward scan (abc) - // *-----*-----*> - // a_____b___abc__ - // 2. reverse scan (cba) - // a_____b___abc__ - // <*** + return Result{0, 0, 0}, nil + } + pidx := 0 sidx := -1 eidx := -1 @@ -157,7 +528,8 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run if sidx >= 0 && eidx >= 0 { pidx-- for index := eidx - 1; index >= sidx; index-- { - char := text.Get(indexAt(index, lenRunes, forward)) + tidx := indexAt(index, lenRunes, forward) + char := text.Get(tidx) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 @@ -166,7 +538,8 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run } } - pchar := pattern[indexAt(pidx, lenPattern, forward)] + pidx_ := indexAt(pidx, lenPattern, forward) + pchar := pattern[pidx_] if char == pchar { if pidx--; pidx < 0 { sidx = index @@ -175,16 +548,14 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run } } - // Calculate the bonus. This can't be done at the same time as the - // pattern scan above because 'forward' may be false. if !forward { sidx, eidx = lenRunes-eidx, lenRunes-sidx } - return Result{sidx, eidx, - evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} + score, pos := calculateScore(caseSensitive, text, pattern, sidx, eidx, withPos) + return Result{sidx, eidx, score}, pos } - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } // ExactMatchNaive is a basic string searching algorithm that handles case @@ -192,23 +563,28 @@ func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []run // of strings.ToLower + strings.Index for typical fzf use cases where input // strings and patterns are not very long. // -// We might try to implement better algorithms in the future: -// http://en.wikipedia.org/wiki/String_searching_algorithm -func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { +// Since 0.15.0, this function searches for the match with the highest +// bonus point, instead of stopping immediately after finding the first match. +// The solution is much cheaper since there is only one possible alignment of +// the pattern. +func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { - return Result{0, 0, 0} + return Result{0, 0, 0}, nil } lenRunes := text.Length() lenPattern := len(pattern) if lenRunes < lenPattern { - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } + // For simplicity, only look at the bonus at the first character position pidx := 0 + bestPos, bonus, bestBonus := -1, int16(0), int16(-1) for index := 0; index < lenRunes; index++ { - char := text.Get(indexAt(index, lenRunes, forward)) + index_ := indexAt(index, lenRunes, forward) + char := text.Get(index_) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 @@ -216,33 +592,51 @@ func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern char = unicode.To(unicode.LowerCase, char) } } - pchar := pattern[indexAt(pidx, lenPattern, forward)] + pidx_ := indexAt(pidx, lenPattern, forward) + pchar := pattern[pidx_] if pchar == char { + if pidx_ == 0 { + bonus = bonusAt(text, index_) + } pidx++ if pidx == lenPattern { - var sidx, eidx int - if forward { - sidx = index - lenPattern + 1 - eidx = index + 1 - } else { - sidx = lenRunes - (index + 1) - eidx = lenRunes - (index - lenPattern + 1) + if bonus > bestBonus { + bestPos, bestBonus = index, bonus + } + if bonus == bonusBoundary { + break } - return Result{sidx, eidx, - evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} + index -= pidx - 1 + pidx, bonus = 0, 0 } } else { index -= pidx - pidx = 0 + pidx, bonus = 0, 0 + } + } + if bestPos >= 0 { + var sidx, eidx int + if forward { + sidx = bestPos - lenPattern + 1 + eidx = bestPos + 1 + } else { + sidx = lenRunes - (bestPos + 1) + eidx = lenRunes - (bestPos - lenPattern + 1) } + score, _ := calculateScore(caseSensitive, text, pattern, sidx, eidx, false) + return Result{sidx, eidx, score}, nil } - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } // PrefixMatch performs prefix-match -func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { +func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { + if len(pattern) == 0 { + return Result{0, 0, 0}, nil + } + if text.Length() < len(pattern) { - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } for index, r := range pattern { @@ -251,20 +645,24 @@ func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []ru char = unicode.ToLower(char) } if char != r { - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) - return Result{0, lenPattern, - evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)} + score, _ := calculateScore(caseSensitive, text, pattern, 0, lenPattern, false) + return Result{0, lenPattern, score}, nil } // SuffixMatch performs suffix-match -func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { - trimmedLen := text.Length() - text.TrailingWhitespaces() +func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { + lenRunes := text.Length() + trimmedLen := lenRunes - text.TrailingWhitespaces() + if len(pattern) == 0 { + return Result{trimmedLen, trimmedLen, 0}, nil + } diff := trimmedLen - len(pattern) if diff < 0 { - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } for index, r := range pattern { @@ -273,28 +671,29 @@ func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []ru char = unicode.ToLower(char) } if char != r { - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) sidx := trimmedLen - lenPattern eidx := trimmedLen - return Result{sidx, eidx, - evaluateBonus(caseSensitive, text, pattern, sidx, eidx)} + score, _ := calculateScore(caseSensitive, text, pattern, sidx, eidx, false) + return Result{sidx, eidx, score}, nil } // EqualMatch performs equal-match -func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result { - // Note: EqualMatch always return a zero bonus. - if text.Length() != len(pattern) { - return Result{-1, -1, 0} +func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { + lenPattern := len(pattern) + if text.Length() != lenPattern { + return Result{-1, -1, 0}, nil } runesStr := text.ToString() if !caseSensitive { runesStr = strings.ToLower(runesStr) } if runesStr == string(pattern) { - return Result{0, len(pattern), 0} + return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern + + (bonusFirstCharMultiplier-1)*bonusBoundary}, nil } - return Result{-1, -1, 0} + return Result{-1, -1, 0}, nil } diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go index 7034dcef..7317eb18 100644 --- a/src/algo/algo_test.go +++ b/src/algo/algo_test.go @@ -1,95 +1,154 @@ package algo import ( + "sort" "strings" "testing" "github.com/junegunn/fzf/src/util" ) -func assertMatch(t *testing.T, fun func(bool, bool, util.Chars, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int, eidx int, bonus int) { +func assertMatch(t *testing.T, fun Algo, caseSensitive, forward bool, input, pattern string, sidx int, eidx int, score int) { if !caseSensitive { pattern = strings.ToLower(pattern) } - res := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern)) - if res.Start != sidx { - t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern) + res, pos := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern), true, nil) + var start, end int + if pos == nil || len(*pos) == 0 { + start = res.Start + end = res.End + } else { + sort.Ints(*pos) + start = (*pos)[0] + end = (*pos)[len(*pos)-1] + 1 } - if res.End != eidx { - t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", res.End, eidx, input, pattern) + if start != sidx { + t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", start, sidx, input, pattern) } - if res.Bonus != bonus { - t.Errorf("Invalid bonus: %d (expected: %d, %s / %s)", res.Bonus, bonus, input, pattern) + if end != eidx { + t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", end, eidx, input, pattern) + } + if res.Score != score { + t.Errorf("Invalid score: %d (expected: %d, %s / %s)", res.Score, score, input, pattern) } } func TestFuzzyMatch(t *testing.T) { - assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9, 2) - assertMatch(t, FuzzyMatch, false, true, "foo bar baz", "fbb", 0, 9, 8) - assertMatch(t, FuzzyMatch, false, true, "/AutomatorDocument.icns", "rdoc", 9, 13, 4) - assertMatch(t, FuzzyMatch, false, true, "/man1/zshcompctl.1", "zshc", 6, 10, 7) - assertMatch(t, FuzzyMatch, false, true, "/.oh-my-zsh/cache", "zshc", 8, 13, 8) - assertMatch(t, FuzzyMatch, false, true, "ab0123 456", "12356", 3, 10, 3) - assertMatch(t, FuzzyMatch, false, true, "abc123 456", "12356", 3, 10, 5) - - assertMatch(t, FuzzyMatch, false, true, "foo/bar/baz", "fbb", 0, 9, 8) - assertMatch(t, FuzzyMatch, false, true, "fooBarBaz", "fbb", 0, 7, 6) - assertMatch(t, FuzzyMatch, false, true, "foo barbaz", "fbb", 0, 8, 6) - assertMatch(t, FuzzyMatch, false, true, "fooBar Baz", "foob", 0, 4, 8) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1, 0) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9, 2) - assertMatch(t, FuzzyMatch, true, true, "Foo Bar Baz", "fbb", -1, -1, 0) - assertMatch(t, FuzzyMatch, true, true, "Foo/Bar/Baz", "FBB", 0, 9, 8) - assertMatch(t, FuzzyMatch, true, true, "FooBarBaz", "FBB", 0, 7, 6) - assertMatch(t, FuzzyMatch, true, true, "foo BarBaz", "fBB", 0, 8, 7) - assertMatch(t, FuzzyMatch, true, true, "FooBar Baz", "FooB", 0, 4, 8) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "fooBarbazz", -1, -1, 0) + for _, fn := range []Algo{FuzzyMatchV1, FuzzyMatchV2} { + for _, forward := range []bool{true, false} { + assertMatch(t, fn, false, forward, "fooBarbaz1", "oBZ", 2, 9, + scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtention*3) + assertMatch(t, fn, false, forward, "foo bar baz", "fbb", 0, 9, + scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ + bonusBoundary*2+2*scoreGapStart+4*scoreGapExtention) + assertMatch(t, fn, false, forward, "/AutomatorDocument.icns", "rdoc", 9, 13, + scoreMatch*4+bonusCamel123+bonusConsecutive*2) + assertMatch(t, fn, false, forward, "/man1/zshcompctl.1", "zshc", 6, 10, + scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) + assertMatch(t, fn, false, forward, "/.oh-my-zsh/cache", "zshc", 8, 13, + scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3+scoreGapStart) + assertMatch(t, fn, false, forward, "ab0123 456", "12356", 3, 10, + scoreMatch*5+bonusConsecutive*3+scoreGapStart+scoreGapExtention) + assertMatch(t, fn, false, forward, "abc123 456", "12356", 3, 10, + scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+bonusConsecutive+scoreGapStart+scoreGapExtention) + assertMatch(t, fn, false, forward, "foo/bar/baz", "fbb", 0, 9, + scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ + bonusBoundary*2+2*scoreGapStart+4*scoreGapExtention) + assertMatch(t, fn, false, forward, "fooBarBaz", "fbb", 0, 7, + scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ + bonusCamel123*2+2*scoreGapStart+2*scoreGapExtention) + assertMatch(t, fn, false, forward, "foo barbaz", "fbb", 0, 8, + scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary+ + scoreGapStart*2+scoreGapExtention*3) + assertMatch(t, fn, false, forward, "fooBar Baz", "foob", 0, 4, + scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) + assertMatch(t, fn, false, forward, "xFoo-Bar Baz", "foo-b", 1, 6, + scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+ + bonusNonWord+bonusBoundary) + + assertMatch(t, fn, true, forward, "fooBarbaz", "oBz", 2, 9, + scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtention*3) + assertMatch(t, fn, true, forward, "Foo/Bar/Baz", "FBB", 0, 9, + scoreMatch*3+bonusBoundary*(bonusFirstCharMultiplier+2)+ + scoreGapStart*2+scoreGapExtention*4) + assertMatch(t, fn, true, forward, "FooBarBaz", "FBB", 0, 7, + scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusCamel123*2+ + scoreGapStart*2+scoreGapExtention*2) + assertMatch(t, fn, true, forward, "FooBar Baz", "FooB", 0, 4, + scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*2+ + util.Max(bonusCamel123, bonusBoundary)) + + // Consecutive bonus updated + assertMatch(t, fn, true, forward, "foo-bar", "o-ba", 2, 6, + scoreMatch*4+bonusBoundary*3) + + // Non-match + assertMatch(t, fn, true, forward, "fooBarbaz", "oBZ", -1, -1, 0) + assertMatch(t, fn, true, forward, "Foo Bar Baz", "fbb", -1, -1, 0) + assertMatch(t, fn, true, forward, "fooBarbaz", "fooBarbazz", -1, -1, 0) + } + } } func TestFuzzyMatchBackward(t *testing.T) { - assertMatch(t, FuzzyMatch, false, true, "foobar fb", "fb", 0, 4, 4) - assertMatch(t, FuzzyMatch, false, false, "foobar fb", "fb", 7, 9, 5) + assertMatch(t, FuzzyMatchV1, false, true, "foobar fb", "fb", 0, 4, + scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+ + scoreGapStart+scoreGapExtention) + assertMatch(t, FuzzyMatchV1, false, false, "foobar fb", "fb", 7, 9, + scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary) } func TestExactMatchNaive(t *testing.T) { for _, dir := range []bool{true, false} { - assertMatch(t, ExactMatchNaive, false, dir, "fooBarbaz", "oBA", 2, 5, 3) assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "oBA", -1, -1, 0) assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "fooBarbazz", -1, -1, 0) - assertMatch(t, ExactMatchNaive, false, dir, "/AutomatorDocument.icns", "rdoc", 9, 13, 4) - assertMatch(t, ExactMatchNaive, false, dir, " |