diff options
author | Junegunn Choi <junegunn.c@gmail.com> | 2015-01-02 04:49:30 +0900 |
---|---|---|
committer | Junegunn Choi <junegunn.c@gmail.com> | 2015-01-04 00:37:29 +0900 |
commit | f3177305d5572b26f135fc045481358b4eb1bf69 (patch) | |
tree | d59fd9587e44e998581a131875bf45e243df6c6e /src/pattern.go | |
parent | 7ba93d9f8351be64b37c65ae04d594ee261d5d26 (diff) |
Rewrite fzf in Go
Diffstat (limited to 'src/pattern.go')
-rw-r--r-- | src/pattern.go | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/src/pattern.go b/src/pattern.go new file mode 100644 index 00000000..533aa59f --- /dev/null +++ b/src/pattern.go @@ -0,0 +1,305 @@ +package fzf + +import ( + "regexp" + "sort" + "strings" +) + +const UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +// fuzzy +// 'exact +// ^exact-prefix +// exact-suffix$ +// !not-fuzzy +// !'not-exact +// !^not-exact-prefix +// !not-exact-suffix$ + +type TermType int + +const ( + TERM_FUZZY TermType = iota + TERM_EXACT + TERM_PREFIX + TERM_SUFFIX +) + +type Term struct { + typ TermType + inv bool + text []rune + origText []rune +} + +type Pattern struct { + mode Mode + caseSensitive bool + text []rune + terms []Term + hasInvTerm bool + delimiter *regexp.Regexp + nth []Range + procFun map[TermType]func(bool, *string, []rune) (int, int) +} + +var ( + _patternCache map[string]*Pattern + _splitRegex *regexp.Regexp + _cache ChunkCache +) + +func init() { + // We can uniquely identify the pattern for a given string since + // mode and caseMode do not change while the program is running + _patternCache = make(map[string]*Pattern) + _splitRegex = regexp.MustCompile("\\s+") + _cache = NewChunkCache() +} + +func clearPatternCache() { + _patternCache = make(map[string]*Pattern) +} + +func BuildPattern(mode Mode, caseMode Case, + nth []Range, delimiter *regexp.Regexp, runes []rune) *Pattern { + + var asString string + switch mode { + case MODE_EXTENDED, MODE_EXTENDED_EXACT: + asString = strings.Trim(string(runes), " ") + default: + asString = string(runes) + } + + cached, found := _patternCache[asString] + if found { + return cached + } + + caseSensitive, hasInvTerm := true, false + terms := []Term{} + + switch caseMode { + case CASE_SMART: + if !strings.ContainsAny(asString, UPPERCASE) { + runes, caseSensitive = []rune(strings.ToLower(asString)), false + } + case CASE_IGNORE: + runes, caseSensitive = []rune(strings.ToLower(asString)), false + } + + switch mode { + case MODE_EXTENDED, MODE_EXTENDED_EXACT: + terms = parseTerms(mode, string(runes)) + for _, term := range terms { + if term.inv { + hasInvTerm = true + } + } + } + + ptr := &Pattern{ + mode: mode, + caseSensitive: caseSensitive, + text: runes, + terms: terms, + hasInvTerm: hasInvTerm, + nth: nth, + delimiter: delimiter, + procFun: make(map[TermType]func(bool, *string, []rune) (int, int))} + + ptr.procFun[TERM_FUZZY] = FuzzyMatch + ptr.procFun[TERM_EXACT] = ExactMatchNaive + ptr.procFun[TERM_PREFIX] = PrefixMatch + ptr.procFun[TERM_SUFFIX] = SuffixMatch + + _patternCache[asString] = ptr + return ptr +} + +func parseTerms(mode Mode, str string) []Term { + tokens := _splitRegex.Split(str, -1) + terms := []Term{} + for _, token := range tokens { + typ, inv, text := TERM_FUZZY, false, token + origText := []rune(text) + if mode == MODE_EXTENDED_EXACT { + typ = TERM_EXACT + } + + if strings.HasPrefix(text, "!") { + inv = true + text = text[1:] + } + + if strings.HasPrefix(text, "'") { + if mode == MODE_EXTENDED { + typ = TERM_EXACT + text = text[1:] + } + } else if strings.HasPrefix(text, "^") { + typ = TERM_PREFIX + text = text[1:] + } else if strings.HasSuffix(text, "$") { + typ = TERM_SUFFIX + text = text[:len(text)-1] + } + + if len(text) > 0 { + terms = append(terms, Term{ + typ: typ, + inv: inv, + text: []rune(text), + origText: origText}) + } + } + return terms +} + +func (p *Pattern) IsEmpty() bool { + if p.mode == MODE_FUZZY { + return len(p.text) == 0 + } else { + return len(p.terms) == 0 + } +} + +func (p *Pattern) AsString() string { + return string(p.text) +} + +func (p *Pattern) CacheKey() string { + if p.mode == MODE_FUZZY { + return p.AsString() + } + cacheableTerms := []string{} + for _, term := range p.terms { + if term.inv { + continue + } + cacheableTerms = append(cacheableTerms, string(term.origText)) + } + sort.Strings(cacheableTerms) + return strings.Join(cacheableTerms, " ") +} + +func (p *Pattern) Match(chunk *Chunk) []*Item { + space := chunk + + // ChunkCache: Exact match + cacheKey := p.CacheKey() + if !p.hasInvTerm { // Because we're excluding Inv-term from cache key + if cached, found := _cache.Find(chunk, cacheKey); found { + return cached + } + } + + // ChunkCache: Prefix match + foundPrefixCache := false + for idx := len(cacheKey) - 1; idx > 0; idx-- { + if cached, found := _cache.Find(chunk, cacheKey[:idx]); found { + cachedChunk := Chunk(cached) + space = &cachedChunk + foundPrefixCache = true + break + } + } + + // ChunkCache: Suffix match + if !foundPrefixCache { + for idx := 1; idx < len(cacheKey); idx++ { + if cached, found := _cache.Find(chunk, cacheKey[idx:]); found { + cachedChunk := Chunk(cached) + space = &cachedChunk + break + } + } + } + + var matches []*Item + if p.mode == MODE_FUZZY { + matches = p.fuzzyMatch(space) + } else { + matches = p.extendedMatch(space) + } + + if !p.hasInvTerm { + _cache.Add(chunk, cacheKey, matches) + } + return matches +} + +func (p *Pattern) fuzzyMatch(chunk *Chunk) []*Item { + matches := []*Item{} + for _, item := range *chunk { + input := p.prepareInput(item) + if sidx, eidx := p.iter(FuzzyMatch, input, p.text); sidx >= 0 { + matches = append(matches, &Item{ + text: item.text, + index: item.index, + offsets: []Offset{Offset{sidx, eidx}}, + rank: NilRank}) + } + } + return matches +} + +func (p *Pattern) extendedMatch(chunk *Chunk) []*Item { + matches := []*Item{} + for _, item := range *chunk { + input := p.prepareInput(item) + offsets := []Offset{} + Loop: + for _, term := range p.terms { + pfun := p.procFun[term.typ] + if sidx, eidx := p.iter(pfun, input, term.text); sidx >= 0 { + if term.inv { + break Loop + } + offsets = append(offsets, Offset{sidx, eidx}) + } else if term.inv { + offsets = append(offsets, Offset{0, 0}) + } + } + if len(offsets) == len(p.terms) { + matches = append(matches, &Item{ + text: item.text, + index: item.index, + offsets: offsets, + rank: NilRank}) + } + } + return matches +} + +func (p *Pattern) prepareInput(item *Item) *Transformed { + if item.transformed != nil { + return item.transformed + } + + var ret *Transformed + if len(p.nth) > 0 { + tokens := Tokenize(item.text, p.delimiter) + ret = Transform(tokens, p.nth) + } else { + trans := Transformed{ + whole: item.text, + parts: []Token{Token{text: item.text, prefixLength: 0}}} + ret = &trans + } + item.transformed = ret + return ret +} + +func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int), + inputs *Transformed, pattern []rune) (int, int) { + for _, part := range inputs.parts { + prefixLength := part.prefixLength + if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 { + return sidx + prefixLength, eidx + prefixLength + } + } + return -1, -1 +} |