Rewrite fzf in Go

author: Junegunn Choi <junegunn.c@gmail.com> 2015-01-02 04:49:30 +0900
committer: Junegunn Choi <junegunn.c@gmail.com> 2015-01-04 00:37:29 +0900
commit: f3177305d5572b26f135fc045481358b4eb1bf69 (patch)
tree: d59fd9587e44e998581a131875bf45e243df6c6e /src/pattern.go
parent: 7ba93d9f8351be64b37c65ae04d594ee261d5d26 (diff)
1 files changed, 305 insertions, 0 deletions
diff --git a/src/pattern.go b/src/pattern.go
new file mode 100644
index 00000000..533aa59f
--- /dev/null
+++ b/src/pattern.go
@@ -0,0 +1,305 @@
+package fzf
+
+import (
+	"regexp"
+	"sort"
+	"strings"
+)
+
+const UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+// fuzzy
+// 'exact
+// ^exact-prefix
+// exact-suffix$
+// !not-fuzzy
+// !'not-exact
+// !^not-exact-prefix
+// !not-exact-suffix$
+
+type TermType int
+
+const (
+	TERM_FUZZY TermType = iota
+	TERM_EXACT
+	TERM_PREFIX
+	TERM_SUFFIX
+)
+
+type Term struct {
+	typ      TermType
+	inv      bool
+	text     []rune
+	origText []rune
+}
+
+type Pattern struct {
+	mode          Mode
+	caseSensitive bool
+	text          []rune
+	terms         []Term
+	hasInvTerm    bool
+	delimiter     *regexp.Regexp
+	nth           []Range
+	procFun       map[TermType]func(bool, *string, []rune) (int, int)
+}
+
+var (
+	_patternCache map[string]*Pattern
+	_splitRegex   *regexp.Regexp
+	_cache        ChunkCache
+)
+
+func init() {
+	// We can uniquely identify the pattern for a given string since
+	// mode and caseMode do not change while the program is running
+	_patternCache = make(map[string]*Pattern)
+	_splitRegex = regexp.MustCompile("\\s+")
+	_cache = NewChunkCache()
+}
+
+func clearPatternCache() {
+	_patternCache = make(map[string]*Pattern)
+}
+
+func BuildPattern(mode Mode, caseMode Case,
+	nth []Range, delimiter *regexp.Regexp, runes []rune) *Pattern {
+
+	var asString string
+	switch mode {
+	case MODE_EXTENDED, MODE_EXTENDED_EXACT:
+		asString = strings.Trim(string(runes), " ")
+	default:
+		asString = string(runes)
+	}
+
+	cached, found := _patternCache[asString]
+	if found {
+		return cached
+	}
+
+	caseSensitive, hasInvTerm := true, false
+	terms := []Term{}
+
+	switch caseMode {
+	case CASE_SMART:
+		if !strings.ContainsAny(asString, UPPERCASE) {
+			runes, caseSensitive = []rune(strings.ToLower(asString)), false
+		}
+	case CASE_IGNORE:
+		runes, caseSensitive = []rune(strings.ToLower(asString)), false
+	}
+
+	switch mode {
+	case MODE_EXTENDED, MODE_EXTENDED_EXACT:
+		terms = parseTerms(mode, string(runes))
+		for _, term := range terms {
+			if term.inv {
+				hasInvTerm = true
+			}
+		}
+	}
+
+	ptr := &Pattern{
+		mode:          mode,
+		caseSensitive: caseSensitive,
+		text:          runes,
+		terms:         terms,
+		hasInvTerm:    hasInvTerm,
+		nth:           nth,
+		delimiter:     delimiter,
+		procFun:       make(map[TermType]func(bool, *string, []rune) (int, int))}
+
+	ptr.procFun[TERM_FUZZY] = FuzzyMatch
+	ptr.procFun[TERM_EXACT] = ExactMatchNaive
+	ptr.procFun[TERM_PREFIX] = PrefixMatch
+	ptr.procFun[TERM_SUFFIX] = SuffixMatch
+
+	_patternCache[asString] = ptr
+	return ptr
+}
+
+func parseTerms(mode Mode, str string) []Term {
+	tokens := _splitRegex.Split(str, -1)
+	terms := []Term{}
+	for _, token := range tokens {
+		typ, inv, text := TERM_FUZZY, false, token
+		origText := []rune(text)
+		if mode == MODE_EXTENDED_EXACT {
+			typ = TERM_EXACT
+		}
+
+		if strings.HasPrefix(text, "!") {
+			inv = true
+			text = text[1:]
+		}
+
+		if strings.HasPrefix(text, "'") {
+			if mode == MODE_EXTENDED {
+				typ = TERM_EXACT
+				text = text[1:]
+			}
+		} else if strings.HasPrefix(text, "^") {
+			typ = TERM_PREFIX
+			text = text[1:]
+		} else if strings.HasSuffix(text, "$") {
+			typ = TERM_SUFFIX
+			text = text[:len(text)-1]
+		}
+
+		if len(text) > 0 {
+			terms = append(terms, Term{
+				typ:      typ,
+				inv:      inv,
+				text:     []rune(text),
+				origText: origText})
+		}
+	}
+	return terms
+}
+
+func (p *Pattern) IsEmpty() bool {
+	if p.mode == MODE_FUZZY {
+		return len(p.text) == 0
+	} else {
+		return len(p.terms) == 0
+	}
+}
+
+func (p *Pattern) AsString() string {
+	return string(p.text)
+}
+
+func (p *Pattern) CacheKey() string {
+	if p.mode == MODE_FUZZY {
+		return p.AsString()
+	}
+	cacheableTerms := []string{}
+	for _, term := range p.terms {
+		if term.inv {
+			continue
+		}
+		cacheableTerms = append(cacheableTerms, string(term.origText))
+	}
+	sort.Strings(cacheableTerms)
+	return strings.Join(cacheableTerms, " ")
+}
+
+func (p *Pattern) Match(chunk *Chunk) []*Item {
+	space := chunk
+
+	// ChunkCache: Exact match
+	cacheKey := p.CacheKey()
+	if !p.hasInvTerm { // Because we're excluding Inv-term from cache key
+		if cached, found := _cache.Find(chunk, cacheKey); found {
+			return cached
+		}
+	}
+
+	// ChunkCache: Prefix match
+	foundPrefixCache := false
+	for idx := len(cacheKey) - 1; idx > 0; idx-- {
+		if cached, found := _cache.Find(chunk, cacheKey[:idx]); found {
+			cachedChunk := Chunk(cached)
+			space = &cachedChunk
+			foundPrefixCache = true
+			break
+		}
+	}
+
+	// ChunkCache: Suffix match
+	if !foundPrefixCache {
+		for idx := 1; idx < len(cacheKey); idx++ {
+			if cached, found := _cache.Find(chunk, cacheKey[idx:]); found {
+				cachedChunk := Chunk(cached)
+				space = &cachedChunk
+				break
+			}
+		}
+	}
+
+	var matches []*Item
+	if p.mode == MODE_FUZZY {
+		matches = p.fuzzyMatch(space)
+	} else {
+		matches = p.extendedMatch(space)
+	}
+
+	if !p.hasInvTerm {
+		_cache.Add(chunk, cacheKey, matches)
+	}
+	return matches
+}
+
+func (p *Pattern) fuzzyMatch(chunk *Chunk) []*Item {
+	matches := []*Item{}
+	for _, item := range *chunk {
+		input := p.prepareInput(item)
+		if sidx, eidx := p.iter(FuzzyMatch, input, p.text); sidx >= 0 {
+			matches = append(matches, &Item{
+				text:    item.text,
+				index:   item.index,
+				offsets: []Offset{Offset{sidx, eidx}},
+				rank:    NilRank})
+		}
+	}
+	return matches
+}
+
+func (p *Pattern) extendedMatch(chunk *Chunk) []*Item {
+	matches := []*Item{}
+	for _, item := range *chunk {
+		input := p.prepareInput(item)
+		offsets := []Offset{}
+	Loop:
+		for _, term := range p.terms {
+			pfun := p.procFun[term.typ]
+			if sidx, eidx := p.iter(pfun, input, term.text); sidx >= 0 {
+				if term.inv {
+					break Loop
+				}
+				offsets = append(offsets, Offset{sidx, eidx})
+			} else if term.inv {
+				offsets = append(offsets, Offset{0, 0})
+			}
+		}
+		if len(offsets) == len(p.terms) {
+			matches = append(matches, &Item{
+				text:    item.text,
+				index:   item.index,
+				offsets: offsets,
+				rank:    NilRank})
+		}
+	}
+	return matches
+}
+
+func (p *Pattern) prepareInput(item *Item) *Transformed {
+	if item.transformed != nil {
+		return item.transformed
+	}
+
+	var ret *Transformed
+	if len(p.nth) > 0 {
+		tokens := Tokenize(item.text, p.delimiter)
+		ret = Transform(tokens, p.nth)
+	} else {
+		trans := Transformed{
+			whole: item.text,
+			parts: []Token{Token{text: item.text, prefixLength: 0}}}
+		ret = &trans
+	}
+	item.transformed = ret
+	return ret
+}
+
+func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int),
+	inputs *Transformed, pattern []rune) (int, int) {
+	for _, part := range inputs.parts {
+		prefixLength := part.prefixLength
+		if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 {
+			return sidx + prefixLength, eidx + prefixLength
+		}
+	}
+	return -1, -1
+}
author	Junegunn Choi <junegunn.c@gmail.com>	2015-01-02 04:49:30 +0900
committer	Junegunn Choi <junegunn.c@gmail.com>	2015-01-04 00:37:29 +0900
commit	f3177305d5572b26f135fc045481358b4eb1bf69 (patch)
tree	d59fd9587e44e998581a131875bf45e243df6c6e /src/pattern.go
parent	7ba93d9f8351be64b37c65ae04d594ee261d5d26 (diff)