 CHANGELOG.md          |  9
 src/options.go        | 20
 src/options_test.go   | 50
 src/pattern.go        |  4
 src/pattern_test.go   | 20
 src/tokenizer.go      | 41
 src/tokenizer_test.go |  4
 7 files changed, 115 insertions(+), 33 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0bce59b3..f7ec3838 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,15 @@
CHANGELOG
=========
+0.10.3
+------
+
+- Fixed slow performance of `--with-nth` when used with `--delimiter`
+    - Go's regular expression engine is currently quite slow, so the fixed
+      version treats the given delimiter pattern as a plain string instead
+      of a regular expression, unless the pattern contains special
+      characters and is a valid regular expression.
+
0.10.2
------
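
The gist of the fix: `delimiterRegexp` (changed below in `src/options.go`) only pays for the regex engine when it has to. The following standalone sketch, with a hypothetical `classify` helper and sample patterns chosen for illustration, mirrors the three-way decision; it is not the fzf code itself:

```go
package main

import (
	"fmt"
	"regexp"
)

// classify mirrors the three cases in delimiterRegexp: a pattern with no
// metacharacters is a plain string, an invalid regex falls back to a plain
// string, and anything else takes the slower regex path.
func classify(pattern string) string {
	if regexp.QuoteMeta(pattern) == pattern {
		return "plain string"
	}
	if _, err := regexp.Compile(pattern); err != nil {
		return "plain string (invalid regex)"
	}
	return "regular expression"
}

func main() {
	fmt.Println(classify(","))    // plain string
	fmt.Println(classify("[0-9")) // plain string (invalid regex)
	fmt.Println(classify("\t+"))  // regular expression
}
```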
diff --git a/src/options.go b/src/options.go
index 6a0d197c..35fa8ecf 100644
--- a/src/options.go
+++ b/src/options.go
@@ -104,7 +104,7 @@ type Options struct {
Case Case
Nth []Range
WithNth []Range
- Delimiter *regexp.Regexp
+ Delimiter Delimiter
Sort int
Tac bool
Tiebreak tiebreak
@@ -149,7 +149,7 @@ func defaultOptions() *Options {
Case: CaseSmart,
Nth: make([]Range, 0),
WithNth: make([]Range, 0),
- Delimiter: nil,
+ Delimiter: Delimiter{},
Sort: 1000,
Tac: false,
Tiebreak: byLength,
@@ -268,17 +268,27 @@ func splitNth(str string) []Range {
return ranges
}
-func delimiterRegexp(str string) *regexp.Regexp {
+func delimiterRegexp(str string) Delimiter {
+ // Special handling of \t
+ str = strings.Replace(str, "\\t", "\t", -1)
+
+ // 1. Pattern does not contain any special character
+ if regexp.QuoteMeta(str) == str {
+ return Delimiter{str: &str}
+ }
+
rx, e := regexp.Compile(str)
+ // 2. Pattern is not a valid regular expression
if e != nil {
- str = regexp.QuoteMeta(str)
+ return Delimiter{str: &str}
}
+ // 3. Pattern as regular expression. Slow.
rx, e = regexp.Compile(fmt.Sprintf("(?:.*?%s)|(?:.+?$)", str))
if e != nil {
errorExit("invalid regular expression: " + e.Error())
}
- return rx
+ return Delimiter{regex: rx}
}
func isAlphabet(char uint8) bool {
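
On the regex path, the delimiter pattern is wrapped as `(?:.*?%s)|(?:.+?$)`: each match is the shortest run of text ending in the delimiter, and the second alternative picks up the trailing remainder so every character of the line lands in some token. A minimal demonstration, using `,` as an arbitrary example delimiter:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Wrapped form of the delimiter "," — the same shape that
	// delimiterRegexp builds with fmt.Sprintf above.
	rx := regexp.MustCompile("(?:.*?,)|(?:.+?$)")
	fmt.Printf("%q\n", rx.FindAllString("foo,bar,baz", -1))
	// ["foo," "bar," "baz"]
}
```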
diff --git a/src/options_test.go b/src/options_test.go
index 28a8d9db..e9884f2c 100644
--- a/src/options_test.go
+++ b/src/options_test.go
@@ -2,16 +2,60 @@ package fzf
import (
"fmt"
+ "strings"
"testing"
"github.com/junegunn/fzf/src/curses"
)
func TestDelimiterRegex(t *testing.T) {
- rx := delimiterRegexp("*")
+ // Valid regex
+ delim := delimiterRegexp(".")
+ if delim.regex == nil || delim.str != nil {
+ t.Error(delim)
+ }
+ // Broken regex -> string
+ delim = delimiterRegexp("[0-9")
+ if delim.regex != nil || *delim.str != "[0-9" {
+ t.Error(delim)
+ }
+ // Valid regex
+ delim = delimiterRegexp("[0-9]")
+ if strings.Index(delim.regex.String(), "[0-9]") < 0 || delim.str != nil {
+ t.Error(delim)
+ }
+ // Tab character
+ delim = delimiterRegexp("\t")
+ if delim.regex != nil || *delim.str != "\t" {
+ t.Error(delim)
+ }
+ // Tab expression
+ delim = delimiterRegexp("\\t")
+ if delim.regex != nil || *delim.str != "\t" {
+ t.Error(delim)
+ }
+ // Tabs -> regex
+ delim = delimiterRegexp("\t+")
+ if delim.regex == nil || delim.str != nil {
+ t.Error(delim)
+ }
+}
+
+func TestDelimiterRegexString(t *testing.T) {
+ delim := delimiterRegexp("*")
+ tokens := strings.Split("-*--*---**---", *delim.str)
+ if delim.regex != nil || tokens[0] != "-" || tokens[1] != "--" ||
+ tokens[2] != "---" || tokens[3] != "" || tokens[4] != "---" {
+ t.Errorf("%s %s %d", delim, tokens, len(tokens))
+ }
+}
+
+func TestDelimiterRegexRegex(t *testing.T) {
+ delim := delimiterRegexp("--\\*")
+ rx := delim.regex
tokens := rx.FindAllString("-*--*---**---", -1)
- if tokens[0] != "-*" || tokens[1] != "--*" || tokens[2] != "---*" ||
- tokens[3] != "*" || tokens[4] != "---" {
+ if delim.str != nil ||
+ tokens[0] != "-*--*" || tokens[1] != "---*" || tokens[2] != "*---" {
t.Errorf("%s %s %d", rx, tokens, len(tokens))
}
}
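
One detail worth spelling out from `TestDelimiterRegexString`: `strings.Split` yields an empty token between consecutive delimiters, which is why the test expects `tokens[3]` to be the empty string. A quick standalone check:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// The "**" in the middle produces an empty token between the two stars.
	fmt.Printf("%q\n", strings.Split("-*--*---**---", "*"))
	// ["-" "--" "---" "" "---"]
}
```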
diff --git a/src/pattern.go b/src/pattern.go
index 990450a9..f83861ee 100644
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -42,7 +42,7 @@ type Pattern struct {
text []rune
terms []term
hasInvTerm bool
- delimiter *regexp.Regexp
+ delimiter Delimiter
nth []Range
procFun map[termType]func(bool, []rune, []rune) (int, int)
}
@@ -71,7 +71,7 @@ func clearChunkCache() {
// BuildPattern builds Pattern object from the given arguments
func BuildPattern(mode Mode, caseMode Case,
- nth []Range, delimiter *regexp.Regexp, runes []rune) *Pattern {
+ nth []Range, delimiter Delimiter, runes []rune) *Pattern {
var asString string
switch mode {
diff --git a/src/pattern_test.go b/src/pattern_test.go
index 8134cdc0..c7f54142 100644
--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@@ -59,7 +59,7 @@ func TestExact(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
pattern := BuildPattern(ModeExtended, CaseSmart,
- []Range{}, nil, []rune("'abc"))
+ []Range{}, Delimiter{}, []rune("'abc"))
sidx, eidx := algo.ExactMatchNaive(
pattern.caseSensitive, []rune("aabbcc abc"), pattern.terms[0].text)
if sidx != 7 || eidx != 10 {
@@ -70,7 +70,7 @@ func TestExact(t *testing.T) {
func TestEqual(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
- pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("^AbC$"))
+ pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, Delimiter{}, []rune("^AbC$"))
match := func(str string, sidxExpected int, eidxExpected int) {
sidx, eidx := algo.EqualMatch(
@@ -86,17 +86,17 @@ func TestEqual(t *testing.T) {
func TestCaseSensitivity(t *testing.T) {
defer clearPatternCache()
clearPatternCache()
- pat1 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, nil, []rune("abc"))
+ pat1 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
- pat2 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, nil, []rune("Abc"))
+ pat2 := BuildPattern(ModeFuzzy, CaseSmart, []Range{}, Delimiter{}, []rune("Abc"))
clearPatternCache()
- pat3 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, nil, []rune("abc"))
+ pat3 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
- pat4 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, nil, []rune("Abc"))
+ pat4 := BuildPattern(ModeFuzzy, CaseIgnore, []Range{}, Delimiter{}, []rune("Abc"))
clearPatternCache()
- pat5 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, nil, []rune("abc"))
+ pat5 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, Delimiter{}, []rune("abc"))
clearPatternCache()
- pat6 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, nil, []rune("Abc"))
+ pat6 := BuildPattern(ModeFuzzy, CaseRespect, []Range{}, Delimiter{}, []rune("Abc"))
if string(pat1.text) != "abc" || pat1.caseSensitive != false ||
string(pat2.text) != "Abc" || pat2.caseSensitive != true ||
@@ -109,8 +109,8 @@ func TestCaseSensitivity(t *testing.T) {
}
func TestOrigTextAndTransformed(t *testing.T) {
- pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, nil, []rune("jg"))
- tokens := Tokenize([]rune("junegunn"), nil)
+ pattern := BuildPattern(ModeExtended, CaseSmart, []Range{}, Delimiter{}, []rune("jg"))
+ tokens := Tokenize([]rune("junegunn"), Delimiter{})
trans := Transform(tokens, []Range{Range{1, 1}})
origRunes := []rune("junegunn.choi")
diff --git a/src/tokenizer.go b/src/tokenizer.go
index a616c6ba..72deb2b1 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -22,6 +22,12 @@ type Token struct {
prefixLength int
}
+// Delimiter for tokenizing the input
+type Delimiter struct {
+ regex *regexp.Regexp
+ str *string
+}
+
func newRange(begin int, end int) Range {
if begin == 1 {
begin = rangeEllipsis
@@ -68,15 +74,15 @@ func ParseRange(str *string) (Range, bool) {
return newRange(n, n), true
}
-func withPrefixLengths(tokens []string, begin int) []Token {
+func withPrefixLengths(tokens [][]rune, begin int) []Token {
ret := make([]Token, len(tokens))
prefixLength := begin
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
- ret[idx] = Token{text: []rune(token), prefixLength: prefixLength}
- prefixLength += len([]rune(token))
+ ret[idx] = Token{text: token, prefixLength: prefixLength}
+ prefixLength += len(token)
}
return ret
}
@@ -87,9 +93,9 @@ const (
awkWhite
)
-func awkTokenizer(input []rune) ([]string, int) {
+func awkTokenizer(input []rune) ([][]rune, int) {
// 9, 32
- ret := []string{}
+ ret := [][]rune{}
str := []rune{}
prefixLength := 0
state := awkNil
@@ -112,27 +118,40 @@ func awkTokenizer(input []rune) ([]string, int) {
if white {
str = append(str, r)
} else {
- ret = append(ret, string(str))
+ ret = append(ret, str)
state = awkBlack
str = []rune{r}
}
}
}
if len(str) > 0 {
- ret = append(ret, string(str))
+ ret = append(ret, str)
}
return ret, prefixLength
}
// Tokenize tokenizes the given string with the delimiter
-func Tokenize(runes []rune, delimiter *regexp.Regexp) []Token {
- if delimiter == nil {
+func Tokenize(runes []rune, delimiter Delimiter) []Token {
+ if delimiter.str == nil && delimiter.regex == nil {
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(runes)
return withPrefixLengths(tokens, prefixLength)
}
- tokens := delimiter.FindAllString(string(runes), -1)
- return withPrefixLengths(tokens, 0)
+
+ var tokens []string
+ if delimiter.str != nil {
+ tokens = strings.Split(string(runes), *delimiter.str)
+ for i := 0; i < len(tokens)-1; i++ {
+ tokens[i] = tokens[i] + *delimiter.str
+ }
+ } else if delimiter.regex != nil {
+ tokens = delimiter.regex.FindAllString(string(runes), -1)
+ }
+ asRunes := make([][]rune, len(tokens))
+ for i, token := range tokens {
+ asRunes[i] = []rune(token)
+ }
+ return withPrefixLengths(asRunes, 0)
}
func joinTokens(tokens []Token) []rune {
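
The two branches of `Tokenize` are meant to produce identical tokens for a literal delimiter: the fast branch splits and re-attaches the delimiter to every token but the last, while the slow branch matches the wrapped regex. A standalone sketch of that equivalence (the sample input and delimiter are illustrative, not from the commit):

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	input, delim := "foo:bar:baz", ":"

	// Fast path: plain split, then re-attach the delimiter to every
	// token except the last, as the strings.Split branch above does.
	fast := strings.Split(input, delim)
	for i := 0; i < len(fast)-1; i++ {
		fast[i] += delim
	}

	// Slow path: the wrapped regular expression.
	slow := regexp.MustCompile("(?:.*?:)|(?:.+?$)").FindAllString(input, -1)

	fmt.Printf("%q\n%q\n", fast, slow) // both print ["foo:" "bar:" "baz"]
}
```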
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
index 06603ae9..61017b8c 100644
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@@ -43,7 +43,7 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
- tokens := Tokenize([]rune(input), nil)
+ tokens := Tokenize([]rune(input), Delimiter{})
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
t.Errorf("%s", tokens)
}
@@ -58,7 +58,7 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl"
{
- tokens := Tokenize([]rune(input), nil)
+ tokens := Tokenize([]rune(input), Delimiter{})
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)
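
For reference, the AWK-style expectations in these tests follow the `\S+\s*` shape noted in `Tokenize`: each token keeps its trailing whitespace, and `prefixLength` is the offset at which the token starts. A rough standalone approximation using that regex directly (fzf's `awkTokenizer` computes the same offsets with a hand-rolled state machine instead):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	input := "  abc: def: ghi "
	for _, loc := range regexp.MustCompile(`\S+\s*`).FindAllStringIndex(input, -1) {
		fmt.Printf("%q starts at %d\n", input[loc[0]:loc[1]], loc[0])
	}
	// "abc: " starts at 2
	// "def: " starts at 7
	// "ghi "  starts at 12
}
```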