Diffstat (limited to 'vendor/github.com/gobwas/glob/syntax/lexer/lexer.go')
-rw-r--r--  vendor/github.com/gobwas/glob/syntax/lexer/lexer.go  299
1 file changed, 299 insertions(+), 0 deletions(-)
diff --git a/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go
new file mode 100644
index 000000000..a1c8d1962
--- /dev/null
+++ b/vendor/github.com/gobwas/glob/syntax/lexer/lexer.go
@@ -0,0 +1,299 @@
+package lexer
+
+import (
+	"bytes"
+	"fmt"
+	"unicode/utf8"
+
+	"github.com/gobwas/glob/util/runes"
+)
+
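+// Characters that have special meaning in glob pattern syntax.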
+const (
+	char_any           = '*'
+	char_comma         = ','
+	char_single        = '?'
+	char_escape        = '\\'
+	char_range_open    = '['
+	char_range_close   = ']'
+	char_terms_open    = '{'
+	char_terms_close   = '}'
+	char_range_not     = '!'
+	char_range_between = '-'
+)
+
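+// specials holds the metacharacters that must be escaped to appear in literal text.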
+var specials = []byte{
+	char_any,
+	char_single,
+	char_escape,
+	char_range_open,
+	char_range_close,
+	char_terms_open,
+	char_terms_close,
+}
+
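+// Special reports whether c is one of the glob metacharacters that
+// must be escaped to be matched literally.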
+func Special(c byte) bool {
+	return bytes.IndexByte(specials, c) != -1
+}
+
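+// tokens is a FIFO queue of lexed tokens.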
+type tokens []Token
+
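+// shift removes and returns the oldest queued token.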
+func (i *tokens) shift() (ret Token) {
+	ret = (*i)[0]
+	copy(*i, (*i)[1:])
+	*i = (*i)[:len(*i)-1]
+	return
+}
+
+func (i *tokens) push(v Token) {
+	*i = append(*i, v)
+}
+
+func (i *tokens) empty() bool {
+	return len(*i) == 0
+}
+
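+// eof is the sentinel rune returned at the end of input.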
+var eof rune = 0
+
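+// lexer scans the pattern rune by rune, queuing produced tokens and
+// supporting a single rune of lookback via unread.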
+type lexer struct {
+	data string
+	pos  int
+	err  error
+
+	tokens     tokens
+	termsLevel int
+
+	lastRune     rune
+	lastRuneSize int
+	hasRune      bool
+}
+
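+// NewLexer returns a lexer for the given pattern source.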
+func NewLexer(source string) *lexer {
+	l := &lexer{
+		data:   source,
+		tokens: tokens(make([]Token, 0, 4)),
+	}
+	return l
+}
+
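+// Next returns the next token, fetching more items from the input until
+// at least one is queued. Errors are reported as a token of kind Error.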
+func (l *lexer) Next() Token {
+	if l.err != nil {
+		return Token{Error, l.err.Error()}
+	}
+	if !l.tokens.empty() {
+		return l.tokens.shift()
+	}
+
+	l.fetchItem()
+	return l.Next()
+}
+
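+// peek decodes the rune at the current position without advancing,
+// returning eof at the end of input or on a decoding error.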
+func (l *lexer) peek() (r rune, w int) {
+	if l.pos == len(l.data) {
+		return eof, 0
+	}
+
+	r, w = utf8.DecodeRuneInString(l.data[l.pos:])
+	if r == utf8.RuneError {
+		l.errorf("could not read rune")
+		r = eof
+		w = 0
+	}
+
+	return
+}
+
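+// read returns the next rune and advances past it, replaying the last
+// rune after an unread.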
+func (l *lexer) read() rune {
+	if l.hasRune {
+		l.hasRune = false
+		l.seek(l.lastRuneSize)
+		return l.lastRune
+	}
+
+	r, s := l.peek()
+	l.seek(s)
+
+	l.lastRune = r
+	l.lastRuneSize = s
+
+	return r
+}
+
+func (l *lexer) seek(w int) {
+	l.pos += w
+}
+
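+// unread steps back by the last rune read; only one level of lookback
+// is supported.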
+func (l *lexer) unread() {
+	if l.hasRune {
+		l.errorf("could not unread rune")
+		return
+	}
+	l.seek(-l.lastRuneSize)
+	l.hasRune = true
+}
+
+func (l *lexer) errorf(f string, v ...interface{}) {
+	l.err = fmt.Errorf(f, v...)
+}
+
+func (l *lexer) inTerms() bool {
+	return l.termsLevel > 0
+}
+
+func (l *lexer) termsEnter() {
+	l.termsLevel++
+}
+
+func (l *lexer) termsLeave() {
+	l.termsLevel--
+}
+
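+// Runes that end a run of literal text, outside and inside a terms group.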
+var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
+var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
+
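+// fetchItem lexes one item from the input and queues the resulting tokens.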
+func (l *lexer) fetchItem() {
+	r := l.read()
+	switch {
+	case r == eof:
+		l.tokens.push(Token{EOF, ""})
+
+	case r == char_terms_open:
+		l.termsEnter()
+		l.tokens.push(Token{TermsOpen, string(r)})
+
+	case r == char_comma && l.inTerms():
+		l.tokens.push(Token{Separator, string(r)})
+
+	case r == char_terms_close && l.inTerms():
+		l.tokens.push(Token{TermsClose, string(r)})
+		l.termsLeave()
+
+	case r == char_range_open:
+		l.tokens.push(Token{RangeOpen, string(r)})
+		l.fetchRange()
+
+	case r == char_single:
+		l.tokens.push(Token{Single, string(r)})
+
+	case r == char_any:
+		if l.read() == char_any {
+			l.tokens.push(Token{Super, string(r) + string(r)})
+		} else {
+			l.unread()
+			l.tokens.push(Token{Any, string(r)})
+		}
+
+	default:
+		l.unread()
+
+		var breakers []rune
+		if l.inTerms() {
+			breakers = inTermsBreakers
+		} else {
+			breakers = inTextBreakers
+		}
+		l.fetchText(breakers)
+	}
+}
+
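+// fetchRange lexes the body of a range after RangeOpen: an optional Not,
+// then either a lo-hi pair or a run of literal characters, then RangeClose.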
+func (l *lexer) fetchRange() {
+	var wantHi bool
+	var wantClose bool
+	var seenNot bool
+	for {
+		r := l.read()
+		if r == eof {
+			l.errorf("unexpected end of input")
+			return
+		}
+
+		if wantClose {
+			if r != char_range_close {
+				l.errorf("expected close range character")
+			} else {
+				l.tokens.push(Token{RangeClose, string(r)})
+			}
+			return
+		}
+
+		if wantHi {
+			l.tokens.push(Token{RangeHi, string(r)})
+			wantClose = true
+			continue
+		}
+
+		if !seenNot && r == char_range_not {
+			l.tokens.push(Token{Not, string(r)})
+			seenNot = true
+			continue
+		}
+
+		if n, w := l.peek(); n == char_range_between {
+			l.seek(w)
+			l.tokens.push(Token{RangeLo, string(r)})
+			l.tokens.push(Token{RangeBetween, string(n)})
+			wantHi = true
+			continue
+		}
+
+		l.unread() // unread first peek and fetch as text
+		l.fetchText([]rune{char_range_close})
+		wantClose = true
+	}
+}
+
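+// fetchText accumulates literal text up to EOF or the first unescaped
+// breaker rune, honoring backslash escapes, and queues a Text token if
+// anything was read.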
+func (l *lexer) fetchText(breakers []rune) {
+	var data []rune
+	var escaped bool
+
+reading:
+	for {
+		r := l.read()
+		if r == eof {
+			break
+		}
+
+		if !escaped {
+			if r == char_escape {
+				escaped = true
+				continue
+			}
+
+			if runes.IndexRune(breakers, r) != -1 {
+				l.unread()
+				break reading
+			}
+		}
+
+		escaped = false
+		data = append(data, r)
+	}
+
+	if len(data) > 0 {
+		l.tokens.push(Token{Text, string(data)})
+	}
+}