Move glob implementation to new crate.

It is isolated and complex enough that it deserves attention all on its own. It's also eminently reusable.
author: Andrew Gallant <jamslam@gmail.com> 2016-09-30 19:42:41 -0400
committer: Andrew Gallant <jamslam@gmail.com> 2016-09-30 19:42:41 -0400
commit: fdf24317ac4c34fc0a21efa0a775bc7d3663d498 (patch)
tree: a6e94a61439b0568bfb7bdb5d80e9a0138c48317 /src
parent: b9d5f22a4d20862dfbcbdfc81a07284719cc71c4 (diff)
4 files changed, 19 insertions, 1188 deletions
diff --git a/src/gitignore.rs b/src/gitignore.rs
index bfc83846..6191f0b5 100644
--- a/src/gitignore.rs
+++ b/src/gitignore.rs
@@ -28,15 +28,15 @@ use std::fs::File;
 use std::io::{self, BufRead};
 use std::path::{Path, PathBuf};
 
+use globset;
 use regex;
 
-use glob;
 use pathutil::{is_file_name, strip_prefix};
 
 /// Represents an error that can occur when parsing a gitignore file.
 #[derive(Debug)]
 pub enum Error {
-    Glob(glob::Error),
+    Glob(globset::Error),
     Regex(regex::Error),
     Io(io::Error),
 }
@@ -61,8 +61,8 @@ impl fmt::Display for Error {
     }
 }
 
-impl From<glob::Error> for Error {
-    fn from(err: glob::Error) -> Error {
+impl From<globset::Error> for Error {
+    fn from(err: globset::Error) -> Error {
         Error::Glob(err)
     }
 }
@@ -82,7 +82,7 @@ impl From<io::Error> for Error {
 /// Gitignore is a matcher for the glob patterns in a single gitignore file.
 #[derive(Clone, Debug)]
 pub struct Gitignore {
-    set: glob::Set,
+    set: globset::Set,
     root: PathBuf,
     patterns: Vec<Pattern>,
     num_ignores: u64,
@@ -207,7 +207,7 @@ impl<'a> Match<'a> {
 /// GitignoreBuilder constructs a matcher for a single set of globs from a
 /// .gitignore file.
 pub struct GitignoreBuilder {
-    builder: glob::SetBuilder,
+    builder: globset::SetBuilder,
     root: PathBuf,
     patterns: Vec<Pattern>,
 }
@@ -237,7 +237,7 @@ impl GitignoreBuilder {
     pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
         let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
         GitignoreBuilder {
-            builder: glob::SetBuilder::new(),
+            builder: globset::SetBuilder::new(),
             root: root.to_path_buf(),
             patterns: vec![],
         }
@@ -299,7 +299,7 @@ impl GitignoreBuilder {
             whitelist: false,
             only_dir: false,
         };
-        let mut opts = glob::MatchOptions::default();
+        let mut opts = globset::MatchOptions::default();
         let has_slash = line.chars().any(|c| c == '/');
         let is_absolute = line.chars().nth(0).unwrap() == '/';
         if line.starts_with("\\!") || line.starts_with("\\#") {
diff --git a/src/glob.rs b/src/glob.rs
deleted file mode 100644
index 295474cc..00000000
--- a/src/glob.rs
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*!
-The glob module provides standard shell globbing, but is specifically
-implemented by converting glob syntax to regular expressions. The reasoning is
-two fold:
-
-1. The regex library is *really* fast. Regaining performance in a distinct
-   implementation of globbing is non-trivial.
-2. Most crucially, a `RegexSet` can be used to match many globs simultaneously.
-
-This module is written with some amount of intention of eventually splitting it
-out into its own separate crate, but I didn't quite have the energy for all
-that rigamorole when I wrote this. In particular, it could be fast/good enough
-to make its way into `glob` proper.
-*/
-
-// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets
-// here. For example, we do a first pass single-regex-of-all-globs filter
-// before actually running the regex set. This turns out to be faster,
-// especially in fresh checkouts of repos that don't have a lot of ignored
-// files. It's not clear how hard it is to make the regex set faster.
-//
-// An alternative avenue is to stop doing "regex all the things." (Which, to
-// be fair, is pretty fast---I just expected it to be faster.) We could do
-// something clever using assumptions along the lines of "oh, most ignore
-// patterns are either literals or are for ignoring file extensions." (Look
-// at the .gitignore for the chromium repo---just about every pattern satisfies
-// that assumption.)
-
-use std::borrow::Cow;
-use std::collections::HashMap;
-use std::error::Error as StdError;
-use std::ffi::{OsStr, OsString};
-use std::fmt;
-use std::hash;
-use std::iter;
-use std::path::Path;
-use std::str;
-
-use fnv;
-use regex;
-use regex::bytes::Regex;
-
-use pathutil::file_name;
-
-lazy_static! {
-    static ref FILE_SEPARATORS: String = regex::quote(r"/\");
-}
-
-/// Represents an error that can occur when parsing a glob pattern.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum Error {
-    InvalidRecursive,
-    UnclosedClass,
-    InvalidRange(char, char),
-    UnopenedAlternates,
-    UnclosedAlternates,
-    NestedAlternates,
-}
-
-impl StdError for Error {
-    fn description(&self) -> &str {
-        match *self {
-            Error::InvalidRecursive => {
-                "invalid use of **; must be one path component"
-            }
-            Error::UnclosedClass => {
-                "unclosed character class; missing ']'"
-            }
-            Error::InvalidRange(_, _) => {
-                "invalid character range"
-            }
-            Error::UnopenedAlternates => {
-                "unopened alternate group; missing '{' \
-                (maybe escape '}' with '[}]'?)"
-            }
-            Error::UnclosedAlternates => {
-                "unclosed alternate group; missing '}' \
-                (maybe escape '{' with '[{]'?)"
-            }
-            Error::NestedAlternates => {
-                "nested alternate groups are not allowed"
-            }
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Error::InvalidRecursive
-            | Error::UnclosedClass
-            | Error::UnopenedAlternates
-            | Error::UnclosedAlternates
-            | Error::NestedAlternates => {
-                write!(f, "{}", self.description())
-            }
-            Error::InvalidRange(s, e) => {
-                write!(f, "invalid range; '{}' > '{}'", s, e)
-            }
-        }
-    }
-}
-
-/// SetYesNo represents a group of globs that can be matched together in a
-/// single pass. SetYesNo can only determine whether a particular path matched
-/// any pattern in the set.
-#[derive(Clone, Debug)]
-pub struct SetYesNo {
-    re: Regex,
-}
-
-impl SetYesNo {
-    /// Returns true if and only if the given path matches at least one glob
-    /// in this set.
-    pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
-        self.re.is_match(&*path_bytes(path.as_ref()))
-    }
-
-    fn new(
-        pats: &[(Pattern, MatchOptions)],
-    ) -> Result<SetYesNo, regex::Error> {
-        let mut joined = String::new();
-        for &(ref p, ref o) in pats {
-            let part = format!("(?:{})", p.to_regex_with(o));
-            if !joined.is_empty() {
-                joined.push('|');
-            }
-            joined.push_str(&part);
-        }
-        Ok(SetYesNo { re: try!(Regex::new(&joined)) })
-    }
-}
-
-type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
-
-/// Set represents a group of globs that can be matched together in a single
-/// pass.
-#[derive(Clone, Debug)]
-pub struct Set {
-    exts: HashMap<OsString, Vec<usize>, Fnv>,
-    literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
-    base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
-    base_prefixes: Vec<Vec<u8>>,
-    base_prefixes_map: Vec<usize>,
-    base_suffixes: Vec<Vec<u8>>,
-    base_suffixes_map: Vec<usize>,
-    base_regexes: Vec<Regex>,
-    base_regexes_map: Vec<usize>,
-    regexes: Vec<Regex>,
-    regexes_map: Vec<usize>,
-}
-
-impl Set {
-    /// Returns the sequence number of every glob pattern that matches the
-    /// given path.
-    #[allow(dead_code)]
-    pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
-        let mut into = vec![];
-        self.matches_into(path, &mut into);
-        into
-    }
-
-    /// Adds the sequence number of every glob pattern that matches the given
-    /// path to the vec given.
-    pub fn matches_into<T: AsRef<Path>>(
-        &self,
-        path: T,
-        into: &mut Vec<usize>,
-    ) {
-        into.clear();
-        let path = path.as_ref();
-        let path_bytes = &*path_bytes(path);
-        let basename = file_name(path).map(|b| os_str_bytes(b));
-        if !self.exts.is_empty() {
-            if let Some(ext) = path.extension() {
-                if let Some(matches) = self.exts.get(ext) {
-                    into.extend(matches.as_slice());
-                }
-            }
-        }
-        if !self.literals.is_empty() {
-            if let Some(matches) = self.literals.get(path_bytes) {
-                into.extend(matches.as_slice());
-            }
-        }
-        if !self.base_literals.is_empty() {
-            if let Some(ref basename) = basename {
-                if let Some(matches) = self.base_literals.get(&**basename) {
-                    into.extend(matches.as_slice());
-                }
-            }
-        }
-        if !self.base_prefixes.is_empty() {
-            if let Some(ref basename) = basename {
-                let basename = &**basename;
-                for (i, pre) in self.base_prefixes.iter().enumerate() {
-                    if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
-                        into.push(self.base_prefixes_map[i]);
-                    }
-                }
-            }
-        }
-        if !self.base_suffixes.is_empty() {
-            if let Some(ref basename) = basename {
-                let basename = &**basename;
-                for (i, suf) in self.base_suffixes.iter().enumerate() {
-                    if suf.len() > basename.len() {
-                        continue;
-                    }
-                    let (s, e) = (basename.len() - suf.len(), basename.len());
-                    if &**suf == &basename[s..e] {
-                        into.push(self.base_suffixes_map[i]);
-                    }
-                }
-            }
-        }
-        if let Some(ref basename) = basename {
-            for (i, re) in self.base_regexes.iter().enumerate() {
-                if re.is_match(&**basename) {
-                    into.push(self.base_regexes_map[i]);
-                }
-            }
-        }
-        for (i, re) in self.regexes.iter().enumerate() {
-            if re.is_match(path_bytes) {
-                into.push(self.regexes_map[i]);
-            }
-        }
-        into.sort();
-    }
-
-    fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
-        let fnv = Fnv::default();
-        let mut exts = HashMap::with_hasher(fnv.clone());
-        let mut literals = HashMap::with_hasher(fnv.clone());
-        let mut base_literals = HashMap::with_hasher(fnv.clone());
-        let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
-        let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
-        let (mut regexes, mut regexes_map) = (vec![], vec![]);
-        let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
-        for (i, &(ref p, ref o)) in pats.iter().enumerate() {
-            if let Some(ext) = p.ext() {
-                exts.entry(ext).or_insert(vec![]).push(i);
-            } else if let Some(literal) = p.literal() {
-                literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
-            } else if let Some(literal) = p.base_literal() {
-                base_literals
-                    .entry(literal.into_bytes()).or_insert(vec![]).push(i);
-            } else if let Some(literal) = p.base_literal_prefix() {
-                base_prefixes.push(literal.into_bytes());
-                base_prefixes_map.push(i);
-            } else if let Some(literal) = p.base_literal_suffix() {
-                base_suffixes.push(literal.into_bytes());
-                base_suffixes_map.push(i);
-            } else if p.is_only_basename() {
-                base_regexes.push(try!(Regex::new(&p.to_regex_with(o))));
-                base_regexes_map.push(i);
-            } else {
-                regexes.push(try!(Regex::new(&p.to_regex_with(o))));
-                regexes_map.push(i);
-            }
-        }
-        Ok(Set {
-            exts: exts,
-            literals: literals,
-            base_literals: base_literals,
-            base_prefixes: base_prefixes,
-            base_prefixes_map: base_prefixes_map,
-            base_suffixes: base_suffixes,
-            base_suffixes_map: base_suffixes_map,
-            base_regexes: base_regexes,
-            base_regexes_map: base_regexes_map,
-            regexes: regexes,
-            regexes_map: regexes_map,
-        })
-    }
-}
-
-/// SetBuilder builds a group of patterns that can be used to simultaneously
-/// match a file path.
-pub struct SetBuilder {
-    pats: Vec<(Pattern, MatchOptions)>,
-}
-
-impl SetBuilder {
-    /// Create a new SetBuilder. A SetBuilder can be used to add new patterns.
-    /// Once all patterns have been added, `build` should be called to produce
-    /// a `Set`, which can then be used for matching.
-    pub fn new() -> SetBuilder {
-        SetBuilder { pats: vec![] }
-    }
-
-    /// Builds a new matcher from all of the glob patterns added so far.
-    ///
-    /// Once a matcher is built, no new patterns can be added to it.
-    pub fn build(&self) -> Result<Set, regex::Error> {
-        Set::new(&self.pats)
-    }
-
-    /// Like `build`, but returns a matcher that can only answer yes/no.
-    pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
-        SetYesNo::new(&self.pats)
-    }
-
-    /// Add a new pattern to this set.
-    ///
-    /// If the pattern could not be parsed as a glob, then an error is
-    /// returned.
-    #[allow(dead_code)]
-    pub fn add(&mut self, pat: &str) -> Result<(), Error> {
-        self.add_with(pat, &MatchOptions::default())
-    }
-
-    /// Like add, but sets the match options for this particular pattern.
-    pub fn add_with(
-        &mut self,
-        pat: &str,
-        opts: &MatchOptions,
-    ) -> Result<(), Error> {
-        let parsed = try!(Pattern::new(pat));
-        // if let Some(ext) = parsed.ext() {
-            // eprintln!("ext :: {:?} :: {:?}", ext, pat);
-        // } else if let Some(lit) = parsed.literal() {
-            // eprintln!("literal :: {:?} :: {:?}", lit, pat);
-        // } else if let Some(lit) = parsed.base_literal() {
-            // eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
-        // } else if let Some(lit) = parsed.base_literal_prefix() {
-            // eprintln!("base_literal_prefix :: {:?} :: {:?}", lit, pat);
-        // } else if let Some(lit) = parsed.base_literal_suffix() {
-            // eprintln!("base_literal_suffix :: {:?} :: {:?}", lit, pat);
-        // } else if parsed.is_only_basename() {
-            // eprintln!("basename-regex :: {:?} :: {:?}", pat, parsed);
-        // } else {
-            // eprintln!("regex :: {:?} :: {:?}", pat, parsed);
-        // }
-        self.pats.push((parsed, opts.clone()));
-        Ok(())
-    }
-}
-
-/// Pattern represents a successfully parsed shell glob pattern.
-///
-/// It cannot be used directly to match file paths, but it can be converted
-/// to a regular expression string.
-#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct Pattern {
-    tokens: Vec<Token>,
-}
-
-/// Options to control the matching semantics of a glob. The default value
-/// has all options disabled.
-#[derive(Clone, Debug, Default)]
-pub struct MatchOptions {
-    /// When true, matching is done case insensitively.
-    pub case_insensitive: bool,
-    /// When true, neither `*` nor `?` match the current system's path
-    /// separator.
-    pub require_literal_separator: bool,
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-enum Token {
-    Literal(char),
-    Any,
-    ZeroOrMore,
-    RecursivePrefix,
-    RecursiveSuffix,
-    RecursiveZeroOrMore,
-    Class {
-        negated: bool,
-        ranges: Vec<(char, char)>,
-    },
-    Alternates(Vec<Pattern>),
-}
-
-impl Pattern {
-    /// Parse a shell glob pattern.
-    ///
-    /// If the pattern is not a valid glob, then an error is returned.
-    pub fn new(pat: &str) -> Result<Pattern, Error> {
-        let mut p = Parser {
-            stack: vec![Pattern::default()],
-            chars: pat.chars().peekable(),
-            prev: None,
-            cur: None,
-        };
-        try!(p.parse());
-        if p.stack.is_empty() {
-            Err(Error::UnopenedAlternates)
-        } else if p.stack.len() > 1 {
-            Err(Error::UnclosedAlternates)
-        } else {
-            Ok(p.stack.pop().unwrap())
-        }
-    }
-
-    /// Returns an extension if this pattern exclusively matches it.
-    pub fn ext(&self) -> Option<OsString> {
-        if self.tokens.len() <= 3 {
-            return None;
-        }
-        match self.tokens.get(0) {
-            Some(&Token::RecursivePrefix) => {}
-            _ => return None,
-        }
-        match self.tokens.get(1) {
-            Some(&Token::ZeroOrMore) => {}
-            _ => return None,
-        }
-        match self.tokens.get(2) {
-            Some(&Token::Literal(c)) if c == '.' => {}
-            _ => return None,
-        }
-        let mut lit = OsString::new();
-        for t in self.tokens[3..].iter() {
-            match *t {
-                Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
-                    return None;
-                }
-                Token::Literal(c) => lit.push(c.to_string()),
-                _ => return None,
-            }
-        }
-        Some(lit)
-    }
-
-    /// Returns the pattern as a literal if and only if the pattern exclusiely
-    /// matches the basename of a file path *and* is a literal.
-    ///
-    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
-    /// does not contain a path separator.
-    pub fn base_literal(&self) -> Option<String> {
-        match self.tokens.get(0) {
-            Some(&Token::RecursivePrefix) => {}
-            _ => return None,
-        }
-        let mut lit = String::new();
-        for t in &self.tokens[1..] {
-            match *t {
-                Token::Literal(c) if c == '/' || c == '\\' => return None,
-                Token::Literal(c) => lit.push(c),
-                _ => return None,
-            }
-        }
-        Some(lit)
-    }
-
-    /// Returns true if and only if this pattern only inspects the basename
-    /// of a path.
-    pub fn is_only_basename(&self) -> bool {
-        match self.tokens.get(0) {
-            Some(&Token::RecursivePrefix) => {}
-            _ => return false,
-        }
-        for t in &self.tokens[1..] {
-            match *t {
-                Token::Literal(c) if c == '/' || c == '\\' => return false,
-                Token::RecursivePrefix
-                | Token::RecursiveSuffix
-                | Token::RecursiveZeroOrMore => return false,
-                _ => {}
-            }
-        }
-        true
-    }
-
-    /// Returns the pattern as a literal if and only if the pattern must match
-    /// an entire path exactly.
-    ///
-    /// The basic format of these patterns is `{literal}`.
-    pub fn literal(&self) -> Option<String> {
-        let mut lit = String::new();
-        for t in &self.tokens {
-            match *t {
-                Token::Literal(c) => lit.push(c),
-                _ => return None,
-            }
-        }
-        Some(lit)
-    }
-
-    /// Returns a basename literal prefix of this pattern.
-    pub fn base_literal_prefix(&self) -> Option<String> {
-        match self.tokens.get(0) {
-            Some(&Token::RecursivePrefix) => {}
-            _ => return None,
-        }
-        match self.tokens.last() {
-            Some(&Token::ZeroOrMore) => {}
-            _ => return None,
-        }
-        let mut lit = String::new();
-        for t in &self.tokens[1..self.tokens.len()-1] {
-            match *t {
-                Token::Literal(c) if c == '/' || c == '\\' => return None,
-                Token::Literal(c) => lit.push(c),
-                _ => return None,
-            }
-        }
-        Some(lit)
-    }
-
-    /// Returns a basename literal suffix of this pattern.
-    pub fn base_literal_suffix(&self) -> Option<String> {
-        match self.tokens.get(0) {
-            Some(&Token::RecursivePrefix) => {}
-            _ => return None,
-        }
-        match self.tokens.get(1) {
-            Some(&Token::ZeroOrMore) => {}
-            _ => return None,
-        }
-        let mut lit = String::new();
-        for t in &self.tokens[2..] {
-            match *t {
-                Token::Literal(c) if c == '/' || c == '\\' => return None,
-                Token::Literal(c) => lit.push(c),
-                _ => return None,
-            }
-        }
-        Some(lit)
-    }
-
-    /// Convert this pattern to a string that is guaranteed to be a valid
-    /// regular expression and will represent the matching semantics of this
-    /// glob pattern. This uses a default set of options.
-    #[allow(dead_code)]
-    pub fn to_regex(&self) -> String {
-        self.to_regex_with(&MatchOptions::default())
-    }
-
-    /// Convert this pattern to a string that is guaranteed to be a valid
-    /// regular expression and will represent the matching semantics of this
-    /// glob pattern and the options given.
-    pub fn to_regex_with(&self, options: &MatchOptions) -> String {
-        let mut re = String::new();
-        re.push_str("(?-u)");
-        if options.case_insensitive {
-            re.push_str("(?i)");
-        }
-        re.push('^');
-        // Special case. If the entire glob is just `**`, then it should match
-        // everything.
-        if self.tokens.len() == 1 && self.tokens[0] == Token::RecursivePrefix {
-            re.push_str(".*");
-            re.push('$');
-            return re;
-        }
-        self.tokens_to_regex(options, &self.tokens, &mut re);
-        re.push('$');
-        re
-    }
-
-    fn tokens_to_regex(
-        &self,
-        options: &MatchOptions,
-        tokens: &[Token],
-        re: &mut String,
-    ) {
-        let seps = &*FILE_SEPARATORS;
-
-        for tok in tokens {
-            match *tok {
-                Token::Literal(c) => {
-                    re.push_str(&regex::quote(&c.to_string()));
-                }
-                Token::Any => {
-                    if options.require_literal_separator {
-                        re.push_str(&format!("[^{}]", seps));
-                    } else {
-                        re.push_str(".");
-                    }
-                }
-                Token::ZeroOrMore => {
-                    if options.require_literal_separator {
-                        re.push_str(&format!("[^{}]*", seps));
-                    } else {
-                        re.push_str(".*");
-                    }
-                }
-                Token::RecursivePrefix => {
-                    re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
-                }
-                Token::RecursiveSuffix => {
-                    re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
-                }
-                Token::RecursiveZeroOrMore => {
-                    re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
-                                         sep=seps));
-                }
-                Token::Class { negated, ref ranges } => {
-                    re.push('[');
-                    if negated {
-                        re.push('^');
-                    }
-                    for r in ranges {
-                        if r.0 == r.1 {
-                            // Not strictly necessary, but nicer to look at.
-                            re.push_str(&regex::quote(&r.0.to_string()));
-                        } else {
-                            re.push_str(&regex::quote(&r.0.to_string()));
-                            re.push('-');
-                            re.push_str(&regex::quote(&r.1.to_string()));
-                        }
-                    }
-                    re.push(']');
-                }
-                Token::Alternates(ref patterns) => {
-                    let mut parts = vec![];
-                    for pat in patterns {
-                        let mut altre = String::new();
-                        self.tokens_to_regex(options, &pat.tokens, &mut altre);
-                        parts.push(altre);
-                    }
-                    re.push_str(&parts.join("|"));
-                }
-            }
-        }
-    }
-}
-
-struct Parser<'a> {
-    stack: Vec<Pattern>,
-    chars: iter::Peekable<str::Chars<'a>>,
-    prev: Option<char>,
-    cur: Option<char>,
-}
-
-impl<'a> Parser<'a> {
-    fn parse(&mut self) -> Result<(), Error> {
-        while let Some(c) = self.bump() {
-            match c {
-                '?' => try!(self.push_token(Token::Any)),
-                '*' => try!(self.parse_star()),
-                '[' => try!(self.parse_class()),
-                '{' => try!(self.push_alternate()),
-                '}' => try!(self.pop_alternate()),
-                ',' => try!(self.parse_comma()),
-                c => try!(self.push_token(Token::Literal(c))),
-            }
-        }
-        Ok(())
-    }
-
-    fn push_alternate(&mut self) -> Result<(), Error> {
-        if self.stack.len() > 1 {
-            return Err(Error::NestedAlternates);
-        }
-        Ok(self.stack.push(Pattern::default()))
-    }
-
-    fn pop_alternate(&mut self) -> Result<(), Error> {
-        let mut alts = vec![];
-        while self.stack.len() >= 2 {
-            alts.push(self.stack.pop().unwrap());
-        }
-        self.push_token(Token::Alternates(alts))
-    }
-
-    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
-        match self.stack.last_mut() {
-            None => Err(Error::UnopenedAlternates),
-            Some(ref mut pat) => Ok(pat.tokens.push(tok)),
-        }
-    }
-
-    fn pop_token(&mut self) -> Result<Token, Error> {
-        match self.stack.last_mut() {
-            None => Err(Error::UnopenedAlternates),
-            Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()),
-        }
-    }
-
-    fn have_tokens(&self) -> Result<bool, Error> {
-        match self.stack.last() {
-            None => Err(Error::UnopenedAlternates),
-            Some(ref pat) => Ok(!pat.tokens.is_empty()),
-        }
-    }
-
-    fn parse_comma(&mut self) -> Result<(), Error> {
-        // If we aren't inside a group alternation, then don't
-        // treat commas specially. Otherwise, we need to start
-        // a new alternate.
-        if self.stack.len() <= 1 {
-            self.push_token(Token::Literal(','))
-        } else {
-            Ok(self.stack.push(Pattern::default()))
-        }
-    }
-
-    fn parse_star(&mut self) -> Result<(), Error> {
-        let prev = self.prev;
-        if self.chars.peek() != Some(&'*') {
-            try!(self.push_token(Token::ZeroOrMore));
-            return Ok(());
-        }
-        assert!(self.bump() == Some('*'));
-        if !try!(self.have_tokens()) {
-            try!(self.push_token(Token::RecursivePrefix));
-            let next = self.bump();
-            if !next.is_none() && next != Some('/') {
-                return Err(Error::InvalidRecursive);
-            }
-            return Ok(());
-        }
-        try!(self.pop_token());
-        if prev != Some('/') {
-            if self.stack.len() <= 1
-                || (prev != Some(',') && prev != Some('{')) {
-                return Err(Error::InvalidRecursive);
-            }
-        }
-        match self.chars.peek() {
-            None => {
-                assert!(self.bump().is_none());
-                self.push_token(Token::RecursiveSuffix)
-            }
-            Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
-                self.push_token(Token::RecursiveSuffix)
-            }
-            Some(&'/') => {
-                assert!(self.bump() == Some('/'));
-                self.push_token(Token::RecursiveZeroOrMore)
-            }
-            _ => Err(Error::InvalidRecursive),
-        }
-    }
-
-    fn parse_class(&mut self) -> Result<(), Error> {
-        fn add_to_last_range(
-            r: &mut (char, char),
-            add: char,
-        ) -> Result<(), Error> {
-            r.1 = add;
-            if r.1 < r.0 {
-                Err(Error::InvalidRange(r.0, r.1))
-            } else {
-                Ok(())
-            }
-        }
-        let mut negated = false;
-        let mut ranges = vec![];
-        if self.chars.peek() == Some(&'!') {
-            assert!(self.bump() == Some('!'));
-            negated = true;
-        }
-        let mut first = true;
-        let mut in_range = false;
-        loop {
-            let c = match self.bump() {
-                Some(c) => c,
-                // The only way to successfully break this loop is to observe
-                // a ']'.
-                None => return Err(Error::UnclosedClass),
-            };
-            match c {
-                ']' => {
-                    if first {
-                        ranges.push((']', ']'));
-                    } else {
-                        break;
-                    }
-                }
-                '-' => {
-                    if first {
-                        ranges.push(('-', '-'));
-                    } else if in_range {
-                        // invariant: in_range is only set when there is
-                        // already at least one character seen.
-                        let r = ranges.last_mut().unwrap();
-                        try!(add_to_last_range(r, '-'));
-                        in_range = false;
-                    } else {
-                        assert!(!ranges.is_empty());
-                        in_range = true;
-                    }
-                }
-                c => {
-                    if in_range {
-                        // invariant: in_range is only set when there is
-                        // already at least one character seen.
-                        try!(add_to_last_range(ranges.last_mut().unwrap(), c));
-                    } else {
-                        ranges.push((c, c));
-                    }
-                    in_range = false;
-                }
-            }
-            first = false;
-        }
-        if in_range {
-            // Means that the last character in the class was a '-', so add
-            // it as a literal.
-            ranges.push(('-', '-'));
-        }
-        self.push_token(Token::Class {
-            negated: negated,
-            ranges: ranges,
-        })
-    }
-
-    fn bump(&mut self) -> Option<char> {
-        self.prev = self.cur;
-        self.cur = self.chars.next();
-        self.cur
-    }
-}
-
-fn path_bytes(path: &Path) -> Cow<[u8]> {
-    os_str_bytes(path.as_os_str())
-}
-
-#[cfg(unix)]
-fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
-    use std::os::unix::ffi::OsStrExt;
-    Cow::Borrowed(s.as_bytes())
-}
-
-#[cfg(not(unix))]
-fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
-    // TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
-    // if we could get at the raw bytes, they wouldn't be useful. We *must*
-    // convert to UTF-8 before doing path matching. Unfortunate, but necessary.
-    match s.to_string_lossy() {
-        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
-        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::path::Path;
-
-    use regex::bytes::Regex;
-
-    use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token};
-    use super::Token::*;
-
-    macro_rules! syntax {
-        ($name:ident, $pat:expr, $tokens:expr) => {
-            #[test]
-            fn $name() {
-                let pat = Pattern::new($pat).unwrap();
-                assert_eq!($tokens, pat.tokens);
-            }
-        }
-    }
-
-    macro_rules! syntaxerr {
-        ($name:ident, $pat:expr, $err:expr) => {
-            #[test]
-            fn $name() {
-                let err = Pattern::new($pat).unwrap_err();
-                assert_eq!($err, err);
-            }
-        }
-    }
-
-    macro_rules! toregex {
-        ($name:ident, $pat:expr, $re:expr) => {
-            toregex!($name, $pat, $re, MatchOptions::default());
-        };
-        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
-            #[test]
-            fn $name() {
-                let pat = Pattern::new($pat).unwrap();
-                assert_eq!(
-                    format!("(?-u){}", $re), pat.to_regex_with(&$options));
-            }
-        };
-    }
-
-    macro_rules! matches {
-        ($name:ident, $pat:expr, $path:expr) => {
-            matches!($name, $pat, $path, MatchOptions::default());
-        };
-        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
-            #[test]
-            fn $name() {
-                let pat = Pattern::new($pat).unwrap();
-                let path = &Path::new($path).to_str().unwrap();
-                let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
-                assert!(re.is_match(path.as_bytes()));
-            }
-        };
-    }
-
-    macro_rules! nmatches {
-        ($name:ident, $pat:expr, $path:expr) => {
-            nmatches!($name, $pat, $path, MatchOptions::default());
-        };
-        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
-            #[test]
-            fn $name() {
-                let pat = Pattern::new($pat).unwrap();
-                let path = &Path::new($path).to_str().unwrap();
-                let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
-                assert!(!re.is_match(path.as_bytes()));
-            }
-        };
-    }
-
-    macro_rules! ext {
-        ($name:ident, $pat:expr, $ext:expr) => {
-            #[test]
-            fn $name() {
-                let pat = Pattern::new($pat).unw
author	Andrew Gallant <jamslam@gmail.com>	2016-09-30 19:42:41 -0400
committer	Andrew Gallant <jamslam@gmail.com>	2016-09-30 19:42:41 -0400
commit	fdf24317ac4c34fc0a21efa0a775bc7d3663d498 (patch)
tree	a6e94a61439b0568bfb7bdb5d80e9a0138c48317 /src
parent	b9d5f22a4d20862dfbcbdfc81a07284719cc71c4 (diff)