diff options
-rw-r--r-- | Cargo.lock | 2 | ||||
-rw-r--r-- | globset/Cargo.toml | 2 | ||||
-rw-r--r-- | globset/src/lib.rs | 1270 | ||||
-rw-r--r-- | globset/src/pathutil.rs | 96 | ||||
-rw-r--r-- | globset/src/pattern.rs | 1379 | ||||
-rw-r--r-- | src/gitignore.rs | 24 | ||||
-rw-r--r-- | src/types.rs | 31 | ||||
-rw-r--r-- | tests/tests.rs | 1 |
8 files changed, 1884 insertions, 921 deletions
@@ -82,8 +82,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" name = "globset" version = "0.1.0" dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/globset/Cargo.toml b/globset/Cargo.toml index 48e375fb..cf63f397 100644 --- a/globset/Cargo.toml +++ b/globset/Cargo.toml @@ -4,7 +4,9 @@ version = "0.1.0" authors = ["Andrew Gallant <jamslam@gmail.com>"] [dependencies] +aho-corasick = "0.5.3" fnv = "1.0" lazy_static = "0.2" +log = "0.3" memchr = "0.1" regex = "0.1.77" diff --git a/globset/src/lib.rs b/globset/src/lib.rs index b5cbb5be..f608a74a 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -13,40 +13,42 @@ that rigamorole when I wrote this. In particular, it could be fast/good enough to make its way into `glob` proper. */ -// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets -// here. For example, we do a first pass single-regex-of-all-globs filter -// before actually running the regex set. This turns out to be faster, -// especially in fresh checkouts of repos that don't have a lot of ignored -// files. It's not clear how hard it is to make the regex set faster. -// -// An alternative avenue is to stop doing "regex all the things." (Which, to -// be fair, is pretty fast---I just expected it to be faster.) We could do -// something clever using assumptions along the lines of "oh, most ignore -// patterns are either literals or are for ignoring file extensions." (Look -// at the .gitignore for the chromium repo---just about every pattern satisfies -// that assumption.) +#![deny(missing_docs)] +extern crate aho_corasick; extern crate fnv; #[macro_use] extern crate lazy_static; +#[macro_use] +extern crate log; extern crate memchr; extern crate regex; use std::borrow::Cow; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::error::Error as StdError; use std::ffi::{OsStr, OsString}; use std::fmt; use std::hash; -use std::iter; use std::path::Path; use std::str; -use regex::bytes::Regex; +use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton}; +use regex::bytes::{Regex, RegexBuilder, RegexSet}; -use pathutil::file_name; +use pathutil::{file_name, file_name_ext, os_str_bytes, path_bytes}; +use pattern::MatchStrategy; +pub use pattern::{Pattern, PatternBuilder, PatternMatcher}; mod pathutil; +mod pattern; + +macro_rules! eprintln { + ($($tt:tt)*) => {{ + use std::io::Write; + let _ = writeln!(&mut ::std::io::stderr(), $($tt)*); + }} +} lazy_static! { static ref FILE_SEPARATORS: String = regex::quote(r"/\"); @@ -55,12 +57,24 @@ lazy_static! { /// Represents an error that can occur when parsing a glob pattern. #[derive(Clone, Debug, Eq, PartialEq)] pub enum Error { + /// Occurs when a use of `**` is invalid. Namely, `**` can only appear + /// adjacent to a path separator, or the beginning/end of a glob. InvalidRecursive, + /// Occurs when a character class (e.g., `[abc]`) is not closed. UnclosedClass, + /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For + /// example, if the range starts with a lexicographically larger character + /// than it ends with. InvalidRange(char, char), + /// Occurs when a `}` is found without a matching `{`. UnopenedAlternates, + /// Occurs when a `{` is found without a matching `}`. UnclosedAlternates, + /// Occurs when an alternating group is nested inside another alternating + /// group, e.g., `{{a,b},{c,d}}`. NestedAlternates, + /// An error associated with parsing or compiling a regex. + Regex(String), } impl StdError for Error { @@ -86,6 +100,7 @@ impl StdError for Error { Error::NestedAlternates => { "nested alternate groups are not allowed" } + Error::Regex(ref err) => err, } } } @@ -97,7 +112,8 @@ impl fmt::Display for Error { | Error::UnclosedClass | Error::UnopenedAlternates | Error::UnclosedAlternates - | Error::NestedAlternates => { + | Error::NestedAlternates + | Error::Regex(_) => { write!(f, "{}", self.description()) } Error::InvalidRange(s, e) => { @@ -107,34 +123,18 @@ impl fmt::Display for Error { } } -/// SetYesNo represents a group of globs that can be matched together in a -/// single pass. SetYesNo can only determine whether a particular path matched -/// any pattern in the set. -#[derive(Clone, Debug)] -pub struct SetYesNo { - re: Regex, +fn new_regex(pat: &str) -> Result<Regex, Error> { + RegexBuilder::new(pat) + .dot_matches_new_line(true) + .size_limit(10 * (1 << 20)) + .dfa_size_limit(10 * (1 << 20)) + .compile() + .map_err(|err| Error::Regex(err.to_string())) } -impl SetYesNo { - /// Returns true if and only if the given path matches at least one glob - /// in this set. - pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool { - self.re.is_match(&*path_bytes(path.as_ref())) - } - - fn new( - pats: &[(Pattern, MatchOptions)], - ) -> Result<SetYesNo, regex::Error> { - let mut joined = String::new(); - for &(ref p, ref o) in pats { - let part = format!("(?:{})", p.to_regex_with(o)); - if !joined.is_empty() { - joined.push('|'); - } - joined.push_str(&part); - } - Ok(SetYesNo { re: try!(Regex::new(&joined)) }) - } +fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error> + where S: AsRef<str>, I: IntoIterator<Item=S> { + RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string())) } type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; @@ -143,20 +143,21 @@ type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; /// pass. #[derive(Clone, Debug)] pub struct Set { - exts: HashMap<OsString, Vec<usize>, Fnv>, - literals: HashMap<Vec<u8>, Vec<usize>, Fnv>, - base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>, - base_prefixes: Vec<Vec<u8>>, - base_prefixes_map: Vec<usize>, - base_suffixes: Vec<Vec<u8>>, - base_suffixes_map: Vec<usize>, - base_regexes: Vec<Regex>, - base_regexes_map: Vec<usize>, - regexes: Vec<Regex>, - regexes_map: Vec<usize>, + strats: Vec<SetMatchStrategy>, } impl Set { + /// Returns true if any glob in this set matches the path given. + pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool { + let candidate = Candidate::new(path.as_ref()); + for strat in &self.strats { + if strat.is_match(&candidate) { + return true; + } + } + false + } + /// Returns the sequence number of every glob pattern that matches the /// given path. #[allow(dead_code)] @@ -174,110 +175,67 @@ impl Set { into: &mut Vec<usize>, ) { into.clear(); - let path = path.as_ref(); - let path_bytes = &*path_bytes(path); - let basename = file_name(path).map(|b| os_str_bytes(b)); - if !self.exts.is_empty() { - if let Some(ext) = path.extension() { - if let Some(matches) = self.exts.get(ext) { - into.extend(matches.as_slice()); - } - } + let candidate = Candidate::new(path.as_ref()); + for strat in &self.strats { + strat.matches_into(&candidate, into); } - if !self.literals.is_empty() { - if let Some(matches) = self.literals.get(path_bytes) { - into.extend(matches.as_slice()); - } - } - if !self.base_literals.is_empty() { - if let Some(ref basename) = basename { - if let Some(matches) = self.base_literals.get(&**basename) { - into.extend(matches.as_slice()); + into.sort(); + into.dedup(); + } + + fn new(pats: &[Pattern]) -> Result<Set, Error> { + let mut lits = LiteralStrategy::new(); + let mut base_lits = BasenameLiteralStrategy::new(); + let mut exts = ExtensionStrategy::new(); + let mut prefixes = MultiStrategyBuilder::new(); + let mut suffixes = MultiStrategyBuilder::new(); + let mut required_exts = RequiredExtensionStrategyBuilder::new(); + let mut regexes = MultiStrategyBuilder::new(); + for (i, p) in pats.iter().enumerate() { + match MatchStrategy::new(p) { + MatchStrategy::Literal(lit) => { + lits.add(i, lit); } - } - } - if !self.base_prefixes.is_empty() { - if let Some(ref basename) = basename { - let basename = &**basename; - for (i, pre) in self.base_prefixes.iter().enumerate() { - if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] { - into.push(self.base_prefixes_map[i]); - } + MatchStrategy::BasenameLiteral(lit) => { + base_lits.add(i, lit); } - } - } - if !self.base_suffixes.is_empty() { - if let Some(ref basename) = basename { - let basename = &**basename; - for (i, suf) in self.base_suffixes.iter().enumerate() { - if suf.len() > basename.len() { - continue; - } - let (s, e) = (basename.len() - suf.len(), basename.len()); - if &**suf == &basename[s..e] { - into.push(self.base_suffixes_map[i]); + MatchStrategy::Extension(ext) => { + exts.add(i, ext); + } + MatchStrategy::Prefix(prefix) => { + prefixes.add(i, prefix); + } + MatchStrategy::Suffix { suffix, component } => { + if component { + lits.add(i, suffix[1..].to_string()); } + suffixes.add(i, suffix); } - } - } - if let Some(ref basename) = basename { - for (i, re) in self.base_regexes.iter().enumerate() { - if re.is_match(&**basename) { - into.push(self.base_regexes_map[i]); + MatchStrategy::RequiredExtension(ext) => { + required_exts.add(i, ext, p.regex().to_owned()); + } + MatchStrategy::Regex => { + debug!("glob converted to regex: {:?}", p); + regexes.add(i, p.regex().to_owned()); } } } - for (i, re) in self.regexes.iter().enumerate() { - if re.is_match(path_bytes) { - into.push(self.regexes_map[i]); - } - } - into.sort(); - } - - fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> { - let fnv = Fnv::default(); - let mut exts = HashMap::with_hasher(fnv.clone()); - let mut literals = HashMap::with_hasher(fnv.clone()); - let mut base_literals = HashMap::with_hasher(fnv.clone()); - let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]); - let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]); - let (mut regexes, mut regexes_map) = (vec![], vec![]); - let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]); - for (i, &(ref p, ref o)) in pats.iter().enumerate() { - if let Some(ext) = p.ext() { - exts.entry(ext).or_insert(vec![]).push(i); - } else if let Some(literal) = p.literal() { - literals.entry(literal.into_bytes()).or_insert(vec![]).push(i); - } else if let Some(literal) = p.base_literal() { - base_literals - .entry(literal.into_bytes()).or_insert(vec![]).push(i); - } else if let Some(literal) = p.base_literal_prefix() { - base_prefixes.push(literal.into_bytes()); - base_prefixes_map.push(i); - } else if let Some(literal) = p.base_literal_suffix() { - base_suffixes.push(literal.into_bytes()); - base_suffixes_map.push(i); - } else if p.is_only_basename() { - base_regexes.push(try!(Regex::new(&p.to_regex_with(o)))); - base_regexes_map.push(i); - } else { - regexes.push(try!(Regex::new(&p.to_regex_with(o)))); - regexes_map.push(i); - } - } + debug!("built glob set; {} literals, {} basenames, {} extensions, \ + {} prefixes, {} suffixes, {} required extensions, {} regexes", + lits.0.len(), base_lits.0.len(), exts.0.len(), + prefixes.literals.len(), suffixes.literals.len(), + required_exts.0.len(), regexes.literals.len()); Ok(Set { - exts: exts, - literals: literals, - base_literals: base_literals, - base_prefixes: base_prefixes, - base_prefixes_map: base_prefixes_map, - base_suffixes: base_suffixes, - base_suffixes_map: base_suffixes_map, - base_regexes: base_regexes, - base_regexes_map: base_regexes_map, - regexes: regexes, - regexes_map: regexes_map, + strats: vec![ + SetMatchStrategy::Extension(exts), + SetMatchStrategy::BasenameLiteral(base_lits), + SetMatchStrategy::Literal(lits), + SetMatchStrategy::Suffix(suffixes.suffix()), + SetMatchStrategy::Prefix(prefixes.prefix()), + SetMatchStrategy::RequiredExtension( + try!(required_exts.build())), + SetMatchStrategy::Regex(try!(regexes.regex_set())), + ], }) } } @@ -285,7 +243,7 @@ impl Set { /// SetBuilder builds a group of patterns that can be used to simultaneously /// match a file path. pub struct SetBuilder { - pats: Vec<(Pattern, MatchOptions)>, + pats: Vec<Pattern>, } impl SetBuilder { @@ -299,858 +257,374 @@ impl SetBuilder { /// Builds a new matcher from all of the glob patterns added so far. /// /// Once a matcher is built, no new patterns can be added to it. - pub fn build(&self) -> Result<Set, regex::Error> { + pub fn build(&self) -> Result<Set, Error> { Set::new(&self.pats) } - /// Like `build`, but returns a matcher that can only answer yes/no. - pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> { - SetYesNo::new(&self.pats) - } - /// Add a new pattern to this set. - /// - /// If the pattern could not be parsed as a glob, then an error is - /// returned. #[allow(dead_code)] - pub fn add(&mut self, pat: &str) -> Result<(), Error> { - self.add_with(pat, &MatchOptions::default()) - } - - /// Like add, but sets the match options for this particular pattern. - pub fn add_with( - &mut self, - pat: &str, - opts: &MatchOptions, - ) -> Result<(), Error> { - let parsed = try!(Pattern::new(pat)); - // if let Some(ext) = parsed.ext() { - // eprintln!("ext :: {:?} :: {:?}", ext, pat); - // } else if let Some(lit) = parsed.literal() { - // eprintln!("literal :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal() { - // eprintln!("base_literal :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal_prefix() { - // eprintln!("base_literal_prefix :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal_suffix() { - // eprintln!("base_literal_suffix :: {:?} :: {:?}", lit, pat); - // } else if parsed.is_only_basename() { - // eprintln!("basename-regex :: {:?} :: {:?}", pat, parsed); - // } else { - // eprintln!("regex :: {:?} :: {:?}", pat, parsed); - // } - self.pats.push((parsed, opts.clone())); - Ok(()) + pub fn add(&mut self, pat: Pattern) -> &mut SetBuilder { + self.pats.push(pat); + self } } -/// Pattern represents a successfully parsed shell glob pattern. -/// -/// It cannot be used directly to match file paths, but it can be converted -/// to a regular expression string. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Pattern { - tokens: Vec<Token>, -} - -/// Options to control the matching semantics of a glob. The default value -/// has all options disabled. -#[derive(Clone, Debug, Default)] -pub struct MatchOptions { - /// When true, matching is done case insensitively. - pub case_insensitive: bool, - /// When true, neither `*` nor `?` match the current system's path - /// separator. - pub require_literal_separator: bool, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -enum Token { - Literal(char), - Any, - ZeroOrMore, - RecursivePrefix, - RecursiveSuffix, - RecursiveZeroOrMore, - Class { - negated: bool, - ranges: Vec<(char, char)>, - }, - Alternates(Vec<Pattern>), +#[derive(Clone, Debug)] +struct Candidate<'a> { + path: Cow<'a, [u8]>, + basename: Cow<'a, [u8]>, + ext: &'a OsStr, } -impl Pattern { - /// Parse a shell glob pattern. - /// - /// If the pattern is not a valid glob, then an error is returned. - pub fn new(pat: &str) -> Result<Pattern, Error> { - let mut p = Parser { - stack: vec![Pattern::default()], - chars: pat.chars().peekable(), - prev: None, - cur: None, - }; - try!(p.parse()); - if p.stack.is_empty() { - Err(Error::UnopenedAlternates) - } else if p.stack.len() > 1 { - Err(Error::UnclosedAlternates) - } else { - Ok(p.stack.pop().unwrap()) +impl<'a> Candidate<'a> { + fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> { + let path = path.as_ref(); + let basename = file_name(path).unwrap_or(OsStr::new("")); + Candidate { + path: path_bytes(path), + basename: os_str_bytes(basename), + ext: file_name_ext(basename).unwrap_or(OsStr::new("")), } } - /// Returns an extension if this pattern exclusively matches it. - pub fn ext(&self) -> Option<OsString> { - if self.tokens.len() <= 3 { - return None; - } - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.get(1) { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - match self.tokens.get(2) { - Some(&Token::Literal(c)) if c == '.' => {} - _ => return None, - } - let mut lit = OsString::new(); - for t in self.tokens[3..].iter() { - match *t { - Token::Literal(c) if c == '/' || c == '\\' || c == '.' => { - return None; - } - Token::Literal(c) => lit.push(c.to_string()), - _ => return None, - } + fn path_prefix(&self, max: usize) -> &[u8] { + if self.path.len() <= max { + &*self.path + } else { + &self.path[..max] } - Some(lit) } - /// Returns the pattern as a literal if and only if the pattern exclusiely - /// matches the basename of a file path *and* is a literal. - /// - /// The basic format of these patterns is `**/{literal}`, where `{literal}` - /// does not contain a path separator. - pub fn base_literal(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[1..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } + fn path_suffix(&self, max: usize) -> &[u8] { + if self.path.len() <= max { + &*self.path + } else { + &self.path[self.path.len() - max..] } - Some(lit) } +} - /// Returns true if and only if this pattern only inspects the basename - /// of a path. - pub fn is_only_basename(&self) -> bool { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return false, - } - for t in &self.tokens[1..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return false, - Token::RecursivePrefix - | Token::RecursiveSuffix - | Token::RecursiveZeroOrMore => return false, - _ => {} - } - } - true - } +#[derive(Clone, Debug)] +enum SetMatchStrategy { + Literal(LiteralStrategy), + BasenameLiteral(BasenameLiteralStrategy), + Extension(ExtensionStrategy), + Prefix(PrefixStrategy), + Suffix(SuffixStrategy), + RequiredExtension(RequiredExtensionStrategy), + Regex(RegexSetStrategy), +} - /// Returns the pattern as a literal if and only if the pattern must match - /// an entire path exactly. - /// - /// The basic format of these patterns is `{literal}`. - pub fn literal(&self) -> Option<String> { - let mut lit = String::new(); - for t in &self.tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } +impl SetMatchStrategy { + fn is_match(&self, candidate: &Candidate) -> bool { + use self::SetMatchStrategy::*; + match *self { + Literal(ref s) => s.is_match(candidate), + BasenameLiteral(ref s) => s.is_match(candidate), + Extension(ref s) => s.is_match(candidate), + Prefix(ref s) => s.is_match(candidate), + Suffix(ref s) => s.is_match(candidate), + RequiredExtension(ref s) => s.is_match(candidate), + Regex(ref s) => s.is_match(candidate), } - Some(lit) } - /// Returns a basename literal prefix of this pattern. - pub fn base_literal_prefix(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.last() { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[1..self.tokens.len()-1] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } + fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) { + use self::SetMatchStrategy::*; + match *self { + Literal(ref s) => s.matches_into(candidate, matches), + BasenameLiteral(ref s) => s.matches_into(candidate, matches), + Extension(ref s) => s.matches_into(candidate, matches), + Prefix(ref s) => s.matches_into(candidate, matches), + Suffix(ref s) => s.matches_into(candidate, matches), + RequiredExtension(ref s) => s.matches_into(candidate, matches), + Regex(ref s) => s.matches_into(candidate, matches), } - Some(lit) } +} - /// Returns a basename literal suffix of this pattern. - pub fn base_literal_suffix(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.get(1) { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[2..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } - } - Some(lit) - } +#[derive(Clone, Debug)] +struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); - /// Convert this pattern to a string that is guaranteed to be a valid - /// regular expression and will represent the matching semantics of this - /// glob pattern. This uses a default set of options. - #[allow(dead_code)] - pub fn to_regex(&self) -> String { - self.to_regex_with(&MatchOptions::default()) +impl LiteralStrategy { + fn new() -> LiteralStrategy { + LiteralStrategy(BTreeMap::new()) } - /// Convert this pattern to a string that is guaranteed to be a valid - /// regular expression and will represent the matching semantics of this - /// glob pattern and the options given. - pub fn to_regex_with(&self, options: &MatchOptions) -> String { - let mut re = String::new(); - re.push_str("(?-u)"); - if options.case_insensitive { - re.push_str("(?i)"); - } - re.push('^'); - // Special case. If the entire glob is just `**`, then it should match - // everything. - if self.tokens.len() == 1 && self.tokens[0] == Token::RecursivePrefix { - re.push_str(".*"); - re.push('$'); - return re; - } - self.tokens_to_regex(options, &self.tokens, &mut re); - re.push('$'); - re + fn add(&mut self, global_index: usize, lit: String) { + self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); } - fn tokens_to_regex( - &self, - options: &MatchOptions, - tokens: &[Token], - re: &mut String, - ) { - let seps = &*FILE_SEPARATORS; + fn is_match(&self, candidate: &Candidate) -> bool { + self.0.contains_key(&*candidate.path) + } - for tok in tokens { - match *tok { - Token::Literal(c) => { - re.push_str(®ex::quote(&c.to_string())); - } - Token::Any => { - if options.require_literal_separator { - re.push_str(&format!("[^{}]", seps)); - } else { - re.push_str("."); - } - } - Token::ZeroOrMore => { - if options.require_literal_separator { - re.push_str(&format!("[^{}]*", seps)); - } else { - re.push_str(".*"); - } - } - Token::RecursivePrefix => { - re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps)); - } - Token::RecursiveSuffix => { - re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps)); - } - Token::RecursiveZeroOrMore => { - re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])", - sep=seps)); - } - Token::Class { negated, ref ranges } => { - re.push('['); - if negated { - re.push('^'); - } - for r in ranges { - if r.0 == r.1 { - // Not strictly necessary, but nicer to look at. - re.push_str(®ex::quote(&r.0.to_string())); - } else { - re.push_str(®ex::quote(&r.0.to_string())); - re.push('-'); - re.push_str(®ex::quote(&r.1.to_string())); - } - } - re.push(']'); - } - Token::Alternates(ref patterns) => { - let mut parts = vec![]; - for pat in patterns { - let mut altre = String::new(); - self.tokens_to_regex(options, &pat.tokens, &mut altre); - parts.push(altre); - } - re.push_str(&parts.join("|")); - } - } + #[inline(never)] + fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) { + if let Some(hits) = self.0.get(&*candidate.path) { + matches.extend(hits); } } } -struct Parser<'a> { - stack: Vec<Pattern>, - chars: iter::Peekable<str::Chars<'a>>, - prev: Option<char>, - cur: Option<char>, -} +#[derive(Clone, Debug)] +struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>); -impl<'a> Parser<'a> { - fn parse(&mut self) -> Result<(), Error> { - while let Some(c) = self.bump() { - match c { - '?' => try!(self.push_token(Token::Any)), - '*' => try!(self.parse_star()), - '[' => try!(self.parse_class()), - '{' => try!(self.push_alternate()), - '}' => try!(self.pop_alternate()), - ',' => try!(self.parse_comma()), - c => try!(self.push_token(Token::Literal(c))), - } - } - Ok(()) +impl BasenameLiteralStrategy { + fn new() -> BasenameLiteralStrategy { + BasenameLiteralStrategy(BTreeMap::new()) } - fn push_alternate(&mut self) -> Result<(), Error> { - if self.stack.len() > 1 { - return Err(Error::NestedAlternates); - } - Ok(self.stack.push(Pattern::default())) + fn add(&mut self, global_index: usize, lit: String) { + self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index); } - fn pop_alternate(&mut self) -> Result<(), Error> { - let mut alts = vec![]; - while self.stack.len() >= 2 { - alts.push(self.stack.pop().unwrap()); + fn is_match(&self, candidate: &Candidate) -> bool { + if candidate.basename.is_empty() { + return false; } - self.push_token(Token::Alternates(alts)) + self.0.contains_key(&*candidate.basename) } - fn push_token(&mut self, tok: Token) -> Result<(), Error> { - match self.stack.last_mut() { - None => Err(Error::UnopenedAlternates), - Some(ref mut pat) => Ok(pat.tokens.push(tok)), + #[inline(never)] + fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) { + if candidate.basename.is_empty() { + return; + } + if let Some(hits) = self.0.get(&*candidate.basename) { + matches.extend(hits); } } +} - fn pop_token(&mut self) -> Result<Token, Error> { - match self.stack.last_mut() { - None => Err(Error::UnopenedAlternates), - Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()), - } +#[derive(Clone, Debug)] +struct ExtensionStrategy(HashMap<OsString, Vec<usize>, Fnv>); + +impl ExtensionStrategy { + fn new() -> ExtensionStrategy { + ExtensionStrategy(HashMap::with_hasher(Fnv::default())) } - fn have_tokens(&self) -> Result<bool, Error> { - match self.stack.last() { - None => Err(Error::UnopenedAlternates), - Some(ref pat) => Ok(!pat.tokens.is_empty()), - } + fn add(&mut self, global_index: usize, ext: OsString) { + self.0.entry(ext).or_insert(vec![]).push(global_index); } - fn parse_comma(&mut self) -> Result<(), Error> { - // If we aren't inside a group alternation, then don't - // treat commas specially. Otherwise, we need to start - // a new alternate. - if self.stack.len() <= 1 { - self.push_token(Token::Literal(',')) - } else { - Ok(self.stack.push(Pattern::default())) + fn is_match(&self, candidate: &Candidate) -> bool { + if candidate.ext.is_empty() { + return false; } + self.0.contains_key(candidate.ext) } - fn parse_star(&mut self) -> Result<(), Error> { - let prev = self.prev; - if self.chars.peek() != Some(&'*') { - try!(self.push_token(Token::ZeroOrMore)); - return Ok(()); + #[inline(never)] + fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) { + if candidate.ext.is_empty() { + return; } - assert!(self.bump() == Some('*')); - if !try!(self.have_tokens()) { - try!(self.push_token(Token::RecursivePrefix)); - let next = self.bump(); - if !next.is_none() && next != Some('/') { - return Err(Error::InvalidRecursive); - } - return Ok(()); + if let Some(hits) = self.0.get(candidate.ext) { + matches.extend(hits); } - try!(self.pop_token()); - if prev != Some('/') { - if self.stack.len() <= 1 - || (prev != Some(',') && prev != Some('{')) { - return Err(Error::InvalidRecursive); + } +} + +#[derive(Clone, Debug)] +struct PrefixStrategy { + matcher: FullAcAutomaton<Vec<u8>>, + map: Vec<usize>, + longest: usize, +} + +impl PrefixStrategy { + fn is_match(&self, candidate: &Candidate) -> bool { + let path = candidate.path_prefix(self.longest); + for m in self.matcher.find_overlapping(path) { + if m.start == 0 { + return true; |