summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock2
-rw-r--r--globset/Cargo.toml2
-rw-r--r--globset/src/lib.rs1270
-rw-r--r--globset/src/pathutil.rs96
-rw-r--r--globset/src/pattern.rs1379
-rw-r--r--src/gitignore.rs24
-rw-r--r--src/types.rs31
-rw-r--r--tests/tests.rs1
8 files changed, 1884 insertions, 921 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 21640763..cce72a7b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -82,8 +82,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
name = "globset"
version = "0.1.0"
dependencies = [
+ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
]
diff --git a/globset/Cargo.toml b/globset/Cargo.toml
index 48e375fb..cf63f397 100644
--- a/globset/Cargo.toml
+++ b/globset/Cargo.toml
@@ -4,7 +4,9 @@ version = "0.1.0"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
[dependencies]
+aho-corasick = "0.5.3"
fnv = "1.0"
lazy_static = "0.2"
+log = "0.3"
memchr = "0.1"
regex = "0.1.77"
diff --git a/globset/src/lib.rs b/globset/src/lib.rs
index b5cbb5be..f608a74a 100644
--- a/globset/src/lib.rs
+++ b/globset/src/lib.rs
@@ -13,40 +13,42 @@ that rigamorole when I wrote this. In particular, it could be fast/good enough
to make its way into `glob` proper.
*/
-// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets
-// here. For example, we do a first pass single-regex-of-all-globs filter
-// before actually running the regex set. This turns out to be faster,
-// especially in fresh checkouts of repos that don't have a lot of ignored
-// files. It's not clear how hard it is to make the regex set faster.
-//
-// An alternative avenue is to stop doing "regex all the things." (Which, to
-// be fair, is pretty fast---I just expected it to be faster.) We could do
-// something clever using assumptions along the lines of "oh, most ignore
-// patterns are either literals or are for ignoring file extensions." (Look
-// at the .gitignore for the chromium repo---just about every pattern satisfies
-// that assumption.)
+#![deny(missing_docs)]
+extern crate aho_corasick;
extern crate fnv;
#[macro_use]
extern crate lazy_static;
+#[macro_use]
+extern crate log;
extern crate memchr;
extern crate regex;
use std::borrow::Cow;
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
use std::error::Error as StdError;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash;
-use std::iter;
use std::path::Path;
use std::str;
-use regex::bytes::Regex;
+use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
+use regex::bytes::{Regex, RegexBuilder, RegexSet};
-use pathutil::file_name;
+use pathutil::{file_name, file_name_ext, os_str_bytes, path_bytes};
+use pattern::MatchStrategy;
+pub use pattern::{Pattern, PatternBuilder, PatternMatcher};
mod pathutil;
+mod pattern;
+
+macro_rules! eprintln {
+ ($($tt:tt)*) => {{
+ use std::io::Write;
+ let _ = writeln!(&mut ::std::io::stderr(), $($tt)*);
+ }}
+}
lazy_static! {
static ref FILE_SEPARATORS: String = regex::quote(r"/\");
@@ -55,12 +57,24 @@ lazy_static! {
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
+ /// Occurs when a use of `**` is invalid. Namely, `**` can only appear
+ /// adjacent to a path separator, or the beginning/end of a glob.
InvalidRecursive,
+ /// Occurs when a character class (e.g., `[abc]`) is not closed.
UnclosedClass,
+ /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
+ /// example, if the range starts with a lexicographically larger character
+ /// than it ends with.
InvalidRange(char, char),
+ /// Occurs when a `}` is found without a matching `{`.
UnopenedAlternates,
+ /// Occurs when a `{` is found without a matching `}`.
UnclosedAlternates,
+ /// Occurs when an alternating group is nested inside another alternating
+ /// group, e.g., `{{a,b},{c,d}}`.
NestedAlternates,
+ /// An error associated with parsing or compiling a regex.
+ Regex(String),
}
impl StdError for Error {
@@ -86,6 +100,7 @@ impl StdError for Error {
Error::NestedAlternates => {
"nested alternate groups are not allowed"
}
+ Error::Regex(ref err) => err,
}
}
}
@@ -97,7 +112,8 @@ impl fmt::Display for Error {
| Error::UnclosedClass
| Error::UnopenedAlternates
| Error::UnclosedAlternates
- | Error::NestedAlternates => {
+ | Error::NestedAlternates
+ | Error::Regex(_) => {
write!(f, "{}", self.description())
}
Error::InvalidRange(s, e) => {
@@ -107,34 +123,18 @@ impl fmt::Display for Error {
}
}
-/// SetYesNo represents a group of globs that can be matched together in a
-/// single pass. SetYesNo can only determine whether a particular path matched
-/// any pattern in the set.
-#[derive(Clone, Debug)]
-pub struct SetYesNo {
- re: Regex,
+fn new_regex(pat: &str) -> Result<Regex, Error> {
+ RegexBuilder::new(pat)
+ .dot_matches_new_line(true)
+ .size_limit(10 * (1 << 20))
+ .dfa_size_limit(10 * (1 << 20))
+ .compile()
+ .map_err(|err| Error::Regex(err.to_string()))
}
-impl SetYesNo {
- /// Returns true if and only if the given path matches at least one glob
- /// in this set.
- pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
- self.re.is_match(&*path_bytes(path.as_ref()))
- }
-
- fn new(
- pats: &[(Pattern, MatchOptions)],
- ) -> Result<SetYesNo, regex::Error> {
- let mut joined = String::new();
- for &(ref p, ref o) in pats {
- let part = format!("(?:{})", p.to_regex_with(o));
- if !joined.is_empty() {
- joined.push('|');
- }
- joined.push_str(&part);
- }
- Ok(SetYesNo { re: try!(Regex::new(&joined)) })
- }
+fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
+ where S: AsRef<str>, I: IntoIterator<Item=S> {
+ RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string()))
}
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
@@ -143,20 +143,21 @@ type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// pass.
#[derive(Clone, Debug)]
pub struct Set {
- exts: HashMap<OsString, Vec<usize>, Fnv>,
- literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
- base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
- base_prefixes: Vec<Vec<u8>>,
- base_prefixes_map: Vec<usize>,
- base_suffixes: Vec<Vec<u8>>,
- base_suffixes_map: Vec<usize>,
- base_regexes: Vec<Regex>,
- base_regexes_map: Vec<usize>,
- regexes: Vec<Regex>,
- regexes_map: Vec<usize>,
+ strats: Vec<SetMatchStrategy>,
}
impl Set {
+ /// Returns true if any glob in this set matches the path given.
+ pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
+ let candidate = Candidate::new(path.as_ref());
+ for strat in &self.strats {
+ if strat.is_match(&candidate) {
+ return true;
+ }
+ }
+ false
+ }
+
/// Returns the sequence number of every glob pattern that matches the
/// given path.
#[allow(dead_code)]
@@ -174,110 +175,67 @@ impl Set {
into: &mut Vec<usize>,
) {
into.clear();
- let path = path.as_ref();
- let path_bytes = &*path_bytes(path);
- let basename = file_name(path).map(|b| os_str_bytes(b));
- if !self.exts.is_empty() {
- if let Some(ext) = path.extension() {
- if let Some(matches) = self.exts.get(ext) {
- into.extend(matches.as_slice());
- }
- }
+ let candidate = Candidate::new(path.as_ref());
+ for strat in &self.strats {
+ strat.matches_into(&candidate, into);
}
- if !self.literals.is_empty() {
- if let Some(matches) = self.literals.get(path_bytes) {
- into.extend(matches.as_slice());
- }
- }
- if !self.base_literals.is_empty() {
- if let Some(ref basename) = basename {
- if let Some(matches) = self.base_literals.get(&**basename) {
- into.extend(matches.as_slice());
+ into.sort();
+ into.dedup();
+ }
+
+ fn new(pats: &[Pattern]) -> Result<Set, Error> {
+ let mut lits = LiteralStrategy::new();
+ let mut base_lits = BasenameLiteralStrategy::new();
+ let mut exts = ExtensionStrategy::new();
+ let mut prefixes = MultiStrategyBuilder::new();
+ let mut suffixes = MultiStrategyBuilder::new();
+ let mut required_exts = RequiredExtensionStrategyBuilder::new();
+ let mut regexes = MultiStrategyBuilder::new();
+ for (i, p) in pats.iter().enumerate() {
+ match MatchStrategy::new(p) {
+ MatchStrategy::Literal(lit) => {
+ lits.add(i, lit);
}
- }
- }
- if !self.base_prefixes.is_empty() {
- if let Some(ref basename) = basename {
- let basename = &**basename;
- for (i, pre) in self.base_prefixes.iter().enumerate() {
- if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
- into.push(self.base_prefixes_map[i]);
- }
+ MatchStrategy::BasenameLiteral(lit) => {
+ base_lits.add(i, lit);
}
- }
- }
- if !self.base_suffixes.is_empty() {
- if let Some(ref basename) = basename {
- let basename = &**basename;
- for (i, suf) in self.base_suffixes.iter().enumerate() {
- if suf.len() > basename.len() {
- continue;
- }
- let (s, e) = (basename.len() - suf.len(), basename.len());
- if &**suf == &basename[s..e] {
- into.push(self.base_suffixes_map[i]);
+ MatchStrategy::Extension(ext) => {
+ exts.add(i, ext);
+ }
+ MatchStrategy::Prefix(prefix) => {
+ prefixes.add(i, prefix);
+ }
+ MatchStrategy::Suffix { suffix, component } => {
+ if component {
+ lits.add(i, suffix[1..].to_string());
}
+ suffixes.add(i, suffix);
}
- }
- }
- if let Some(ref basename) = basename {
- for (i, re) in self.base_regexes.iter().enumerate() {
- if re.is_match(&**basename) {
- into.push(self.base_regexes_map[i]);
+ MatchStrategy::RequiredExtension(ext) => {
+ required_exts.add(i, ext, p.regex().to_owned());
+ }
+ MatchStrategy::Regex => {
+ debug!("glob converted to regex: {:?}", p);
+ regexes.add(i, p.regex().to_owned());
}
}
}
- for (i, re) in self.regexes.iter().enumerate() {
- if re.is_match(path_bytes) {
- into.push(self.regexes_map[i]);
- }
- }
- into.sort();
- }
-
- fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
- let fnv = Fnv::default();
- let mut exts = HashMap::with_hasher(fnv.clone());
- let mut literals = HashMap::with_hasher(fnv.clone());
- let mut base_literals = HashMap::with_hasher(fnv.clone());
- let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
- let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
- let (mut regexes, mut regexes_map) = (vec![], vec![]);
- let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
- for (i, &(ref p, ref o)) in pats.iter().enumerate() {
- if let Some(ext) = p.ext() {
- exts.entry(ext).or_insert(vec![]).push(i);
- } else if let Some(literal) = p.literal() {
- literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
- } else if let Some(literal) = p.base_literal() {
- base_literals
- .entry(literal.into_bytes()).or_insert(vec![]).push(i);
- } else if let Some(literal) = p.base_literal_prefix() {
- base_prefixes.push(literal.into_bytes());
- base_prefixes_map.push(i);
- } else if let Some(literal) = p.base_literal_suffix() {
- base_suffixes.push(literal.into_bytes());
- base_suffixes_map.push(i);
- } else if p.is_only_basename() {
- base_regexes.push(try!(Regex::new(&p.to_regex_with(o))));
- base_regexes_map.push(i);
- } else {
- regexes.push(try!(Regex::new(&p.to_regex_with(o))));
- regexes_map.push(i);
- }
- }
+ debug!("built glob set; {} literals, {} basenames, {} extensions, \
+ {} prefixes, {} suffixes, {} required extensions, {} regexes",
+ lits.0.len(), base_lits.0.len(), exts.0.len(),
+ prefixes.literals.len(), suffixes.literals.len(),
+ required_exts.0.len(), regexes.literals.len());
Ok(Set {
- exts: exts,
- literals: literals,
- base_literals: base_literals,
- base_prefixes: base_prefixes,
- base_prefixes_map: base_prefixes_map,
- base_suffixes: base_suffixes,
- base_suffixes_map: base_suffixes_map,
- base_regexes: base_regexes,
- base_regexes_map: base_regexes_map,
- regexes: regexes,
- regexes_map: regexes_map,
+ strats: vec![
+ SetMatchStrategy::Extension(exts),
+ SetMatchStrategy::BasenameLiteral(base_lits),
+ SetMatchStrategy::Literal(lits),
+ SetMatchStrategy::Suffix(suffixes.suffix()),
+ SetMatchStrategy::Prefix(prefixes.prefix()),
+ SetMatchStrategy::RequiredExtension(
+ try!(required_exts.build())),
+ SetMatchStrategy::Regex(try!(regexes.regex_set())),
+ ],
})
}
}
@@ -285,7 +243,7 @@ impl Set {
/// SetBuilder builds a group of patterns that can be used to simultaneously
/// match a file path.
pub struct SetBuilder {
- pats: Vec<(Pattern, MatchOptions)>,
+ pats: Vec<Pattern>,
}
impl SetBuilder {
@@ -299,858 +257,374 @@ impl SetBuilder {
/// Builds a new matcher from all of the glob patterns added so far.
///
/// Once a matcher is built, no new patterns can be added to it.
- pub fn build(&self) -> Result<Set, regex::Error> {
+ pub fn build(&self) -> Result<Set, Error> {
Set::new(&self.pats)
}
- /// Like `build`, but returns a matcher that can only answer yes/no.
- pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
- SetYesNo::new(&self.pats)
- }
-
/// Add a new pattern to this set.
- ///
- /// If the pattern could not be parsed as a glob, then an error is
- /// returned.
#[allow(dead_code)]
- pub fn add(&mut self, pat: &str) -> Result<(), Error> {
- self.add_with(pat, &MatchOptions::default())
- }
-
- /// Like add, but sets the match options for this particular pattern.
- pub fn add_with(
- &mut self,
- pat: &str,
- opts: &MatchOptions,
- ) -> Result<(), Error> {
- let parsed = try!(Pattern::new(pat));
- // if let Some(ext) = parsed.ext() {
- // eprintln!("ext :: {:?} :: {:?}", ext, pat);
- // } else if let Some(lit) = parsed.literal() {
- // eprintln!("literal :: {:?} :: {:?}", lit, pat);
- // } else if let Some(lit) = parsed.base_literal() {
- // eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
- // } else if let Some(lit) = parsed.base_literal_prefix() {
- // eprintln!("base_literal_prefix :: {:?} :: {:?}", lit, pat);
- // } else if let Some(lit) = parsed.base_literal_suffix() {
- // eprintln!("base_literal_suffix :: {:?} :: {:?}", lit, pat);
- // } else if parsed.is_only_basename() {
- // eprintln!("basename-regex :: {:?} :: {:?}", pat, parsed);
- // } else {
- // eprintln!("regex :: {:?} :: {:?}", pat, parsed);
- // }
- self.pats.push((parsed, opts.clone()));
- Ok(())
+ pub fn add(&mut self, pat: Pattern) -> &mut SetBuilder {
+ self.pats.push(pat);
+ self
}
}
-/// Pattern represents a successfully parsed shell glob pattern.
-///
-/// It cannot be used directly to match file paths, but it can be converted
-/// to a regular expression string.
-#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct Pattern {
- tokens: Vec<Token>,
-}
-
-/// Options to control the matching semantics of a glob. The default value
-/// has all options disabled.
-#[derive(Clone, Debug, Default)]
-pub struct MatchOptions {
- /// When true, matching is done case insensitively.
- pub case_insensitive: bool,
- /// When true, neither `*` nor `?` match the current system's path
- /// separator.
- pub require_literal_separator: bool,
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-enum Token {
- Literal(char),
- Any,
- ZeroOrMore,
- RecursivePrefix,
- RecursiveSuffix,
- RecursiveZeroOrMore,
- Class {
- negated: bool,
- ranges: Vec<(char, char)>,
- },
- Alternates(Vec<Pattern>),
+#[derive(Clone, Debug)]
+struct Candidate<'a> {
+ path: Cow<'a, [u8]>,
+ basename: Cow<'a, [u8]>,
+ ext: &'a OsStr,
}
-impl Pattern {
- /// Parse a shell glob pattern.
- ///
- /// If the pattern is not a valid glob, then an error is returned.
- pub fn new(pat: &str) -> Result<Pattern, Error> {
- let mut p = Parser {
- stack: vec![Pattern::default()],
- chars: pat.chars().peekable(),
- prev: None,
- cur: None,
- };
- try!(p.parse());
- if p.stack.is_empty() {
- Err(Error::UnopenedAlternates)
- } else if p.stack.len() > 1 {
- Err(Error::UnclosedAlternates)
- } else {
- Ok(p.stack.pop().unwrap())
+impl<'a> Candidate<'a> {
+ fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
+ let path = path.as_ref();
+ let basename = file_name(path).unwrap_or(OsStr::new(""));
+ Candidate {
+ path: path_bytes(path),
+ basename: os_str_bytes(basename),
+ ext: file_name_ext(basename).unwrap_or(OsStr::new("")),
}
}
- /// Returns an extension if this pattern exclusively matches it.
- pub fn ext(&self) -> Option<OsString> {
- if self.tokens.len() <= 3 {
- return None;
- }
- match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {}
- _ => return None,
- }
- match self.tokens.get(1) {
- Some(&Token::ZeroOrMore) => {}
- _ => return None,
- }
- match self.tokens.get(2) {
- Some(&Token::Literal(c)) if c == '.' => {}
- _ => return None,
- }
- let mut lit = OsString::new();
- for t in self.tokens[3..].iter() {
- match *t {
- Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
- return None;
- }
- Token::Literal(c) => lit.push(c.to_string()),
- _ => return None,
- }
+ fn path_prefix(&self, max: usize) -> &[u8] {
+ if self.path.len() <= max {
+ &*self.path
+ } else {
+ &self.path[..max]
}
- Some(lit)
}
- /// Returns the pattern as a literal if and only if the pattern exclusiely
- /// matches the basename of a file path *and* is a literal.
- ///
- /// The basic format of these patterns is `**/{literal}`, where `{literal}`
- /// does not contain a path separator.
- pub fn base_literal(&self) -> Option<String> {
- match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {}
- _ => return None,
- }
- let mut lit = String::new();
- for t in &self.tokens[1..] {
- match *t {
- Token::Literal(c) if c == '/' || c == '\\' => return None,
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ fn path_suffix(&self, max: usize) -> &[u8] {
+ if self.path.len() <= max {
+ &*self.path
+ } else {
+ &self.path[self.path.len() - max..]
}
- Some(lit)
}
+}
- /// Returns true if and only if this pattern only inspects the basename
- /// of a path.
- pub fn is_only_basename(&self) -> bool {
- match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {}
- _ => return false,
- }
- for t in &self.tokens[1..] {
- match *t {
- Token::Literal(c) if c == '/' || c == '\\' => return false,
- Token::RecursivePrefix
- | Token::RecursiveSuffix
- | Token::RecursiveZeroOrMore => return false,
- _ => {}
- }
- }
- true
- }
+#[derive(Clone, Debug)]
+enum SetMatchStrategy {
+ Literal(LiteralStrategy),
+ BasenameLiteral(BasenameLiteralStrategy),
+ Extension(ExtensionStrategy),
+ Prefix(PrefixStrategy),
+ Suffix(SuffixStrategy),
+ RequiredExtension(RequiredExtensionStrategy),
+ Regex(RegexSetStrategy),
+}
- /// Returns the pattern as a literal if and only if the pattern must match
- /// an entire path exactly.
- ///
- /// The basic format of these patterns is `{literal}`.
- pub fn literal(&self) -> Option<String> {
- let mut lit = String::new();
- for t in &self.tokens {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+impl SetMatchStrategy {
+ fn is_match(&self, candidate: &Candidate) -> bool {
+ use self::SetMatchStrategy::*;
+ match *self {
+ Literal(ref s) => s.is_match(candidate),
+ BasenameLiteral(ref s) => s.is_match(candidate),
+ Extension(ref s) => s.is_match(candidate),
+ Prefix(ref s) => s.is_match(candidate),
+ Suffix(ref s) => s.is_match(candidate),
+ RequiredExtension(ref s) => s.is_match(candidate),
+ Regex(ref s) => s.is_match(candidate),
}
- Some(lit)
}
- /// Returns a basename literal prefix of this pattern.
- pub fn base_literal_prefix(&self) -> Option<String> {
- match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {}
- _ => return None,
- }
- match self.tokens.last() {
- Some(&Token::ZeroOrMore) => {}
- _ => return None,
- }
- let mut lit = String::new();
- for t in &self.tokens[1..self.tokens.len()-1] {
- match *t {
- Token::Literal(c) if c == '/' || c == '\\' => return None,
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
+ use self::SetMatchStrategy::*;
+ match *self {
+ Literal(ref s) => s.matches_into(candidate, matches),
+ BasenameLiteral(ref s) => s.matches_into(candidate, matches),
+ Extension(ref s) => s.matches_into(candidate, matches),
+ Prefix(ref s) => s.matches_into(candidate, matches),
+ Suffix(ref s) => s.matches_into(candidate, matches),
+ RequiredExtension(ref s) => s.matches_into(candidate, matches),
+ Regex(ref s) => s.matches_into(candidate, matches),
}
- Some(lit)
}
+}
- /// Returns a basename literal suffix of this pattern.
- pub fn base_literal_suffix(&self) -> Option<String> {
- match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {}
- _ => return None,
- }
- match self.tokens.get(1) {
- Some(&Token::ZeroOrMore) => {}
- _ => return None,
- }
- let mut lit = String::new();
- for t in &self.tokens[2..] {
- match *t {
- Token::Literal(c) if c == '/' || c == '\\' => return None,
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
- }
- Some(lit)
- }
+#[derive(Clone, Debug)]
+struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
- /// Convert this pattern to a string that is guaranteed to be a valid
- /// regular expression and will represent the matching semantics of this
- /// glob pattern. This uses a default set of options.
- #[allow(dead_code)]
- pub fn to_regex(&self) -> String {
- self.to_regex_with(&MatchOptions::default())
+impl LiteralStrategy {
+ fn new() -> LiteralStrategy {
+ LiteralStrategy(BTreeMap::new())
}
- /// Convert this pattern to a string that is guaranteed to be a valid
- /// regular expression and will represent the matching semantics of this
- /// glob pattern and the options given.
- pub fn to_regex_with(&self, options: &MatchOptions) -> String {
- let mut re = String::new();
- re.push_str("(?-u)");
- if options.case_insensitive {
- re.push_str("(?i)");
- }
- re.push('^');
- // Special case. If the entire glob is just `**`, then it should match
- // everything.
- if self.tokens.len() == 1 && self.tokens[0] == Token::RecursivePrefix {
- re.push_str(".*");
- re.push('$');
- return re;
- }
- self.tokens_to_regex(options, &self.tokens, &mut re);
- re.push('$');
- re
+ fn add(&mut self, global_index: usize, lit: String) {
+ self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
}
- fn tokens_to_regex(
- &self,
- options: &MatchOptions,
- tokens: &[Token],
- re: &mut String,
- ) {
- let seps = &*FILE_SEPARATORS;
+ fn is_match(&self, candidate: &Candidate) -> bool {
+ self.0.contains_key(&*candidate.path)
+ }
- for tok in tokens {
- match *tok {
- Token::Literal(c) => {
- re.push_str(&regex::quote(&c.to_string()));
- }
- Token::Any => {
- if options.require_literal_separator {
- re.push_str(&format!("[^{}]", seps));
- } else {
- re.push_str(".");
- }
- }
- Token::ZeroOrMore => {
- if options.require_literal_separator {
- re.push_str(&format!("[^{}]*", seps));
- } else {
- re.push_str(".*");
- }
- }
- Token::RecursivePrefix => {
- re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
- }
- Token::RecursiveSuffix => {
- re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
- }
- Token::RecursiveZeroOrMore => {
- re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
- sep=seps));
- }
- Token::Class { negated, ref ranges } => {
- re.push('[');
- if negated {
- re.push('^');
- }
- for r in ranges {
- if r.0 == r.1 {
- // Not strictly necessary, but nicer to look at.
- re.push_str(&regex::quote(&r.0.to_string()));
- } else {
- re.push_str(&regex::quote(&r.0.to_string()));
- re.push('-');
- re.push_str(&regex::quote(&r.1.to_string()));
- }
- }
- re.push(']');
- }
- Token::Alternates(ref patterns) => {
- let mut parts = vec![];
- for pat in patterns {
- let mut altre = String::new();
- self.tokens_to_regex(options, &pat.tokens, &mut altre);
- parts.push(altre);
- }
- re.push_str(&parts.join("|"));
- }
- }
+ #[inline(never)]
+ fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
+ if let Some(hits) = self.0.get(&*candidate.path) {
+ matches.extend(hits);
}
}
}
-struct Parser<'a> {
- stack: Vec<Pattern>,
- chars: iter::Peekable<str::Chars<'a>>,
- prev: Option<char>,
- cur: Option<char>,
-}
+#[derive(Clone, Debug)]
+struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
-impl<'a> Parser<'a> {
- fn parse(&mut self) -> Result<(), Error> {
- while let Some(c) = self.bump() {
- match c {
- '?' => try!(self.push_token(Token::Any)),
- '*' => try!(self.parse_star()),
- '[' => try!(self.parse_class()),
- '{' => try!(self.push_alternate()),
- '}' => try!(self.pop_alternate()),
- ',' => try!(self.parse_comma()),
- c => try!(self.push_token(Token::Literal(c))),
- }
- }
- Ok(())
+impl BasenameLiteralStrategy {
+ fn new() -> BasenameLiteralStrategy {
+ BasenameLiteralStrategy(BTreeMap::new())
}
- fn push_alternate(&mut self) -> Result<(), Error> {
- if self.stack.len() > 1 {
- return Err(Error::NestedAlternates);
- }
- Ok(self.stack.push(Pattern::default()))
+ fn add(&mut self, global_index: usize, lit: String) {
+ self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
}
- fn pop_alternate(&mut self) -> Result<(), Error> {
- let mut alts = vec![];
- while self.stack.len() >= 2 {
- alts.push(self.stack.pop().unwrap());
+ fn is_match(&self, candidate: &Candidate) -> bool {
+ if candidate.basename.is_empty() {
+ return false;
}
- self.push_token(Token::Alternates(alts))
+ self.0.contains_key(&*candidate.basename)
}
- fn push_token(&mut self, tok: Token) -> Result<(), Error> {
- match self.stack.last_mut() {
- None => Err(Error::UnopenedAlternates),
- Some(ref mut pat) => Ok(pat.tokens.push(tok)),
+ #[inline(never)]
+ fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
+ if candidate.basename.is_empty() {
+ return;
+ }
+ if let Some(hits) = self.0.get(&*candidate.basename) {
+ matches.extend(hits);
}
}
+}
- fn pop_token(&mut self) -> Result<Token, Error> {
- match self.stack.last_mut() {
- None => Err(Error::UnopenedAlternates),
- Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()),
- }
+#[derive(Clone, Debug)]
+struct ExtensionStrategy(HashMap<OsString, Vec<usize>, Fnv>);
+
+impl ExtensionStrategy {
+ fn new() -> ExtensionStrategy {
+ ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
}
- fn have_tokens(&self) -> Result<bool, Error> {
- match self.stack.last() {
- None => Err(Error::UnopenedAlternates),
- Some(ref pat) => Ok(!pat.tokens.is_empty()),
- }
+ fn add(&mut self, global_index: usize, ext: OsString) {
+ self.0.entry(ext).or_insert(vec![]).push(global_index);
}
- fn parse_comma(&mut self) -> Result<(), Error> {
- // If we aren't inside a group alternation, then don't
- // treat commas specially. Otherwise, we need to start
- // a new alternate.
- if self.stack.len() <= 1 {
- self.push_token(Token::Literal(','))
- } else {
- Ok(self.stack.push(Pattern::default()))
+ fn is_match(&self, candidate: &Candidate) -> bool {
+ if candidate.ext.is_empty() {
+ return false;
}
+ self.0.contains_key(candidate.ext)
}
- fn parse_star(&mut self) -> Result<(), Error> {
- let prev = self.prev;
- if self.chars.peek() != Some(&'*') {
- try!(self.push_token(Token::ZeroOrMore));
- return Ok(());
+ #[inline(never)]
+ fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
+ if candidate.ext.is_empty() {
+ return;
}
- assert!(self.bump() == Some('*'));
- if !try!(self.have_tokens()) {
- try!(self.push_token(Token::RecursivePrefix));
- let next = self.bump();
- if !next.is_none() && next != Some('/') {
- return Err(Error::InvalidRecursive);
- }
- return Ok(());
+ if let Some(hits) = self.0.get(candidate.ext) {
+ matches.extend(hits);
}
- try!(self.pop_token());
- if prev != Some('/') {
- if self.stack.len() <= 1
- || (prev != Some(',') && prev != Some('{')) {
- return Err(Error::InvalidRecursive);
+ }
+}
+
+#[derive(Clone, Debug)]
+struct PrefixStrategy {
+ matcher: FullAcAutomaton<Vec<u8>>,
+ map: Vec<usize>,
+ longest: usize,
+}
+
+impl PrefixStrategy {
+ fn is_match(&self, candidate: &Candidate) -> bool {
+ let path = candidate.path_prefix(self.longest);
+ for m in self.matcher.find_overlapping(path) {
+ if m.start == 0 {
+ return true;