diff options
author | Andrew Gallant <jamslam@gmail.com> | 2016-09-30 19:42:41 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2016-09-30 19:42:41 -0400 |
commit | fdf24317ac4c34fc0a21efa0a775bc7d3663d498 (patch) | |
tree | a6e94a61439b0568bfb7bdb5d80e9a0138c48317 /src | |
parent | b9d5f22a4d20862dfbcbdfc81a07284719cc71c4 (diff) |
Move glob implementation to new crate.
It is isolated and complex enough that it deserves attention all on its
own. It's also eminently reusable.
Diffstat (limited to 'src')
-rw-r--r-- | src/gitignore.rs | 16 | ||||
-rw-r--r-- | src/glob.rs | 1168 | ||||
-rw-r--r-- | src/main.rs | 3 | ||||
-rw-r--r-- | src/types.rs | 20 |
4 files changed, 19 insertions, 1188 deletions
diff --git a/src/gitignore.rs b/src/gitignore.rs index bfc83846..6191f0b5 100644 --- a/src/gitignore.rs +++ b/src/gitignore.rs @@ -28,15 +28,15 @@ use std::fs::File; use std::io::{self, BufRead}; use std::path::{Path, PathBuf}; +use globset; use regex; -use glob; use pathutil::{is_file_name, strip_prefix}; /// Represents an error that can occur when parsing a gitignore file. #[derive(Debug)] pub enum Error { - Glob(glob::Error), + Glob(globset::Error), Regex(regex::Error), Io(io::Error), } @@ -61,8 +61,8 @@ impl fmt::Display for Error { } } -impl From<glob::Error> for Error { - fn from(err: glob::Error) -> Error { +impl From<globset::Error> for Error { + fn from(err: globset::Error) -> Error { Error::Glob(err) } } @@ -82,7 +82,7 @@ impl From<io::Error> for Error { /// Gitignore is a matcher for the glob patterns in a single gitignore file. #[derive(Clone, Debug)] pub struct Gitignore { - set: glob::Set, + set: globset::Set, root: PathBuf, patterns: Vec<Pattern>, num_ignores: u64, @@ -207,7 +207,7 @@ impl<'a> Match<'a> { /// GitignoreBuilder constructs a matcher for a single set of globs from a /// .gitignore file. pub struct GitignoreBuilder { - builder: glob::SetBuilder, + builder: globset::SetBuilder, root: PathBuf, patterns: Vec<Pattern>, } @@ -237,7 +237,7 @@ impl GitignoreBuilder { pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder { let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref()); GitignoreBuilder { - builder: glob::SetBuilder::new(), + builder: globset::SetBuilder::new(), root: root.to_path_buf(), patterns: vec![], } @@ -299,7 +299,7 @@ impl GitignoreBuilder { whitelist: false, only_dir: false, }; - let mut opts = glob::MatchOptions::default(); + let mut opts = globset::MatchOptions::default(); let has_slash = line.chars().any(|c| c == '/'); let is_absolute = line.chars().nth(0).unwrap() == '/'; if line.starts_with("\\!") || line.starts_with("\\#") { diff --git a/src/glob.rs b/src/glob.rs deleted file mode 100644 index 295474cc..00000000 --- a/src/glob.rs +++ /dev/null @@ -1,1168 +0,0 @@ -/*! -The glob module provides standard shell globbing, but is specifically -implemented by converting glob syntax to regular expressions. The reasoning is -two fold: - -1. The regex library is *really* fast. Regaining performance in a distinct - implementation of globbing is non-trivial. -2. Most crucially, a `RegexSet` can be used to match many globs simultaneously. - -This module is written with some amount of intention of eventually splitting it -out into its own separate crate, but I didn't quite have the energy for all -that rigamorole when I wrote this. In particular, it could be fast/good enough -to make its way into `glob` proper. -*/ - -// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets -// here. For example, we do a first pass single-regex-of-all-globs filter -// before actually running the regex set. This turns out to be faster, -// especially in fresh checkouts of repos that don't have a lot of ignored -// files. It's not clear how hard it is to make the regex set faster. -// -// An alternative avenue is to stop doing "regex all the things." (Which, to -// be fair, is pretty fast---I just expected it to be faster.) We could do -// something clever using assumptions along the lines of "oh, most ignore -// patterns are either literals or are for ignoring file extensions." (Look -// at the .gitignore for the chromium repo---just about every pattern satisfies -// that assumption.) - -use std::borrow::Cow; -use std::collections::HashMap; -use std::error::Error as StdError; -use std::ffi::{OsStr, OsString}; -use std::fmt; -use std::hash; -use std::iter; -use std::path::Path; -use std::str; - -use fnv; -use regex; -use regex::bytes::Regex; - -use pathutil::file_name; - -lazy_static! { - static ref FILE_SEPARATORS: String = regex::quote(r"/\"); -} - -/// Represents an error that can occur when parsing a glob pattern. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Error { - InvalidRecursive, - UnclosedClass, - InvalidRange(char, char), - UnopenedAlternates, - UnclosedAlternates, - NestedAlternates, -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::InvalidRecursive => { - "invalid use of **; must be one path component" - } - Error::UnclosedClass => { - "unclosed character class; missing ']'" - } - Error::InvalidRange(_, _) => { - "invalid character range" - } - Error::UnopenedAlternates => { - "unopened alternate group; missing '{' \ - (maybe escape '}' with '[}]'?)" - } - Error::UnclosedAlternates => { - "unclosed alternate group; missing '}' \ - (maybe escape '{' with '[{]'?)" - } - Error::NestedAlternates => { - "nested alternate groups are not allowed" - } - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::InvalidRecursive - | Error::UnclosedClass - | Error::UnopenedAlternates - | Error::UnclosedAlternates - | Error::NestedAlternates => { - write!(f, "{}", self.description()) - } - Error::InvalidRange(s, e) => { - write!(f, "invalid range; '{}' > '{}'", s, e) - } - } - } -} - -/// SetYesNo represents a group of globs that can be matched together in a -/// single pass. SetYesNo can only determine whether a particular path matched -/// any pattern in the set. -#[derive(Clone, Debug)] -pub struct SetYesNo { - re: Regex, -} - -impl SetYesNo { - /// Returns true if and only if the given path matches at least one glob - /// in this set. - pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool { - self.re.is_match(&*path_bytes(path.as_ref())) - } - - fn new( - pats: &[(Pattern, MatchOptions)], - ) -> Result<SetYesNo, regex::Error> { - let mut joined = String::new(); - for &(ref p, ref o) in pats { - let part = format!("(?:{})", p.to_regex_with(o)); - if !joined.is_empty() { - joined.push('|'); - } - joined.push_str(&part); - } - Ok(SetYesNo { re: try!(Regex::new(&joined)) }) - } -} - -type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; - -/// Set represents a group of globs that can be matched together in a single -/// pass. -#[derive(Clone, Debug)] -pub struct Set { - exts: HashMap<OsString, Vec<usize>, Fnv>, - literals: HashMap<Vec<u8>, Vec<usize>, Fnv>, - base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>, - base_prefixes: Vec<Vec<u8>>, - base_prefixes_map: Vec<usize>, - base_suffixes: Vec<Vec<u8>>, - base_suffixes_map: Vec<usize>, - base_regexes: Vec<Regex>, - base_regexes_map: Vec<usize>, - regexes: Vec<Regex>, - regexes_map: Vec<usize>, -} - -impl Set { - /// Returns the sequence number of every glob pattern that matches the - /// given path. - #[allow(dead_code)] - pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> { - let mut into = vec![]; - self.matches_into(path, &mut into); - into - } - - /// Adds the sequence number of every glob pattern that matches the given - /// path to the vec given. - pub fn matches_into<T: AsRef<Path>>( - &self, - path: T, - into: &mut Vec<usize>, - ) { - into.clear(); - let path = path.as_ref(); - let path_bytes = &*path_bytes(path); - let basename = file_name(path).map(|b| os_str_bytes(b)); - if !self.exts.is_empty() { - if let Some(ext) = path.extension() { - if let Some(matches) = self.exts.get(ext) { - into.extend(matches.as_slice()); - } - } - } - if !self.literals.is_empty() { - if let Some(matches) = self.literals.get(path_bytes) { - into.extend(matches.as_slice()); - } - } - if !self.base_literals.is_empty() { - if let Some(ref basename) = basename { - if let Some(matches) = self.base_literals.get(&**basename) { - into.extend(matches.as_slice()); - } - } - } - if !self.base_prefixes.is_empty() { - if let Some(ref basename) = basename { - let basename = &**basename; - for (i, pre) in self.base_prefixes.iter().enumerate() { - if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] { - into.push(self.base_prefixes_map[i]); - } - } - } - } - if !self.base_suffixes.is_empty() { - if let Some(ref basename) = basename { - let basename = &**basename; - for (i, suf) in self.base_suffixes.iter().enumerate() { - if suf.len() > basename.len() { - continue; - } - let (s, e) = (basename.len() - suf.len(), basename.len()); - if &**suf == &basename[s..e] { - into.push(self.base_suffixes_map[i]); - } - } - } - } - if let Some(ref basename) = basename { - for (i, re) in self.base_regexes.iter().enumerate() { - if re.is_match(&**basename) { - into.push(self.base_regexes_map[i]); - } - } - } - for (i, re) in self.regexes.iter().enumerate() { - if re.is_match(path_bytes) { - into.push(self.regexes_map[i]); - } - } - into.sort(); - } - - fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> { - let fnv = Fnv::default(); - let mut exts = HashMap::with_hasher(fnv.clone()); - let mut literals = HashMap::with_hasher(fnv.clone()); - let mut base_literals = HashMap::with_hasher(fnv.clone()); - let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]); - let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]); - let (mut regexes, mut regexes_map) = (vec![], vec![]); - let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]); - for (i, &(ref p, ref o)) in pats.iter().enumerate() { - if let Some(ext) = p.ext() { - exts.entry(ext).or_insert(vec![]).push(i); - } else if let Some(literal) = p.literal() { - literals.entry(literal.into_bytes()).or_insert(vec![]).push(i); - } else if let Some(literal) = p.base_literal() { - base_literals - .entry(literal.into_bytes()).or_insert(vec![]).push(i); - } else if let Some(literal) = p.base_literal_prefix() { - base_prefixes.push(literal.into_bytes()); - base_prefixes_map.push(i); - } else if let Some(literal) = p.base_literal_suffix() { - base_suffixes.push(literal.into_bytes()); - base_suffixes_map.push(i); - } else if p.is_only_basename() { - base_regexes.push(try!(Regex::new(&p.to_regex_with(o)))); - base_regexes_map.push(i); - } else { - regexes.push(try!(Regex::new(&p.to_regex_with(o)))); - regexes_map.push(i); - } - } - Ok(Set { - exts: exts, - literals: literals, - base_literals: base_literals, - base_prefixes: base_prefixes, - base_prefixes_map: base_prefixes_map, - base_suffixes: base_suffixes, - base_suffixes_map: base_suffixes_map, - base_regexes: base_regexes, - base_regexes_map: base_regexes_map, - regexes: regexes, - regexes_map: regexes_map, - }) - } -} - -/// SetBuilder builds a group of patterns that can be used to simultaneously -/// match a file path. -pub struct SetBuilder { - pats: Vec<(Pattern, MatchOptions)>, -} - -impl SetBuilder { - /// Create a new SetBuilder. A SetBuilder can be used to add new patterns. - /// Once all patterns have been added, `build` should be called to produce - /// a `Set`, which can then be used for matching. - pub fn new() -> SetBuilder { - SetBuilder { pats: vec![] } - } - - /// Builds a new matcher from all of the glob patterns added so far. - /// - /// Once a matcher is built, no new patterns can be added to it. - pub fn build(&self) -> Result<Set, regex::Error> { - Set::new(&self.pats) - } - - /// Like `build`, but returns a matcher that can only answer yes/no. - pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> { - SetYesNo::new(&self.pats) - } - - /// Add a new pattern to this set. - /// - /// If the pattern could not be parsed as a glob, then an error is - /// returned. - #[allow(dead_code)] - pub fn add(&mut self, pat: &str) -> Result<(), Error> { - self.add_with(pat, &MatchOptions::default()) - } - - /// Like add, but sets the match options for this particular pattern. - pub fn add_with( - &mut self, - pat: &str, - opts: &MatchOptions, - ) -> Result<(), Error> { - let parsed = try!(Pattern::new(pat)); - // if let Some(ext) = parsed.ext() { - // eprintln!("ext :: {:?} :: {:?}", ext, pat); - // } else if let Some(lit) = parsed.literal() { - // eprintln!("literal :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal() { - // eprintln!("base_literal :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal_prefix() { - // eprintln!("base_literal_prefix :: {:?} :: {:?}", lit, pat); - // } else if let Some(lit) = parsed.base_literal_suffix() { - // eprintln!("base_literal_suffix :: {:?} :: {:?}", lit, pat); - // } else if parsed.is_only_basename() { - // eprintln!("basename-regex :: {:?} :: {:?}", pat, parsed); - // } else { - // eprintln!("regex :: {:?} :: {:?}", pat, parsed); - // } - self.pats.push((parsed, opts.clone())); - Ok(()) - } -} - -/// Pattern represents a successfully parsed shell glob pattern. -/// -/// It cannot be used directly to match file paths, but it can be converted -/// to a regular expression string. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Pattern { - tokens: Vec<Token>, -} - -/// Options to control the matching semantics of a glob. The default value -/// has all options disabled. -#[derive(Clone, Debug, Default)] -pub struct MatchOptions { - /// When true, matching is done case insensitively. - pub case_insensitive: bool, - /// When true, neither `*` nor `?` match the current system's path - /// separator. - pub require_literal_separator: bool, -} - -#[derive(Clone, Debug, Eq, PartialEq)] -enum Token { - Literal(char), - Any, - ZeroOrMore, - RecursivePrefix, - RecursiveSuffix, - RecursiveZeroOrMore, - Class { - negated: bool, - ranges: Vec<(char, char)>, - }, - Alternates(Vec<Pattern>), -} - -impl Pattern { - /// Parse a shell glob pattern. - /// - /// If the pattern is not a valid glob, then an error is returned. - pub fn new(pat: &str) -> Result<Pattern, Error> { - let mut p = Parser { - stack: vec![Pattern::default()], - chars: pat.chars().peekable(), - prev: None, - cur: None, - }; - try!(p.parse()); - if p.stack.is_empty() { - Err(Error::UnopenedAlternates) - } else if p.stack.len() > 1 { - Err(Error::UnclosedAlternates) - } else { - Ok(p.stack.pop().unwrap()) - } - } - - /// Returns an extension if this pattern exclusively matches it. - pub fn ext(&self) -> Option<OsString> { - if self.tokens.len() <= 3 { - return None; - } - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.get(1) { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - match self.tokens.get(2) { - Some(&Token::Literal(c)) if c == '.' => {} - _ => return None, - } - let mut lit = OsString::new(); - for t in self.tokens[3..].iter() { - match *t { - Token::Literal(c) if c == '/' || c == '\\' || c == '.' => { - return None; - } - Token::Literal(c) => lit.push(c.to_string()), - _ => return None, - } - } - Some(lit) - } - - /// Returns the pattern as a literal if and only if the pattern exclusiely - /// matches the basename of a file path *and* is a literal. - /// - /// The basic format of these patterns is `**/{literal}`, where `{literal}` - /// does not contain a path separator. - pub fn base_literal(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[1..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } - } - Some(lit) - } - - /// Returns true if and only if this pattern only inspects the basename - /// of a path. - pub fn is_only_basename(&self) -> bool { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return false, - } - for t in &self.tokens[1..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return false, - Token::RecursivePrefix - | Token::RecursiveSuffix - | Token::RecursiveZeroOrMore => return false, - _ => {} - } - } - true - } - - /// Returns the pattern as a literal if and only if the pattern must match - /// an entire path exactly. - /// - /// The basic format of these patterns is `{literal}`. - pub fn literal(&self) -> Option<String> { - let mut lit = String::new(); - for t in &self.tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } - } - Some(lit) - } - - /// Returns a basename literal prefix of this pattern. - pub fn base_literal_prefix(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.last() { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[1..self.tokens.len()-1] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } - } - Some(lit) - } - - /// Returns a basename literal suffix of this pattern. - pub fn base_literal_suffix(&self) -> Option<String> { - match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => {} - _ => return None, - } - match self.tokens.get(1) { - Some(&Token::ZeroOrMore) => {} - _ => return None, - } - let mut lit = String::new(); - for t in &self.tokens[2..] { - match *t { - Token::Literal(c) if c == '/' || c == '\\' => return None, - Token::Literal(c) => lit.push(c), - _ => return None, - } - } - Some(lit) - } - - /// Convert this pattern to a string that is guaranteed to be a valid - /// regular expression and will represent the matching semantics of this - /// glob pattern. This uses a default set of options. - #[allow(dead_code)] - pub fn to_regex(&self) -> String { - self.to_regex_with(&MatchOptions::default()) - } - - /// Convert this pattern to a string that is guaranteed to be a valid - /// regular expression and will represent the matching semantics of this - /// glob pattern and the options given. - pub fn to_regex_with(&self, options: &MatchOptions) -> String { - let mut re = String::new(); - re.push_str("(?-u)"); - if options.case_insensitive { - re.push_str("(?i)"); - } - re.push('^'); - // Special case. If the entire glob is just `**`, then it should match - // everything. - if self.tokens.len() == 1 && self.tokens[0] == Token::RecursivePrefix { - re.push_str(".*"); - re.push('$'); - return re; - } - self.tokens_to_regex(options, &self.tokens, &mut re); - re.push('$'); - re - } - - fn tokens_to_regex( - &self, - options: &MatchOptions, - tokens: &[Token], - re: &mut String, - ) { - let seps = &*FILE_SEPARATORS; - - for tok in tokens { - match *tok { - Token::Literal(c) => { - re.push_str(®ex::quote(&c.to_string())); - } - Token::Any => { - if options.require_literal_separator { - re.push_str(&format!("[^{}]", seps)); - } else { - re.push_str("."); - } - } - Token::ZeroOrMore => { - if options.require_literal_separator { - re.push_str(&format!("[^{}]*", seps)); - } else { - re.push_str(".*"); - } - } - Token::RecursivePrefix => { - re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps)); - } - Token::RecursiveSuffix => { - re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps)); - } - Token::RecursiveZeroOrMore => { - re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])", - sep=seps)); - } - Token::Class { negated, ref ranges } => { - re.push('['); - if negated { - re.push('^'); - } - for r in ranges { - if r.0 == r.1 { - // Not strictly necessary, but nicer to look at. - re.push_str(®ex::quote(&r.0.to_string())); - } else { - re.push_str(®ex::quote(&r.0.to_string())); - re.push('-'); - re.push_str(®ex::quote(&r.1.to_string())); - } - } - re.push(']'); - } - Token::Alternates(ref patterns) => { - let mut parts = vec![]; - for pat in patterns { - let mut altre = String::new(); - self.tokens_to_regex(options, &pat.tokens, &mut altre); - parts.push(altre); - } - re.push_str(&parts.join("|")); - } - } - } - } -} - -struct Parser<'a> { - stack: Vec<Pattern>, - chars: iter::Peekable<str::Chars<'a>>, - prev: Option<char>, - cur: Option<char>, -} - -impl<'a> Parser<'a> { - fn parse(&mut self) -> Result<(), Error> { - while let Some(c) = self.bump() { - match c { - '?' => try!(self.push_token(Token::Any)), - '*' => try!(self.parse_star()), - '[' => try!(self.parse_class()), - '{' => try!(self.push_alternate()), - '}' => try!(self.pop_alternate()), - ',' => try!(self.parse_comma()), - c => try!(self.push_token(Token::Literal(c))), - } - } - Ok(()) - } - - fn push_alternate(&mut self) -> Result<(), Error> { - if self.stack.len() > 1 { - return Err(Error::NestedAlternates); - } - Ok(self.stack.push(Pattern::default())) - } - - fn pop_alternate(&mut self) -> Result<(), Error> { - let mut alts = vec![]; - while self.stack.len() >= 2 { - alts.push(self.stack.pop().unwrap()); - } - self.push_token(Token::Alternates(alts)) - } - - fn push_token(&mut self, tok: Token) -> Result<(), Error> { - match self.stack.last_mut() { - None => Err(Error::UnopenedAlternates), - Some(ref mut pat) => Ok(pat.tokens.push(tok)), - } - } - - fn pop_token(&mut self) -> Result<Token, Error> { - match self.stack.last_mut() { - None => Err(Error::UnopenedAlternates), - Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()), - } - } - - fn have_tokens(&self) -> Result<bool, Error> { - match self.stack.last() { - None => Err(Error::UnopenedAlternates), - Some(ref pat) => Ok(!pat.tokens.is_empty()), - } - } - - fn parse_comma(&mut self) -> Result<(), Error> { - // If we aren't inside a group alternation, then don't - // treat commas specially. Otherwise, we need to start - // a new alternate. - if self.stack.len() <= 1 { - self.push_token(Token::Literal(',')) - } else { - Ok(self.stack.push(Pattern::default())) - } - } - - fn parse_star(&mut self) -> Result<(), Error> { - let prev = self.prev; - if self.chars.peek() != Some(&'*') { - try!(self.push_token(Token::ZeroOrMore)); - return Ok(()); - } - assert!(self.bump() == Some('*')); - if !try!(self.have_tokens()) { - try!(self.push_token(Token::RecursivePrefix)); - let next = self.bump(); - if !next.is_none() && next != Some('/') { - return Err(Error::InvalidRecursive); - } - return Ok(()); - } - try!(self.pop_token()); - if prev != Some('/') { - if self.stack.len() <= 1 - || (prev != Some(',') && prev != Some('{')) { - return Err(Error::InvalidRecursive); - } - } - match self.chars.peek() { - None => { - assert!(self.bump().is_none()); - self.push_token(Token::RecursiveSuffix) - } - Some(&',') | Some(&'}') if self.stack.len() >= 2 => { - self.push_token(Token::RecursiveSuffix) - } - Some(&'/') => { - assert!(self.bump() == Some('/')); - self.push_token(Token::RecursiveZeroOrMore) - } - _ => Err(Error::InvalidRecursive), - } - } - - fn parse_class(&mut self) -> Result<(), Error> { - fn add_to_last_range( - r: &mut (char, char), - add: char, - ) -> Result<(), Error> { - r.1 = add; - if r.1 < r.0 { - Err(Error::InvalidRange(r.0, r.1)) - } else { - Ok(()) - } - } - let mut negated = false; - let mut ranges = vec![]; - if self.chars.peek() == Some(&'!') { - assert!(self.bump() == Some('!')); - negated = true; - } - let mut first = true; - let mut in_range = false; - loop { - let c = match self.bump() { - Some(c) => c, - // The only way to successfully break this loop is to observe - // a ']'. - None => return Err(Error::UnclosedClass), - }; - match c { - ']' => { - if first { - ranges.push((']', ']')); - } else { - break; - } - } - '-' => { - if first { - ranges.push(('-', '-')); - } else if in_range { - // invariant: in_range is only set when there is - // already at least one character seen. - let r = ranges.last_mut().unwrap(); - try!(add_to_last_range(r, '-')); - in_range = false; - } else { - assert!(!ranges.is_empty()); - in_range = true; - } - } - c => { - if in_range { - // invariant: in_range is only set when there is - // already at least one character seen. - try!(add_to_last_range(ranges.last_mut().unwrap(), c)); - } else { - ranges.push((c, c)); - } - in_range = false; - } - } - first = false; - } - if in_range { - // Means that the last character in the class was a '-', so add - // it as a literal. - ranges.push(('-', '-')); - } - self.push_token(Token::Class { - negated: negated, - ranges: ranges, - }) - } - - fn bump(&mut self) -> Option<char> { - self.prev = self.cur; - self.cur = self.chars.next(); - self.cur - } -} - -fn path_bytes(path: &Path) -> Cow<[u8]> { - os_str_bytes(path.as_os_str()) -} - -#[cfg(unix)] -fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { - use std::os::unix::ffi::OsStrExt; - Cow::Borrowed(s.as_bytes()) -} - -#[cfg(not(unix))] -fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { - // TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even - // if we could get at the raw bytes, they wouldn't be useful. We *must* - // convert to UTF-8 before doing path matching. Unfortunate, but necessary. - match s.to_string_lossy() { - Cow::Owned(s) => Cow::Owned(s.into_bytes()), - Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - - use regex::bytes::Regex; - - use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token}; - use super::Token::*; - - macro_rules! syntax { - ($name:ident, $pat:expr, $tokens:expr) => { - #[test] - fn $name() { - let pat = Pattern::new($pat).unwrap(); - assert_eq!($tokens, pat.tokens); - } - } - } - - macro_rules! syntaxerr { - ($name:ident, $pat:expr, $err:expr) => { - #[test] - fn $name() { - let err = Pattern::new($pat).unwrap_err(); - assert_eq!($err, err); - } - } - } - - macro_rules! toregex { - ($name:ident, $pat:expr, $re:expr) => { - toregex!($name, $pat, $re, MatchOptions::default()); - }; - ($name:ident, $pat:expr, $re:expr, $options:expr) => { - #[test] - fn $name() { - let pat = Pattern::new($pat).unwrap(); - assert_eq!( - format!("(?-u){}", $re), pat.to_regex_with(&$options)); - } - }; - } - - macro_rules! matches { - ($name:ident, $pat:expr, $path:expr) => { - matches!($name, $pat, $path, MatchOptions::default()); - }; - ($name:ident, $pat:expr, $path:expr, $options:expr) => { - #[test] - fn $name() { - let pat = Pattern::new($pat).unwrap(); - let path = &Path::new($path).to_str().unwrap(); - let re = Regex::new(&pat.to_regex_with(&$options)).unwrap(); - assert!(re.is_match(path.as_bytes())); - } - }; - } - - macro_rules! nmatches { - ($name:ident, $pat:expr, $path:expr) => { - nmatches!($name, $pat, $path, MatchOptions::default()); - }; - ($name:ident, $pat:expr, $path:expr, $options:expr) => { - #[test] - fn $name() { - let pat = Pattern::new($pat).unwrap(); - let path = &Path::new($path).to_str().unwrap(); - let re = Regex::new(&pat.to_regex_with(&$options)).unwrap(); - assert!(!re.is_match(path.as_bytes())); - } - }; - } - - macro_rules! ext { - ($name:ident, $pat:expr, $ext:expr) => { - #[test] - fn $name() { - let pat = Pattern::new($pat).unw |