summaryrefslogtreecommitdiffstats
path: root/globset/src/pattern.rs
diff options
context:
space:
mode:
Diffstat (limited to 'globset/src/pattern.rs')
-rw-r--r--globset/src/pattern.rs1379
1 files changed, 1379 insertions, 0 deletions
diff --git a/globset/src/pattern.rs b/globset/src/pattern.rs
new file mode 100644
index 00000000..1eff726a
--- /dev/null
+++ b/globset/src/pattern.rs
@@ -0,0 +1,1379 @@
+use std::ffi::{OsStr, OsString};
+use std::fmt;
+use std::iter;
+use std::ops::{Deref, DerefMut};
+use std::path::Path;
+use std::str;
+
+use regex;
+use regex::bytes::Regex;
+
+use {Error, FILE_SEPARATORS, new_regex};
+use pathutil::path_bytes;
+
+/// Describes a matching strategy for a particular pattern.
+///
+/// This provides a way to more quickly determine whether a pattern matches
+/// a particular file path in a way that scales with a large number of
+/// patterns. For example, if many patterns are of the form `*.ext`, then it's
+/// possible to test whether any of those patterns matches by looking up a
+/// file path's extension in a hash table.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum MatchStrategy {
+ /// A pattern matches if and only if the entire file path matches this
+ /// literal string.
+ Literal(String),
+ /// A pattern matches if and only if the file path's basename matches this
+ /// literal string.
+ BasenameLiteral(String),
+ /// A pattern matches if and only if the file path's extension matches this
+ /// literal string.
+ Extension(OsString),
+ /// A pattern matches if and only if this prefix literal is a prefix of the
+ /// candidate file path.
+ Prefix(String),
+ /// A pattern matches if and only if this prefix literal is a prefix of the
+ /// candidate file path.
+ ///
+ /// An exception: if `component` is true, then `suffix` must appear at the
+ /// beginning of a file path or immediately following a `/`.
+ Suffix {
+ /// The actual suffix.
+ suffix: String,
+ /// Whether this must start at the beginning of a path component.
+ component: bool,
+ },
+ /// A pattern matches only if the given extension matches the file path's
+ /// extension. Note that this is a necessary but NOT sufficient criterion.
+ /// Namely, if the extension matches, then a full regex search is still
+ /// required.
+ RequiredExtension(OsString),
+ /// A regex needs to be used for matching.
+ Regex,
+}
+
+impl MatchStrategy {
+ /// Returns a matching strategy for the given pattern.
+ pub fn new(pat: &Pattern) -> MatchStrategy {
+ if let Some(lit) = pat.basename_literal() {
+ MatchStrategy::BasenameLiteral(lit)
+ } else if let Some(lit) = pat.literal() {
+ MatchStrategy::Literal(lit)
+ } else if let Some(ext) = pat.ext() {
+ MatchStrategy::Extension(ext)
+ } else if let Some(prefix) = pat.prefix() {
+ MatchStrategy::Prefix(prefix)
+ } else if let Some((suffix, component)) = pat.suffix() {
+ MatchStrategy::Suffix { suffix: suffix, component: component }
+ } else if let Some(ext) = pat.required_ext() {
+ MatchStrategy::RequiredExtension(ext)
+ } else {
+ MatchStrategy::Regex
+ }
+ }
+}
+
+/// Pattern represents a successfully parsed shell glob pattern.
+///
+/// It cannot be used directly to match file paths, but it can be converted
+/// to a regular expression string.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Pattern {
+ glob: String,
+ re: String,
+ opts: PatternOptions,
+ tokens: Tokens,
+}
+
+impl fmt::Display for Pattern {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ self.glob.fmt(f)
+ }
+}
+
+/// A matcher for a single pattern.
+#[derive(Clone, Debug)]
+pub struct PatternMatcher {
+ /// The underlying pattern.
+ pat: Pattern,
+ /// The pattern, as a compiled regex.
+ re: Regex,
+}
+
+impl PatternMatcher {
+ /// Tests whether the given path matches this pattern or not.
+ pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
+ self.re.is_match(&*path_bytes(path.as_ref()))
+ }
+}
+
+/// A strategic matcher for a single pattern.
+#[cfg(test)]
+#[derive(Clone, Debug)]
+struct PatternStrategic {
+ /// The match strategy to use.
+ strategy: MatchStrategy,
+ /// The underlying pattern.
+ pat: Pattern,
+ /// The pattern, as a compiled regex.
+ re: Regex,
+}
+
+#[cfg(test)]
+impl PatternStrategic {
+ /// Tests whether the given path matches this pattern or not.
+ pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
+ use pathutil::file_name_ext;
+
+ let cow_path = path_bytes(path.as_ref());
+ let byte_path = &*cow_path;
+
+ match self.strategy {
+ MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
+ MatchStrategy::BasenameLiteral(ref lit) => {
+ let lit = OsStr::new(lit);
+ path.as_ref().file_name().map(|n| n == lit).unwrap_or(false)
+ }
+ MatchStrategy::Extension(ref ext) => {
+ path.as_ref().file_name()
+ .and_then(file_name_ext)
+ .map(|got| got == ext)
+ .unwrap_or(false)
+ }
+ MatchStrategy::Prefix(ref pre) => {
+ starts_with(pre.as_bytes(), byte_path)
+ }
+ MatchStrategy::Suffix { ref suffix, component } => {
+ if component && byte_path == &suffix.as_bytes()[1..] {
+ return true;
+ }
+ ends_with(suffix.as_bytes(), byte_path)
+ }
+ MatchStrategy::RequiredExtension(ref ext) => {
+ path.as_ref().file_name()
+ .and_then(file_name_ext)
+ .map(|got| got == ext && self.re.is_match(byte_path))
+ .unwrap_or(false)
+ }
+ MatchStrategy::Regex => self.re.is_match(byte_path),
+ }
+ }
+}
+
+/// A builder for a pattern.
+///
+/// This builder enables configuring the match semantics of a pattern. For
+/// example, one can make matching case insensitive.
+///
+/// The lifetime `'a` refers to the lifetime of the pattern string.
+#[derive(Clone, Debug)]
+pub struct PatternBuilder<'a> {
+ /// The glob pattern to compile.
+ glob: &'a str,
+ /// Options for the pattern.
+ opts: PatternOptions,
+}
+
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+struct PatternOptions {
+ /// Whether to match case insensitively.
+ case_insensitive: bool,
+ /// Whether to require a literal separator to match a separator in a file
+ /// path. e.g., when enabled, `*` won't match `/`.
+ literal_separator: bool,
+}
+
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+struct Tokens(Vec<Token>);
+
+impl Deref for Tokens {
+ type Target = Vec<Token>;
+ fn deref(&self) -> &Vec<Token> { &self.0 }
+}
+
+impl DerefMut for Tokens {
+ fn deref_mut(&mut self) -> &mut Vec<Token> { &mut self.0 }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Token {
+ Literal(char),
+ Any,
+ ZeroOrMore,
+ RecursivePrefix,
+ RecursiveSuffix,
+ RecursiveZeroOrMore,
+ Class {
+ negated: bool,
+ ranges: Vec<(char, char)>,
+ },
+ Alternates(Vec<Tokens>),
+}
+
+impl Pattern {
+ /// Builds a new pattern with default options.
+ pub fn new(glob: &str) -> Result<Pattern, Error> {
+ PatternBuilder::new(glob).build()
+ }
+
+ /// Returns a matcher for this pattern.
+ pub fn compile_matcher(&self) -> PatternMatcher {
+ let re = new_regex(&self.re)
+ .expect("regex compilation shouldn't fail");
+ PatternMatcher {
+ pat: self.clone(),
+ re: re,
+ }
+ }
+
+ /// Returns a strategic matcher.
+ ///
+ /// This isn't exposed because it's not clear whether it's actually
+ /// faster than just running a regex for a *single* pattern. If it
+ /// is faster, then PatternMatcher should do it automatically.
+ #[cfg(test)]
+ fn compile_strategic_matcher(&self) -> PatternStrategic {
+ let strategy = MatchStrategy::new(self);
+ let re = new_regex(&self.re)
+ .expect("regex compilation shouldn't fail");
+ PatternStrategic {
+ strategy: strategy,
+ pat: self.clone(),
+ re: re,
+ }
+ }
+
+ /// Returns the original glob pattern used to build this pattern.
+ pub fn glob(&self) -> &str {
+ &self.glob
+ }
+
+ /// Returns the regular expression string for this glob.
+ pub fn regex(&self) -> &str {
+ &self.re
+ }
+
+ /// Returns true if and only if this pattern only inspects the basename
+ /// of a path.
+ pub fn is_only_basename(&self) -> bool {
+ match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {}
+ _ => return false,
+ }
+ for t in &self.tokens[1..] {
+ match *t {
+ Token::Literal(c) if c == '/' || c == '\\' => return false,
+ Token::RecursivePrefix
+ | Token::RecursiveSuffix
+ | Token::RecursiveZeroOrMore => return false,
+ _ => {}
+ }
+ }
+ true
+ }
+
+ /// Returns the pattern as a literal if and only if the pattern must match
+ /// an entire path exactly.
+ ///
+ /// The basic format of these patterns is `{literal}`.
+ pub fn literal(&self) -> Option<String> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let mut lit = String::new();
+ for t in &*self.tokens {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() {
+ None
+ } else {
+ Some(lit)
+ }
+ }
+
+ /// Returns an extension if this pattern matches a file path if and only
+ /// if the file path has the extension returned.
+ ///
+ /// Note that this extension returned differs from the extension that
+ /// std::path::Path::extension returns. Namely, this extension includes
+ /// the '.'. Also, paths like `.rs` are considered to have an extension
+ /// of `.rs`.
+ pub fn ext(&self) -> Option<OsString> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let start = match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => 1,
+ Some(_) => 0,
+ _ => return None,
+ };
+ match self.tokens.get(start) {
+ Some(&Token::ZeroOrMore) => {
+ // If there was no recursive prefix, then we only permit
+ // `*` if `*` can match a `/`. For example, if `*` can't
+ // match `/`, then `*.c` doesn't match `foo/bar.c`.
+ if start == 0 && self.opts.literal_separator {
+ return None;
+ }
+ }
+ _ => return None,
+ }
+ match self.tokens.get(start + 1) {
+ Some(&Token::Literal('.')) => {}
+ _ => return None,
+ }
+ let mut lit = OsStr::new(".").to_os_string();
+ for t in self.tokens[start + 2..].iter() {
+ match *t {
+ Token::Literal('.') | Token::Literal('/') => return None,
+ Token::Literal(c) => lit.push(c.to_string()),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() {
+ None
+ } else {
+ Some(lit)
+ }
+ }
+
+ /// This is like `ext`, but returns an extension even if it isn't sufficent
+ /// to imply a match. Namely, if an extension is returned, then it is
+ /// necessary but not sufficient for a match.
+ pub fn required_ext(&self) -> Option<OsString> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ // We don't care at all about the beginning of this pattern. All we
+ // need to check for is if it ends with a literal of the form `.ext`.
+ let mut ext: Vec<char> = vec![]; // built in reverse
+ for t in self.tokens.iter().rev() {
+ match *t {
+ Token::Literal('/') => return None,
+ Token::Literal(c) => {
+ ext.push(c);
+ if c == '.' {
+ break;
+ }
+ }
+ _ => return None,
+ }
+ }
+ if ext.last() != Some(&'.') {
+ None
+ } else {
+ ext.reverse();
+ Some(OsString::from(ext.into_iter().collect::<String>()))
+ }
+ }
+
+ /// Returns a literal prefix of this pattern if the entire pattern matches
+ /// if the literal prefix matches.
+ pub fn prefix(&self) -> Option<String> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let end = match self.tokens.last() {
+ Some(&Token::ZeroOrMore) => {
+ if self.opts.literal_separator {
+ // If a trailing `*` can't match a `/`, then we can't
+ // assume a match of the prefix corresponds to a match
+ // of the overall pattern. e.g., `foo/*` with
+ // `literal_separator` enabled matches `foo/bar` but not
+ // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
+ // literal prefix.
+ return None;
+ }
+ self.tokens.len() - 1
+ }
+ _ => self.tokens.len(),
+ };
+ let mut lit = String::new();
+ for t in &self.tokens[0..end] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() {
+ None
+ } else {
+ Some(lit)
+ }
+ }
+
+ /// Returns a literal suffix of this pattern if the entire pattern matches
+ /// if the literal suffix matches.
+ ///
+ /// If a literal suffix is returned and it must match either the entire
+ /// file path or be preceded by a `/`, then also return true. This happens
+ /// with a pattern like `**/foo/bar`. Namely, this pattern matches
+ /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
+ /// suffix returned is `/foo/bar` (but should match the entire path
+ /// `foo/bar`).
+ ///
+ /// When this returns true, the suffix literal is guaranteed to start with
+ /// a `/`.
+ pub fn suffix(&self) -> Option<(String, bool)> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let mut lit = String::new();
+ let (start, entire) = match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {
+ // We only care if this follows a path component if the next
+ // token is a literal.
+ if let Some(&Token::Literal(_)) = self.tokens.get(1) {
+ lit.push('/');
+ (1, true)
+ } else {
+ (1, false)
+ }
+ }
+ _ => (0, false),
+ };
+ let start = match self.tokens.get(start) {
+ Some(&Token::ZeroOrMore) => {
+ // If literal_separator is enabled, then a `*` can't
+ // necessarily match everything, so reporting a suffix match
+ // as a match of the pattern would be a false positive.
+ if self.opts.literal_separator {
+ return None;
+ }
+ start + 1
+ }
+ _ => start,
+ };
+ for t in &self.tokens[start..] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ if lit.is_empty() || lit == "/" {
+ None
+ } else {
+ Some((lit, entire))
+ }
+ }
+
+ /// If this pattern only needs to inspect the basename of a file path,
+ /// then the tokens corresponding to only the basename match are returned.
+ ///
+ /// For example, given a pattern of `**/*.foo`, only the tokens
+ /// corresponding to `*.foo` are returned.
+ ///
+ /// Note that this will return None if any match of the basename tokens
+ /// doesn't correspond to a match of the entire pattern. For example, the
+ /// glob `foo` only matches when a file path has a basename of `foo`, but
+ /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
+ /// `foo` doesn't match `abc/foo`.
+ fn basename_tokens(&self) -> Option<&[Token]> {
+ if self.opts.case_insensitive {
+ return None;
+ }
+ let start = match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => 1,
+ _ => {
+ // With nothing to gobble up the parent portion of a path,
+ // we can't assume that matching on only the basename is
+ // correct.
+ return None;
+ }
+ };
+ if self.tokens[start..].is_empty() {
+ return None;
+ }
+ for t in &self.tokens[start..] {
+ match *t {
+ Token::Literal('/') => return None,
+ Token::Literal(_) => {} // OK
+ Token::Any | Token::ZeroOrMore => {
+ if !self.opts.literal_separator {
+ // In this case, `*` and `?` can match a path
+ // separator, which means this could reach outside
+ // the basename.
+ return None;
+ }
+ }
+ Token::RecursivePrefix
+ | Token::RecursiveSuffix
+ | Token::RecursiveZeroOrMore => {
+ return None;
+ }
+ Token::Class{..} | Token::Alternates(..) => {
+ // We *could* be a little smarter here, but either one
+ // of these is going to prevent our literal optimizations
+ // anyway, so give up.
+ return None;
+ }
+ }
+ }
+ Some(&self.tokens[start..])
+ }
+
+ /// Returns the pattern as a literal if and only if the pattern exclusiely
+ /// matches the basename of a file path *and* is a literal.
+ ///
+ /// The basic format of these patterns is `**/{literal}`, where `{literal}`
+ /// does not contain a path separator.
+ pub fn basename_literal(&self) -> Option<String> {
+ self.base_literal()
+ }
+
+ /// Returns the pattern as a literal if and only if the pattern exclusiely
+ /// matches the basename of a file path *and* is a literal.
+ ///
+ /// The basic format of these patterns is `**/{literal}`, where `{literal}`
+ /// does not contain a path separator.
+ pub fn base_literal(&self) -> Option<String> {
+ let tokens = match self.basename_tokens() {
+ None => return None,
+ Some(tokens) => tokens,
+ };
+ let mut lit = String::new();
+ for t in tokens {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ Some(lit)
+ }
+
+ /// Returns a literal prefix of this pattern if and only if the entire
+ /// pattern matches if the literal prefix matches.
+ pub fn literal_prefix(&self) -> Option<String> {
+ match self.tokens.last() {
+ Some(&Token::ZeroOrMore) => {}
+ _ => return None,
+ }
+ let mut lit = String::new();
+ for t in &self.tokens[0..self.tokens.len()-1] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ Some(lit)
+ }
+
+ /// Returns a literal suffix of this pattern if and only if the entire
+ /// pattern matches if the literal suffix matches.
+ pub fn literal_suffix(&self) -> Option<String> {
+ match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {}
+ _ => return None,
+ }
+ let start =
+ match self.tokens.get(1) {
+ Some(&Token::ZeroOrMore) => 2,
+ _ => 1,
+ };
+ let mut lit = String::new();
+ for t in &self.tokens[start..] {
+ match *t {
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ Some(lit)
+ }
+
+ /// Returns a basename literal prefix of this pattern.
+ pub fn base_literal_prefix(&self) -> Option<String> {
+ match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {}
+ _ => return None,
+ }
+ match self.tokens.last() {
+ Some(&Token::ZeroOrMore) => {}
+ _ => return None,
+ }
+ let mut lit = String::new();
+ for t in &self.tokens[1..self.tokens.len()-1] {
+ match *t {
+ Token::Literal(c) if c == '/' || c == '\\' => return None,
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ Some(lit)
+ }
+
+ /// Returns a basename literal suffix of this pattern.
+ pub fn base_literal_suffix(&self) -> Option<String> {
+ match self.tokens.get(0) {
+ Some(&Token::RecursivePrefix) => {}
+ _ => return None,
+ }
+ match self.tokens.get(1) {
+ Some(&Token::ZeroOrMore) => {}
+ _ => return None,
+ }
+ let mut lit = String::new();
+ for t in &self.tokens[2..] {
+ match *t {
+ Token::Literal(c) if c == '/' || c == '\\' => return None,
+ Token::Literal(c) => lit.push(c),
+ _ => return None,
+ }
+ }
+ Some(lit)
+ }
+}
+
+impl<'a> PatternBuilder<'a> {
+ /// Create a new builder for the pattern given.
+ ///
+ /// The pattern is not compiled until `build` is called.
+ pub fn new(glob: &'a str) -> PatternBuilder<'a> {
+ PatternBuilder {
+ glob: glob,
+ opts: PatternOptions::default(),
+ }
+ }
+
+ /// Parses and builds the pattern.
+ pub fn build(&self) -> Result<Pattern, Error> {
+ let mut p = Parser {
+ stack: vec![Tokens::default()],
+ chars: self.glob.chars().peekable(),
+ prev: None,
+ cur: None,
+ };
+ try!(p.parse());
+ if p.stack.is_empty() {
+ Err(Error::UnopenedAlternates)
+ } else if p.stack.len() > 1 {
+ Err(Error::UnclosedAlternates)
+ } else {
+ let tokens = p.stack.pop().unwrap();
+ Ok(Pattern {
+ glob: self.glob.to_string(),
+ re: tokens.to_regex_with(&self.opts),
+ opts: self.opts,
+ tokens: tokens,
+ })
+ }
+ }
+
+ /// Toggle whether the pattern matches case insensitively or not.
+ ///
+ /// This is disabled by default.
+ pub fn case_insensitive(&mut self, yes: bool) -> &mut PatternBuilder<'a> {
+ self.opts.case_insensitive = yes;
+ self
+ }
+
+ /// Toggle whether a literal `/` is required to match a path separator.
+ pub fn literal_separator(&mut self, yes: bool) -> &mut PatternBuilder<'a> {
+ self.opts.literal_separator = yes;
+ self
+ }
+}
+
+impl Tokens {
+ /// Convert this pattern to a string that is guaranteed to be a valid
+ /// regular expression and will represent the matching semantics of this
+ /// glob pattern and the options given.
+ fn to_regex_with(&self, options: &PatternOptions) -> String {
+ let mut re = String::new();
+ re.push_str("(?-u)");
+ if options.case_insensitive {
+ re.push_str("(?i)");
+ }
+ re.push('^');
+ // Special case. If the entire glob is just `**`, then it should match
+ // everything.
+ if self.len() == 1 && self[0] == Token::RecursivePrefix {
+ re.push_str(".*");
+ re.push('$');
+ return re;
+ }
+ self.tokens_to_regex(options, &self, &mut re);
+ re.push('$');
+ re
+ }
+
+
+ fn tokens_to_regex(
+ &self,
+ options: &PatternOptions,
+ tokens: &[Token],
+ re: &mut String,
+ ) {
+ let seps = &*FILE_SEPARATORS;
+
+ for tok in tokens {
+ match *tok {
+ Token::Literal(c) => {
+ re.push_str(&regex::quote(&c.to_string()));
+ }
+ Token::Any => {
+ if options.literal_separator {
+ re.push_str(&format!("[^{}]", seps));
+ } else {
+ re.push_str(".");
+ }
+ }
+ Token::ZeroOrMore => {
+ if options.literal_separator {
+ re.push_str(&format!("[^{}]*", seps));
+ } else {
+ re.push_str(".*");
+ }
+ }
+ Token::RecursivePrefix => {
+ re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
+ }
+ Token::RecursiveSuffix => {
+ re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
+ }
+ Token::RecursiveZeroOrMore => {
+ re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
+ sep=seps));
+ }
+ Token::Class { negated, ref ranges } => {
+ re.push('[');
+ if negated {
+ re.push('^');
+ }
+ for r in ranges {
+ if r.0 == r.1 {
+ // Not strictly necessary, but nicer to look at.
+ re.push_str(&regex::quote(&r.0.to_string()));
+ } else {
+ re.push_str(&regex::quote(&r.0.to_string()));
+ re.push('-');
+ re.push_str(&regex::quote(&r.1.to_string()));
+ }
+ }
+ re.push(']');
+ }
+ Token::Alternates(ref patterns) => {
+ let mut parts = vec![];
+ for pat in patterns {
+ let mut altre = String::new();
+ self.tokens_to_regex(options, &pat, &mut altre);
+ parts.push(altre);
+ }
+ re.push_str(&parts.join("|"));
+ }
+ }
+ }
+ }
+}
+
+struct Parser<'a> {
+ stack: Vec<Tokens>,
+ chars: iter::Peekable<str::Chars<'a>>,
+ prev: Option<char>,
+ cur: Option<char>,
+}
+
+impl<'a> Parser<'a> {
+ fn parse(&mut self) -> Result<(), Error> {
+ while let Some(c) = self.bump() {
+ match c {
+ '?' => try!(self.push_token(Token::Any)),
+ '*' => try!(self.parse_star()),
+ '[' => try!(self.parse_class()),
+ '{' => try!(self.push_alternate()),
+ '}' => try!(self.pop_alternate()),
+ ',' => try!(self.parse_comma()),
+ c => try!(self.push_token(Token::Literal(c))),
+ }
+ }
+ Ok(())
+ }
+
+ fn push_alternate(&mut self) -> Result<(), Error> {
+ if self.stack.len() > 1 {
+ return Err(Error::NestedAlternates);
+ }
+ Ok(self.stack.push(Tokens::default()))
+ }
+
+ fn pop_alternate(&mut self) -> Result<(), Error> {
+ let mut alts = vec![];
+ while self.stack.len() >= 2 {
+ alts.push(self.stack.pop().unwrap());
+ }
+ self.push_token(Token::Alternates(alts))
+ }
+
+ fn push_token(&mut self, tok: Token) -> Result<(), Error> {
+ match self.stack.last_mut() {
+ None => Err(Error::UnopenedAlternates),
+ Some(ref mut pat) => Ok(pat.push(tok)),
+ }
+ }
+
+ fn pop_token(&mut self) -> Result<Token, Error> {
+ match self.stack.last_mut() {
+ None => Err(Error::UnopenedAlternates),
+ Some(ref mut pat) => Ok(pat.pop().unwrap()),
+ }
+ }
+
+ fn have_tokens(&self) -> Result<bool, Error> {
+ match self.stack.last() {
+ None => Err(Error::UnopenedAlternates),
+ Some(ref pat) => Ok(!pat.is_empty()),
+ }
+ }
+
+ fn parse_comma(&mut self) -> Result<(), Error> {
+ // If we aren't inside a group alternation, then don't
+ // treat commas specially. Otherwise, we need to start
+ // a new alternate.
+ if self.stack.len() <= 1 {
+ self.push_token(Token::Literal(','))
+ } else {
+ Ok(self.stack.push(Tokens::default()))
+ }
+ }
+
+ fn parse_star(&mut self) -> Result<(), Error> {
+ let prev = self.prev;
+ if self.chars.peek() != Some(&'*') {
+ try!(self.push_token(Token::ZeroOrMore));
+ return Ok(());
+ }
+ assert!(self.bump() == Some('*'));
+ if !try!(self.have_tokens()) {
+ try!(self.push_token(Token::RecursivePrefix));
+ let next = self.bump();
+ if !next.is_none() && next != Some('/') {
+ return Err(Error::InvalidRecursive);
+ }
+ return Ok(());
+ }
+ try!(self.pop_token());
+ if prev != Some('/') {
+ if self.stack.len() <= 1
+ || (prev != Some(',') && prev != Some('{')) {
+ return Err(Error::InvalidRecursive);
+ }
+ }
+ match self.chars.peek() {
+ None => {
+ assert!(self.bump().is_none());
+ self.push_token(Token::RecursiveSuffix)
+ }
+ Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
+ self.push_token(Token::RecursiveSuffix)
+ }
+ Some(&'/') => {
+ assert!(self.bump() == Some('/'));
+ self.push_token(Token::RecursiveZeroOrMore)
+ }
+ _ => Err(Error::InvalidRecursive),
+ }
+ }
+
+ fn parse_class(&mut self) -> Result<(), Error> {
+ fn add_to_last_range(
+ r: &mut (char, char),
+ add: char,
+ ) -> Result<(), Error> {
+ r.1 = add;
+ if r.1 < r.0 {
+ Err(Error::InvalidRange(r.0, r.1))
+ } else {
+ Ok(())
+ }
+ }
+ let mut negated = false;
+ let mut ranges = vec![];
+ if self.chars.peek() == Some(&'!') {
+ assert!(self.bump() == Some('!'));
+ negated = true;
+ }
+ let mut first = true;
+ let mut in_range = false;
+ loop {
+ let c = match self.bump() {
+ Some(c) => c,
+ // The only way to successfully break this loop is to observe
+ // a ']'.
+ None => return Err(Error::UnclosedClass),
+ };
+ match c {
+ ']' => {
+ if first {
+ ranges.push((']', ']'));
+ } else {
+ break;
+ }
+ }
+ '-' => {
+ if first {
+ ranges.push(('-', '-'));
+ } else if in_range {
+ // invariant: in_range is only set when there is
+ // already at least one character seen.
+ let r = ranges.last_mut().unwrap();
+ try!(add_to_last_range(r, '-'));
+ in_range = false;
+ } else {
+ assert!(!ranges.is_empty());
+ in_range = true;
+ }
+ }
+ c => {
+ if in_range {
+ // invariant: in_range is only set when there is
+ // already at least one character seen.
+ try!(add_to_last_range(ranges.last_mut().unwrap(), c));
+ } else {
+ ranges.push((c, c));
+ }
+ in_range = false;
+ }
+ }
+ first = false;
+ }
+ if in_range {
+ // Means that the last character in the class was a '-', so add
+ // it as a literal.
+ ranges.push(('-', '-'));
+ }
+ self.push_token(Token::Class {
+ negated: negated,
+ ranges: ranges,
+ })
+ }
+
+ fn bump(&mut self) -> Option<char> {
+ self.prev = self.cur;
+ self.cur = self.chars.next();
+ self.cur
+ }
+}
+
+#[cfg(test)]
+fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
+ needle.len() <= haystack.len() && needle == &haystack[..needle.len()]
+}
+
+#[cfg(test)]
+fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
+ if needle.len() > haystack.len() {
+ return false;
+ }
+ needle == &haystack[haystack.len() - needle.len()..]
+}
+
+#[cfg(test)]
+mod tests {
+ use std::ffi::{OsStr, OsString};
+
+ use {SetBuilder, Error};
+ use super::{Pattern, PatternBuilder, Token};
+ use super::Token::*;
+
+ #[derive(Clone, Copy, Debug, Default)]
+ struct Options {
+ casei: bool,
+ litsep: bool,
+ }
+
+ macro_rules! syntax {
+ ($name:ident, $pat:expr, $tokens:expr) => {
+ #[test]
+ fn $name() {
+ let pat = Pattern::new($pat).unwrap();
+ assert_eq!($tokens, pat.tokens.0);
+ }
+ }
+ }
+
+ macro_rules! syntaxerr {
+ ($name:ident, $pat:expr, $err:expr) => {
+ #[test]
+ fn $name() {
+ let err = Pattern::new($pat).unwrap_err();
+ assert_eq!($err, err);
+ }
+ }
+ }
+
+ macro_rules! toregex {
+ ($name:ident, $pat:expr, $re:expr) => {
+ toregex!($name, $pat, $re, Options::default());
+ };
+ ($name:ident, $pat:expr, $re:expr, $options:expr) => {
+ #[test]
+ fn $name() {
+ let pat = PatternBuilder::new($pat)
+ .case_insensitive($options.casei)
+ .literal_separator($options.litsep)
+ .build()
+ .unwrap();
+ assert_eq!(format!("(?-u){}", $re), pat.regex());
+ }
+ };
+ }
+
+ macro_rules! matches {
+ ($name:ident, $pat:expr, $path:expr) => {
+ matches!($name, $pat, $path, Options::