/*!
The glob module provides standard shell globbing, but is specifically
implemented by converting glob syntax to regular expressions. The reasoning is
two fold:
1. The regex library is *really* fast. Regaining performance in a distinct
implementation of globbing is non-trivial.
2. Most crucially, a `RegexSet` can be used to match many globs simultaneously.
This module is written with some amount of intention of eventually splitting it
out into its own separate crate, but I didn't quite have the energy for all
that rigamorole when I wrote this. In particular, it could be fast/good enough
to make its way into `glob` proper.
*/
// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets
// here. For example, we do a first pass single-regex-of-all-globs filter
// before actually running the regex set. This turns out to be faster,
// especially in fresh checkouts of repos that don't have a lot of ignored
// files. It's not clear how hard it is to make the regex set faster.
//
// An alternative avenue is to stop doing "regex all the things." (Which, to
// be fair, is pretty fast---I just expected it to be faster.) We could do
// something clever using assumptions along the lines of "oh, most ignore
// patterns are either literals or are for ignoring file extensions." (Look
// at the .gitignore for the chromium repo---just about every pattern satisfies
// that assumption.)
use std::borrow::Cow;
use std::collections::HashMap;
use std::error::Error as StdError;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash;
use std::iter;
use std::path::Path;
use std::str;
use fnv;
use regex;
use regex::bytes::Regex;
use regex::bytes::RegexSet;
use pathutil::file_name;
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
InvalidRecursive,
UnclosedClass,
InvalidRange(char, char),
}
impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::InvalidRecursive => {
"invalid use of **; must be one path component"
}
Error::UnclosedClass => {
"unclosed character class; missing ']'"
}
Error::InvalidRange(_, _) => {
"invalid character range"
}
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::InvalidRecursive | Error::UnclosedClass => {
write!(f, "{}", self.description())
}
Error::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
}
}
}
/// SetYesNo represents a group of globs that can be matched together in a
/// single pass. SetYesNo can only determine whether a particular path matched
/// any pattern in the set.
#[derive(Clone, Debug)]
pub struct SetYesNo {
re: Regex,
}
impl SetYesNo {
/// Returns true if and only if the given path matches at least one glob
/// in this set.
pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
self.re.is_match(&*path_bytes(path.as_ref()))
}
fn new(
pats: &[(Pattern, MatchOptions)],
) -> Result<SetYesNo, regex::Error> {
let mut joined = String::new();
for &(ref p, ref o) in pats {
let part = format!("(?:{})", p.to_regex_with(o));
if !joined.is_empty() {
joined.push('|');
}
joined.push_str(&part);
}
Ok(SetYesNo { re: try!(Regex::new(&joined)) })
}
}
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// Set represents a group of globs that can be matched together in a single
/// pass.
#[derive(Clone, Debug)]
pub struct Set {
yesno: SetYesNo,
exts: HashMap<OsString, Vec<usize>, Fnv>,
literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_prefixes: Vec<Vec<u8>>,
base_prefixes_map: Vec<usize>,
base_suffixes: Vec<Vec<u8>>,
base_suffixes_map: Vec<usize>,
regexes: RegexSet,
regexes_map: Vec<usize>,
}
impl Set {
/// Returns the sequence number of every glob pattern that matches the
/// given path.
#[allow(dead_code)]
pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
let mut into = vec![];
self.matches_into(path, &mut into);
into
}
/// Adds the sequence number of every glob pattern that matches the given
/// path to the vec given.
pub fn matches_into<T: AsRef<Path>>(
&self,
path: T,
into: