diff options
author | Andrew Gallant <jamslam@gmail.com> | 2016-09-11 13:26:53 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2016-09-11 13:26:53 -0400 |
commit | 2b943eda4754f3b191aaab0c4cdfc4238632f845 (patch) | |
tree | fe7f07b1707590646039215d4db2cc2655bd7161 /src | |
parent | 37544c092f068761c8da1866c2eab20fd4cac146 (diff) |
Make file type filtering a lot faster.
We do this by avoiding using a RegexSet (*sigh*). In particular, file
type matching has much simpler semantics than gitignore files, so we don't
actually need to care which file type matched. Therefore, we can get away
with a single regex with a giant alternation.
Diffstat (limited to 'src')
-rw-r--r-- | src/args.rs | 2 | ||||
-rw-r--r-- | src/types.rs | 175 |
2 files changed, 121 insertions, 56 deletions
diff --git a/src/args.rs b/src/args.rs index 63abfa24..b3d57ded 100644 --- a/src/args.rs +++ b/src/args.rs @@ -367,7 +367,7 @@ impl RawArgs { types.select(ty); } for ty in &self.flag_type_not { - types.select_not(ty); + types.negate(ty); } Ok(()) } diff --git a/src/types.rs b/src/types.rs index 67f643c7..f3315491 100644 --- a/src/types.rs +++ b/src/types.rs @@ -8,7 +8,10 @@ use std::error::Error as StdError; use std::fmt; use std::path::Path; -use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; +use regex; + +use gitignore::{Match, Pattern}; +use glob::{self, MatchOptions}; const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ ("asm", &["*.asm", "*.s", "*.S"]), @@ -55,6 +58,7 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), ("py", &["*.py"]), + ("readme", &["README*", "*README"]), ("rr", &["*.R"]), ("rst", &["*.rst"]), ("ruby", &["*.rb"]), @@ -81,7 +85,9 @@ pub enum Error { /// A user specified file type definition could not be parsed. InvalidDefinition, /// There was an error building the matcher (probably a bad glob). - Gitignore(gitignore::Error), + Glob(glob::Error), + /// There was an error compiling a glob as a regex. + Regex(regex::Error), } impl StdError for Error { @@ -89,7 +95,8 @@ impl StdError for Error { match *self { Error::UnrecognizedFileType(_) => "unrecognized file type", Error::InvalidDefinition => "invalid definition", - Error::Gitignore(ref err) => err.description(), + Error::Glob(ref err) => err.description(), + Error::Regex(ref err) => err.description(), } } } @@ -104,14 +111,21 @@ impl fmt::Display for Error { write!(f, "invalid definition (format is type:glob, e.g., \ html:*.html)") } - Error::Gitignore(ref err) => err.fmt(f), + Error::Glob(ref err) => err.fmt(f), + Error::Regex(ref err) => err.fmt(f), } } } -impl From<gitignore::Error> for Error { - fn from(err: gitignore::Error) -> Error { - Error::Gitignore(err) +impl From<glob::Error> for Error { + fn from(err: glob::Error) -> Error { + Error::Glob(err) + } +} + +impl From<regex::Error> for Error { + fn from(err: regex::Error) -> Error { + Error::Regex(err) } } @@ -137,7 +151,8 @@ impl FileTypeDef { /// Types is a file type matcher. #[derive(Clone, Debug)] pub struct Types { - gi: Option<Gitignore>, + selected: Option<glob::Set>, + negated: Option<glob::Set>, has_selected: bool, unmatched_pat: Pattern, } @@ -149,14 +164,19 @@ impl Types { /// /// If has_selected is true, then at least one file type was selected. /// Therefore, any non-matches should be ignored. - fn new(gi: Option<Gitignore>, has_selected: bool) -> Types { + fn new( + selected: Option<glob::Set>, + negated: Option<glob::Set>, + has_selected: bool, + ) -> Types { Types { - gi: gi, + selected: selected, + negated: negated, has_selected: has_selected, unmatched_pat: Pattern { from: Path::new("<filetype>").to_path_buf(), - original: "<none>".to_string(), - pat: "<none>".to_string(), + original: "<N/A>".to_string(), + pat: "<N/A>".to_string(), whitelist: false, only_dir: false, }, @@ -165,7 +185,7 @@ impl Types { /// Creates a new file type matcher that never matches. pub fn empty() -> Types { - Types::new(None, false) + Types::new(None, None, false) } /// Returns a match for the given path against this file type matcher. @@ -175,22 +195,35 @@ impl Types { /// If at least one file type is selected and path doesn't match, then /// the path is also considered ignored. pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match { + // If we don't have any matcher, then we can't do anything. + if self.negated.is_none() && self.selected.is_none() { + return Match::None; + } // File types don't apply to directories. if is_dir { return Match::None; } let path = path.as_ref(); - self.gi.as_ref() - .map(|gi| { - let path = &*path.to_string_lossy(); - let mat = gi.matched_utf8(path, is_dir).invert(); - if self.has_selected && mat.is_none() { - Match::Ignored(&self.unmatched_pat) - } else { - mat - } - }) - .unwrap_or(Match::None) + let name = match path.file_name() { + Some(name) => name.to_string_lossy(), + None if self.has_selected => { + return Match::Ignored(&self.unmatched_pat); + } + None => { + return Match::None; + } + }; + if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) { + return Match::Ignored(&self.unmatched_pat); + } + if self.selected.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) { + return Match::Whitelist(&self.unmatched_pat); + } + if self.has_selected { + Match::Ignored(&self.unmatched_pat) + } else { + Match::None + } } } @@ -198,8 +231,8 @@ impl Types { /// a set of file type selections. pub struct TypesBuilder { types: HashMap<String, Vec<String>>, - select: Vec<String>, - select_not: Vec<String>, + selected: Vec<String>, + negated: Vec<String>, } impl TypesBuilder { @@ -207,41 +240,57 @@ impl TypesBuilder { pub fn new() -> TypesBuilder { TypesBuilder { types: HashMap::new(), - select: vec![], - select_not: vec![], + selected: vec![], + negated: vec![], } } /// Build the current set of file type definitions *and* selections into /// a file type matcher. pub fn build(&self) -> Result<Types, Error> { - if self.select.is_empty() && self.select_not.is_empty() { - return Ok(Types::new(None, false)); - } - let mut bgi = GitignoreBuilder::new("/"); - for name in &self.select { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - return Err(Error::UnrecognizedFileType(name.to_string())); + let opts = MatchOptions { + require_literal_separator: true, ..MatchOptions::default() + }; + let selected_globs = + if self.selected.is_empty() { + None + } else { + let mut bset = glob::SetBuilder::new(); + for name in &self.selected { + let globs = match self.types.get(name) { + Some(globs) => globs, + None => { + let msg = name.to_string(); + return Err(Error::UnrecognizedFileType(msg)); + } + }; + for glob in globs { + try!(bset.add_with(glob, &opts)); + } } + Some(try!(bset.build())) }; - for glob in globs { - try!(bgi.add("<filetype>", glob)); - } - } - for name in &self.select_not { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - return Err(Error::UnrecognizedFileType(name.to_string())); + let negated_globs = + if self.negated.is_empty() { + None + } else { + let mut bset = glob::SetBuilder::new(); + for name in &self.negated { + let globs = match self.types.get(name) { + Some(globs) => globs, + None => { + let msg = name.to_string(); + return Err(Error::UnrecognizedFileType(msg)); + } + }; + for glob in globs { + try!(bset.add_with(glob, &opts)); + } } + Some(try!(bset.build())) }; - for glob in globs { - try!(bgi.add("<filetype>", &format!("!{}", glob))); - } - } - Ok(Types::new(Some(try!(bgi.build())), !self.select.is_empty())) + Ok(Types::new( + selected_globs, negated_globs, !self.selected.is_empty())) } /// Return the set of current file type definitions. @@ -260,14 +309,30 @@ impl TypesBuilder { } /// Select the file type given by `name`. + /// + /// If `name` is `all`, then all file types are selected. pub fn select(&mut self, name: &str) -> &mut TypesBuilder { - self.select.push(name.to_string()); + if name == "all" { + for name in self.types.keys() { + self.selected.push(name.to_string()); + } + } else { + self.selected.push(name.to_string()); + } self } /// Ignore the file type given by `name`. - pub fn select_not(&mut self, name: &str) -> &mut TypesBuilder { - self.select_not.push(name.to_string()); + /// + /// If `name` is `all`, then all file types are negated. + pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.negated.push(name.to_string()); + } + } else { + self.negated.push(name.to_string()); + } self } @@ -333,7 +398,7 @@ mod tests { btypes.select(sel); } for selnot in $selnot { - btypes.select_not(selnot); + btypes.negate(selnot); } let types = btypes.build().unwrap(); let mat = types.matched($path, false); |