summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2016-08-27 01:01:06 -0400
committerAndrew Gallant <jamslam@gmail.com>2016-08-27 01:01:06 -0400
commit065c44998092906746f6585090bafbf36d98342b (patch)
treeaff9d512fd19767309fd0bad6b18a674ec84b00c
parentb55ecf34c713392b012dd652fbbd11d7e0126d97 (diff)
File path filtering works and is pretty fast.
I'm pretty disappointed by the performance of regex sets. They are apparently spending a lot of their time in construction of the DFA, which probably means that the DFA is just too big. It turns out that it's actually faster to build an *additional* normal regex with the alternation of every glob and use it as a first-pass filter over every file path. If there's a match, only then do we try the more expensive RegexSet.
-rw-r--r--Cargo.toml3
-rw-r--r--src/gitignore.rs368
-rw-r--r--src/glob.rs38
-rw-r--r--src/ignore.rs200
-rw-r--r--src/main.rs90
5 files changed, 673 insertions, 26 deletions
diff --git a/Cargo.toml b/Cargo.toml
index d4d59f63..15b268c9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,9 @@ regex-syntax = { version = "0.3.1", path = "/home/andrew/rust/regex/regex-syntax
rustc-serialize = "0.3"
walkdir = "0.1"
+[features]
+simd-accel = ["regex/simd-accel"]
+
[dev-dependencies]
glob = "0.2"
lazy_static = "0.2"
diff --git a/src/gitignore.rs b/src/gitignore.rs
new file mode 100644
index 00000000..f39cfc48
--- /dev/null
+++ b/src/gitignore.rs
@@ -0,0 +1,368 @@
+/*!
+The gitignore module provides a way of reading a gitignore file and applying
+it to a particular file name to determine whether it should be ignored or not.
+The motivation for this submodule is performance and portability:
+
+1. There is a gitignore crate on crates.io, but it uses the standard `glob`
+ crate and checks patterns one-by-one. This is a reasonable implementation,
+ but not suitable for the performance we need here.
+2. We could shell out to a `git` sub-command like ls-files or status, but it
+ seems better to not rely on the existence of external programs for a search
+ tool. Besides, we need to implement this logic anyway to support things like
+ an .xrepignore file.
+
+The key implementation detail here is that a single gitignore file is compiled
+into a single RegexSet, which can be used to report which globs match a
+particular file name. We can then do a quick post-processing step to implement
+additional rules such as whitelists (prefix of `!`) or directory-only globs
+(suffix of `/`).
+*/
+
+// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
+// use this exact implementation because hgignore files are different.
+
+use std::env;
+use std::error::Error as StdError;
+use std::fmt;
+use std::fs::File;
+use std::io::{self, BufRead};
+use std::path::{Path, PathBuf};
+
+use regex;
+
+use glob;
+
+/// Represents an error that can occur when parsing a gitignore file.
+#[derive(Debug)]
+pub enum Error {
+ Glob(glob::Error),
+ Regex(regex::Error),
+ Io(io::Error),
+}
+
+impl StdError for Error {
+ fn description(&self) -> &str {
+ match *self {
+ Error::Glob(ref err) => err.description(),
+ Error::Regex(ref err) => err.description(),
+ Error::Io(ref err) => err.description(),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ Error::Glob(ref err) => err.fmt(f),
+ Error::Regex(ref err) => err.fmt(f),
+ Error::Io(ref err) => err.fmt(f),
+ }
+ }
+}
+
+impl From<glob::Error> for Error {
+ fn from(err: glob::Error) -> Error {
+ Error::Glob(err)
+ }
+}
+
+impl From<regex::Error> for Error {
+ fn from(err: regex::Error) -> Error {
+ Error::Regex(err)
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(err: io::Error) -> Error {
+ Error::Io(err)
+ }
+}
+
+/// Gitignore is a matcher for the glob patterns in a single gitignore file.
+pub struct Gitignore {
+ set: glob::Set,
+ root: PathBuf,
+ patterns: Vec<Pattern>,
+}
+
+impl Gitignore {
+ /// Create a new gitignore glob matcher from the gitignore file at the
+ /// given path. The root of the gitignore file is the parent of path.
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Gitignore, Error> {
+ let root = match path.as_ref().parent() {
+ Some(parent) => parent.to_path_buf(),
+ None => env::current_dir().unwrap_or(Path::new("/").to_path_buf()),
+ };
+ let mut builder = GitignoreBuilder::new(root);
+ try!(builder.add_path(path));
+ builder.build()
+ }
+
+ /// Create a new gitignore glob matcher from the given root directory and
+ /// string containing the contents of a gitignore file.
+ pub fn from_str<P: AsRef<Path>>(
+ root: P,
+ gitignore: &str,
+ ) -> Result<Gitignore, Error> {
+ let mut builder = GitignoreBuilder::new(root);
+ try!(builder.add_str(gitignore));
+ builder.build()
+ }
+
+ /// Returns true if and only if the given file path should be ignored
+ /// according to the globs in this gitignore. `is_dir` should be true if
+ /// the path refers to a directory and false otherwise.
+ ///
+ /// Before matching path, the root directory of this gitignore is
+ /// stripped from the front of path if path starts with it. Otherwise,
+ /// path is matched as given, i.e., it is assumed to be relative to the
+ /// directory containing this gitignore file.
+ ///
+ /// If the given path has a `./` prefix then it is stripped before
+ /// matching.
+ pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
+ let mut path = path.as_ref();
+ if let Ok(p) = path.strip_prefix("./") {
+ path = p;
+ }
+ if let Ok(p) = path.strip_prefix(&self.root) {
+ path = p;
+ }
+ self.matched_utf8(&*path.to_string_lossy(), is_dir)
+ }
+
+ /// Like matched, but takes a path that has already been stripped and
+ /// converted to UTF-8.
+ pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
+ // A single regex with a bunch of alternations of glob patterns is
+ // unfortunately typically faster than a regex set, so we use it as a
+ // first pass filter.
+ if !self.set.is_match(path) {
+ return Match::None;
+ }
+ let pat = match self.set.matches(path).iter().last() {
+ None => return Match::None,
+ Some(i) => &self.patterns[i],
+ };
+ if pat.whitelist {
+ Match::Whitelist
+ } else if !pat.only_dir || is_dir {
+ Match::Ignored
+ } else {
+ Match::None
+ }
+ }
+}
+
+/// The result of a glob match.
+#[derive(Clone, Debug)]
+pub enum Match {
+ /// The path didn't match any glob in the gitignore file.
+ None,
+ /// The last glob matched indicates the path should be ignored.
+ Ignored,
+ /// The last glob matched indicates the path should be whitelisted.
+ Whitelist,
+}
+
+impl Match {
+ /// Returns true if the match result implies the path should be ignored.
+ pub fn is_ignored(&self) -> bool {
+ match *self {
+ Match::Ignored => true,
+ Match::None | Match::Whitelist => false,
+ }
+ }
+}
+
+/// GitignoreBuilder constructs a matcher for a single set of globs from a
+/// .gitignore file.
+pub struct GitignoreBuilder {
+ builder: glob::SetBuilder,
+ root: PathBuf,
+ patterns: Vec<Pattern>,
+}
+
+/// Pattern represents a single pattern in a gitignore file. It doesn't
+/// know how to do glob matching directly, but it does store additional
+/// options on a pattern, such as whether it's whitelisted.
+#[derive(Clone, Debug, Default)]
+struct Pattern {
+ pat: String,
+ whitelist: bool, // prefix of '!'
+ only_dir: bool, // suffix of '/'
+}
+
+impl GitignoreBuilder {
+ /// Create a new builder for a gitignore file.
+ ///
+ /// The path given should be the path at which the globs for this gitignore
+ /// file should be matched.
+ pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
+ GitignoreBuilder {
+ builder: glob::SetBuilder::new(),
+ root: root.as_ref().to_path_buf(),
+ patterns: vec![],
+ }
+ }
+
+ /// Builds a new matcher from the glob patterns added so far.
+ ///
+ /// Once a matcher is built, no new glob patterns can be added to it.
+ pub fn build(self) -> Result<Gitignore, Error> {
+ Ok(Gitignore {
+ set: try!(self.builder.build()),
+ root: self.root,
+ patterns: self.patterns,
+ })
+ }
+
+ /// Add each pattern line from the file path given.
+ pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
+ let rdr = io::BufReader::new(try!(File::open(&path)));
+ // println!("adding ignores from: {}", path.as_ref().display());
+ for line in rdr.lines() {
+ try!(self.add(&try!(line)));
+ }
+ Ok(())
+ }
+
+ /// Add each pattern line from the string given.
+ pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
+ for line in gitignore.lines() {
+ try!(self.add(line));
+ }
+ Ok(())
+ }
+
+ /// Add a line from a gitignore file to this builder.
+ ///
+ /// If the line could not be parsed as a glob, then an error is returned.
+ pub fn add(&mut self, mut line: &str) -> Result<(), Error> {
+ if line.is_empty() {
+ return Ok(());
+ }
+ let mut pat = Pattern::default();
+ let mut opts = glob::MatchOptions::default();
+ let has_slash = line.chars().any(|c| c == '/');
+ // If the line starts with an escaped '!' or '#', then remove the
+ // escape. Otherwise, if it starts with an unescaped '!', then this is
+ // a whitelist pattern.
+ match line.chars().nth(0) {
+ Some('#') => return Ok(()),
+ Some('\\') => {
+ match line.chars().nth(1) {
+ Some('!') | Some('#') => {
+ line = &line[1..];
+ }
+ _ => {}
+ }
+ }
+ Some('!') => {
+ pat.whitelist = true;
+ line = &line[1..];
+ }
+ Some('/') => {
+ // `man gitignore` says that if a glob starts with a slash,
+ // then the glob can only match the beginning of a path
+ // (relative to the location of gitignore). We achieve this by
+ // simply banning wildcards from matching /.
+ opts.require_literal_separator = true;
+ line = &line[1..];
+ }
+ _ => {}
+ }
+ // If it ends with a slash, then this should only match directories,
+ // but the slash should otherwise not be used while globbing.
+ if let Some((i, c)) = line.char_indices().rev().nth(0) {
+ if c == '/' {
+ pat.only_dir = true;
+ line = &line[..i];
+ }
+ }
+ // If there is a literal slash, then we note that so that globbing
+ // doesn't let wildcards match slashes. Otherwise, we need to let
+ // the pattern match anywhere, so we add a `**/` prefix to achieve
+ // that behavior.
+ pat.pat = line.to_string();
+ if has_slash {
+ opts.require_literal_separator = true;
+ } else {
+ pat.pat = format!("**/{}", pat.pat);
+ }
+ try!(self.builder.add_with(&pat.pat, &opts));
+ self.patterns.push(pat);
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::Gitignore;
+
+ macro_rules! ignored {
+ ($name:ident, $root:expr, $gi:expr, $path:expr) => {
+ ignored!($name, $root, $gi, $path, false);
+ };
+ ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
+ #[test]
+ fn $name() {
+ let gi = Gitignore::from_str($root, $gi).unwrap();
+ assert!(gi.matched($path, $is_dir).is_ignored());
+ }
+ };
+ }
+
+ macro_rules! not_ignored {
+ ($name:ident, $root:expr, $gi:expr, $path:expr) => {
+ not_ignored!($name, $root, $gi, $path, false);
+ };
+ ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
+ #[test]
+ fn $name() {
+ let gi = Gitignore::from_str($root, $gi).unwrap();
+ assert!(!gi.matched($path, $is_dir).is_ignored());
+ }
+ };
+ }
+
+ const ROOT: &'static str = "/home/foobar/rust/xrep";
+
+ ignored!(ig1, ROOT, "months", "months");
+ ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
+ ignored!(ig3, ROOT, "*.rs", "src/main.rs");
+ ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
+ ignored!(ig5, ROOT, "/*.c", "cat-file.c");
+ ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
+ ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
+ ignored!(ig8, ROOT, "foo/", "foo", true);
+ ignored!(ig9, ROOT, "**/foo", "foo");
+ ignored!(ig10, ROOT, "**/foo", "src/foo");
+ ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
+ ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
+ ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
+ ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
+ ignored!(ig15, ROOT, "abc/**", "abc/x");
+ ignored!(ig16, ROOT, "abc/**", "abc/x/y");
+ ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
+ ignored!(ig18, ROOT, "a/**/b", "a/b");
+ ignored!(ig19, ROOT, "a/**/b", "a/x/b");
+ ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
+ ignored!(ig21, ROOT, r"\!xy", "!xy");
+ ignored!(ig22, ROOT, r"\#foo", "#foo");
+ ignored!(ig23, ROOT, "foo", "./foo");
+ ignored!(ig24, ROOT, "target", "grep/target");
+
+ not_ignored!(ignot1, ROOT, "amonths", "months");
+ not_ignored!(ignot2, ROOT, "monthsa", "months");
+ not_ignored!(ignot3, ROOT, "src/*.rs", "src/grep/src/main.rs");
+ not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
+ not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
+ not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
+ not_ignored!(ignot7, ROOT, "foo/", "foo", false);
+ not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
+ not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
+ not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
+ not_ignored!(ignot11, ROOT, "#foo", "#foo");
+ not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
+}
diff --git a/src/glob.rs b/src/glob.rs
index 6ae57a34..c1d28809 100644
--- a/src/glob.rs
+++ b/src/glob.rs
@@ -20,7 +20,7 @@ use std::path;
use std::str;
use regex;
-use regex::bytes::{RegexSet, SetMatches};
+use regex::bytes::{Regex, RegexSet, SetMatches};
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -63,7 +63,8 @@ impl fmt::Display for Error {
/// pass.
#[derive(Clone, Debug)]
pub struct Set {
- re: RegexSet,
+ re: Regex,
+ set: RegexSet,
}
impl Set {
@@ -76,21 +77,12 @@ impl Set {
/// Returns every glob pattern (by sequence number) that matches the given
/// path.
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
- self.re.matches(path.as_ref())
- }
-
- /// Populates the given slice with corresponding patterns that matched.
- pub fn matches_with<T: AsRef<[u8]>>(
- &self,
- path: T,
- matches: &mut [bool],
- ) -> bool {
- self.re.matches_with(path.as_ref(), matches)
+ self.set.matches(path.as_ref())
}
/// Returns the number of glob patterns in this set.
pub fn len(&self) -> usize {
- self.re.len()
+ self.set.len()
}
}
@@ -113,8 +105,18 @@ impl SetBuilder {
/// Once a matcher is built, no new patterns can be added to it.
pub fn build(&self) -> Result<Set, regex::Error> {
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
- let re = try!(RegexSet::new(it));
- Ok(Set { re: re })
+ let set = try!(RegexSet::new(it));
+
+ let mut joined = String::new();
+ for &(ref p, ref o) in &self.pats {
+ let part = format!("(?:{})", p.to_regex_with(o));
+ if !joined.is_empty() {
+ joined.push('|');
+ }
+ joined.push_str(&part);
+ }
+ let re = try!(Regex::new(&joined));
+ Ok(Set { re: re, set: set })
}
/// Add a new pattern to this set.
@@ -151,10 +153,10 @@ pub struct Pattern {
#[derive(Clone, Debug, Default)]
pub struct MatchOptions {
/// When true, matching is done case insensitively.
- case_insensitive: bool,
+ pub case_insensitive: bool,
/// When true, neither `*` nor `?` match the current system's path
/// separator.
- require_literal_separator: bool,
+ pub require_literal_separator: bool,
}
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -199,6 +201,7 @@ impl Pattern {
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
let sep = path::MAIN_SEPARATOR.to_string();
let mut re = String::new();
+ re.push_str("(?-u)");
if options.case_insensitive {
re.push_str("(?i)");
}
@@ -457,7 +460,6 @@ mod tests {
fn $name() {
let pat = Pattern::new($pat).unwrap();
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
- // println!("{:?}", re);
assert!(!re.is_match($path));
}
};
diff --git a/src/ignore.rs b/src/ignore.rs
new file mode 100644
index 00000000..08b4541d
--- /dev/null
+++ b/src/ignore.rs
@@ -0,0 +1,200 @@
+/*!
+The ignore module is responsible for managing the state required to determine
+whether a *single* file path should be searched or not.
+
+In general, there are two ways to ignore a particular file:
+
+1. Specify an ignore rule in some "global" configuration, such as a
+ $HOME/.xrepignore or on the command line.
+2. A specific ignore file (like .gitignore) found during directory traversal.
+
+The `IgnoreDir` type handles ignore patterns for any one particular directory
+(including "global" ignore patterns), while the `Ignore` type handles a stack
+of `IgnoreDir`s for use during directory traversal.
+*/
+
+use std::error::Error as StdError;
+use std::fmt;
+use std::path::{Path, PathBuf};
+
+use gitignore::{self, Gitignore, GitignoreBuilder, Match};
+
+/// Represents an error that can occur when parsing a gitignore file.
+#[derive(Debug)]
+pub enum Error {
+ Gitignore(gitignore::Error),
+}
+
+impl StdError for Error {
+ fn description(&self) -> &str {
+ match *self {
+ Error::Gitignore(ref err) => err.description(),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ Error::Gitignore(ref err) => err.fmt(f),
+ }
+ }
+}
+
+impl From<gitignore::Error> for Error {
+ fn from(err: gitignore::Error) -> Error {
+ Error::Gitignore(err)
+ }
+}
+
+/// Ignore represents a collection of ignore patterns organized by directory.
+/// In particular, a stack is maintained, where the top of the stack
+/// corresponds to the current directory being searched and the bottom of the
+/// stack represents the root of a search. Ignore patterns at the top of the
+/// stack take precedence over ignore patterns at the bottom of the stack.
+pub struct Ignore {
+ /// A stack of ignore patterns at each directory level of traversal.
+ /// A directory that contributes no ignore patterns is `None`.
+ stack: Vec<Option<IgnoreDir>>,
+ // TODO(burntsushi): Add other patterns from the command line here.
+}
+
+impl Ignore {
+ /// Create an empty set of ignore patterns.
+ pub fn new() -> Ignore {
+ Ignore { stack: vec![] }
+ }
+
+ /// Add a directory to the stack.
+ pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
+ self.stack.push(try!(IgnoreDir::new(path)));
+ Ok(())
+ }
+
+ /// Pop a directory from the stack.
+ ///
+ /// This panics if the stack is empty.
+ pub fn pop(&mut self) {
+ self.stack.pop().expect("non-empty stack");
+ }
+
+ /// Returns true if and only if the given file path should be ignored.
+ pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
+ let path = path.as_ref();
+ for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
+ match id.matched(path, is_dir) {
+ Match::Whitelist => return false,
+ Match::Ignored => return true,
+ Match::None => {}
+ }
+ }
+ false
+ }
+}
+
+/// IgnoreDir represents a set of ignore patterns retrieved from a single
+/// directory.
+pub struct IgnoreDir {
+ /// The path to this directory as given.
+ path: PathBuf,
+ /// A single accumulation of glob patterns for this directory, matched
+ /// using gitignore semantics.
+ ///
+ /// This will include patterns from xrepignore as well. The patterns are
+ /// ordered so that precedence applies automatically (e.g., xrepignore
+ /// patterns take precedence over gitignore patterns).
+ gi: Option<Gitignore>,
+ // TODO(burntsushi): Matching other types of glob patterns that don't
+ // conform to gitignore will probably require refactoring this approach.
+}
+
+impl IgnoreDir {
+ /// Create a new matcher for the given directory.
+ ///
+ /// If no ignore glob patterns could be found in the directory then `None`
+ /// is returned.
+ pub fn new<P: AsRef<Path>>(path: P) -> Result<Option<IgnoreDir>, Error> {
+ let mut id = IgnoreDir {
+ path: path.as_ref().to_path_buf(),
+ gi: None,
+ };
+ let mut ok = false;
+ let mut builder = GitignoreBuilder::new(&id.path);
+ // The ordering here is important. Later globs have higher precedence.
+ ok = builder.add_path(id.path.join(".gitignore")).is_ok() || ok;
+ ok = builder.add_path(id.path.join(".agignore")).is_ok() || ok;
+ ok = builder.add_path(id.path.join(".xrepignore")).is_ok() || ok;
+ if !ok {
+ Ok(None)
+ } else {
+ id.gi = Some(try!(builder.build()));
+ Ok(Some(id))
+ }
+ }
+
+ /// Returns true if and only if the given file path should be ignored
+ /// according to the globs in this directory. `is_dir` should be true if
+ /// the path refers to a directory and false otherwise.
+ ///
+ /// Before matching path, this directory's path is stripped from the
+ /// front of path if path starts with it. Otherwise, path is matched
+ /// as given, i.e., it is assumed to be relative to this
+ /// directory.
+ ///
+ /// If the given path has a `./` prefix then it is stripped before
+ /// matching.
+ pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
+ self.gi.as_ref()
+ .map(|gi| gi.matched(path, is_dir))
+ .unwrap_or(Match::None)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::path::Path;
+ use gitignore::GitignoreBuilder;
+ use super::IgnoreDir;
+
+ macro_rules! ignored_dir {
+ ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GitignoreBuilder::new(&$root);
+ builder.add_str($gi).unwrap();
+ builder.add_str($xi).unwrap();
+ let gi = builder.build().unwrap();
+ let id = IgnoreDir {
+ path: Path::new($root).to_path_buf(),
+ gi: Some(gi),
+ };
+ assert!(id.matched($path, false).is_ignored());
+ }
+ };
+ }
+
+ macro_rules! not_ignored_dir {
+ ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
+ #[test]
+ fn $name() {
+ let mut builder = GitignoreBuilder::new(&$root);
+ builder.add_str($gi).unwrap();
+ builder.add_str($xi).unwrap();
+ let gi = builder.build().unwrap();
+ let id = IgnoreDir {
+ path: Path::new($root).to_path_buf(),
+ gi: Some(gi),
+ };
+ assert!(!id.matched($path, false).is_ignored());
+ }
+ };
+ }
+
+ const ROOT: &'static str = "/home/foobar/rust/xrep";
+
+ ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
+ ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
+ ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs");
+
+ not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs");
+}
diff --git a/src/main.rs b/src/main.rs
index 4d90cae7..da0d8efc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -30,6 +30,8 @@ use docopt::Docopt;
use grep::Grep;
use walkdir::{WalkDir, WalkDirIterator};
+use ignore::Ignore;
+
macro_rules! errored {
($($tt:tt)*) => {
return Err(From::from(format!($($tt)*)));
@@ -43,7 +45,9 @@ macro_rules! eprintln {
}}
}
+mod gitignore;
mod glob;
+mod ignore;
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
@@ -72,24 +76,40 @@ impl Args {
if self.arg_path.is_empty() {
return errored!("Searching stdin is not currently supported.");
}
+ let mut stdout = io::BufWriter::new(io::stdout());
+ let mut ig = Ignore::new();
for p in &self.arg_path {
- let mut it = WalkDir::new(p).into_iter();
+ let mut it = WalkEventIter::from(WalkDir::new(p));
loop {
- let ent = match it.next() {
+ let ev = match it.next() {
None => break,
+ Some(Ok(ev)) => ev,
Some(Err(err)) => {
eprintln!("{}", err);
continue;
}
- Some(Ok(ent)) => ent,
};
- if is_hidden(&ent) {
- if ent.file_type().is_dir() {
- it.skip_current_dir();
+ match ev {
+ WalkEvent::Exit => {
+ ig.pop();
+ }
+ WalkEvent::Dir(ent) => {
+ try!(ig.push(ent.path()));
+ if is_hidden(&ent) || ig.ignored(ent.path(), true) {
+ // if is_hidden(&ent) {
+ it.it.skip_current_dir();
+ continue;
+ }
+ }
+ WalkEvent::File(ent) => {
+ if is_hidden(&ent) || ig.ignored(ent.path(), false) {
+ // if is_hidden(&ent) {
+ continue;
+ }
+ let _ = writeln!(
+ &mut stdout, "{}", ent.path().display());
}
- continue;
}
- println!("{}", ent.path().display());
}
}
Ok(0)
@@ -108,6 +128,60 @@ impl Args {
}
}
+/// WalkEventIter transforms a WalkDir iterator into an iterator that more
+/// accurately describes the directory tree. Namely, it emits events that are
+/// one of three types: directory, file or "exit." An "exit" event means that
+/// the entire contents of a directory have been enumerated.
+struct WalkEventIter {
+ depth: usize,
+ it: walkdir::Iter,
+ next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
+}
+
+#[derive(Debug)]
+enum WalkEvent {
+ Dir(walkdir::DirEntry),
+ File(walkdir::DirEntry),
+ Exit,
+}
+
+impl From<walkdir::WalkDir> for WalkEventIter {
+ fn from(it: walkdir::WalkDir) -> WalkEventIter {
+ WalkEventIter { depth: 0, it: it.into_iter(), next: None }
+ }
+}
+
+impl Iterator for WalkEventIter {
+ type Item = io::Result<WalkEvent>;
+
+ fn next(&mut self) -> Option<io::Result<WalkEvent>> {
+ let dent = self.next.take().or_else(|| self.it.next());
+ let depth = match dent {
+ None => 0,
+ Some(Ok(ref dent)) => dent.depth(),
+ Some(Err(ref err)) => err.depth(),
+ };
+ if depth < self.depth {
+ self.depth -= 1;
+ self.next = dent;
+ return Some(Ok(WalkEvent::Exit));
+ }
+ self.depth = depth;
+ match dent {
+ None => None,
+ Some(Err(err)) => Some(Err(From::from(err))),
+ Some(Ok(dent)) => {
+ if dent.file_type().is_dir() {
+ self.depth += 1;
+ Some(Ok(WalkEvent::Dir(dent)))
+ } else {
+ Some(Ok(WalkEvent::File(dent)))
+ }
+ }
+ }
+ }
+}
+
fn is_hidden(ent: &walkdir::DirEntry) -> bool {
ent.depth() > 0 &&
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)