summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2016-10-11 19:57:09 -0400
committer Andrew Gallant <jamslam@gmail.com> 2016-10-29 20:48:59 -0400
commit d79add341ba4be10bb3459877318b9c5a30f5db3 (patch)
tree a6c5222c63d53522635bc847c6ac2cf2e000ff7f /src
parent 12b2b1f6242e0c9082e93111ffef24a93fea5f6e (diff)
Move all gitignore matching to separate crate.
This PR introduces a new sub-crate, `ignore`, which primarily provides a
fast recursive directory iterator that respects ignore files like gitignore
and other configurable filtering rules based on globs or even file types.

This results in a substantial source of complexity moved out of ripgrep's
core and into a reusable component that others can now (hopefully) benefit
from.

While much of the ignore code carried over from ripgrep's core, a substantial
portion of it was rewritten with the following goals in mind:

1. Reuse matchers built from gitignore files across directory iteration.
2. Design the matcher data structure to be amenable for parallelizing
   directory iteration. (Indeed, writing the parallel iterator is the next
   step.)

Fixes #9, #44, #45
Diffstat (limited to 'src')
-rw-r--r-- src/args.rs 105
-rw-r--r-- src/gitignore.rs 455
-rw-r--r-- src/ignore.rs 493
-rw-r--r-- src/main.rs 90
-rw-r--r-- src/pathutil.rs 78
-rw-r--r-- src/printer.rs 6
-rw-r--r-- src/terminal.rs 0
-rw-r--r-- src/types.rs 458
-rw-r--r-- src/walk.rs 140
9 files changed, 103 insertions, 1722 deletions
diff --git a/src/args.rs b/src/args.rs
index 012d9150..9d2923b8 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -14,19 +14,17 @@ use term::Terminal;
use term;
#[cfg(windows)]
use term::WinConsole;
-use walkdir::WalkDir;
use atty;
-use gitignore::{Gitignore, GitignoreBuilder};
-use ignore::Ignore;
+use ignore::overrides::{Override, OverrideBuilder};
+use ignore::types::{FileTypeDef, Types, TypesBuilder};
+use ignore;
use out::{Out, ColoredTerminal};
use printer::Printer;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
#[cfg(windows)]
use terminal_win::WindowsBuffer;
-use types::{FileTypeDef, Types, TypesBuilder};
-use walk;
use Result;
@@ -131,6 +129,13 @@ Less common options:
Search hidden directories and files. (Hidden directories and files are
skipped by default.)
+ --ignore-file FILE ...
+ Specify additional ignore files for filtering file paths. Ignore files
+ should be in the gitignore format and are matched relative to the
+ current working directory. These ignore files have lower precedence
+ than all other ignore file types. When specifying multiple ignore
+ files, earlier files have lower precedence than later files.
+
-L, --follow
Follow symlinks.
@@ -234,6 +239,7 @@ pub struct RawArgs {
flag_heading: bool,
flag_hidden: bool,
flag_ignore_case: bool,
+ flag_ignore_file: Vec<String>,
flag_invert_match: bool,
flag_line_number: bool,
flag_fixed_strings: bool,
@@ -279,11 +285,12 @@ pub struct Args {
eol: u8,
files: bool,
follow: bool,
- glob_overrides: Option<Gitignore>,
+ glob_overrides: Override,
grep: Grep,
heading: bool,
hidden: bool,
ignore_case: bool,
+ ignore_files: Vec<PathBuf>,
invert_match: bool,
line_number: bool,
line_per_match: bool,
@@ -347,14 +354,13 @@ impl RawArgs {
}
let glob_overrides =
if self.flag_glob.is_empty() {
- None
+ Override::empty()
} else {
- let cwd = try!(env::current_dir());
- let mut bgi = GitignoreBuilder::new(cwd);
+ let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
for pat in &self.flag_glob {
- try!(bgi.add("<argv>", pat));
+ try!(ovr.add(pat));
}
- Some(try!(bgi.build()))
+ try!(ovr.build())
};
let threads =
if self.flag_threads == 0 {
@@ -382,6 +388,9 @@ impl RawArgs {
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
let text = self.flag_text || self.flag_unrestricted >= 3;
+ let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
+ Path::new(p).to_path_buf()
+ }).collect();
let mut args = Args {
paths: paths,
after_context: after_context,
@@ -399,6 +408,7 @@ impl RawArgs {
heading: !self.flag_no_heading && self.flag_heading,
hidden: hidden,
ignore_case: self.flag_ignore_case,
+ ignore_files: ignore_files,
invert_match: self.flag_invert_match,
line_number: !self.flag_no_line_number && self.flag_line_number,
line_per_match: self.flag_vimgrep,
@@ -711,31 +721,30 @@ impl Args {
self.type_list
}
- /// Create a new recursive directory iterator at the path given.
- pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
- // Always follow symlinks for explicitly specified files.
- let mut wd = WalkDir::new(path).follow_links(
- self.follow || path.is_file());
- if let Some(maxdepth) = self.maxdepth {
- wd = wd.max_depth(maxdepth);
+ /// Create a new recursive directory iterator over the paths in argv.
+ pub fn walker(&self) -> Walk {
+ let paths = self.paths();
+ let mut wd = ignore::WalkBuilder::new(&paths[0]);
+ for path in &paths[1..] {
+ wd.add(path);
}
- let mut ig = Ignore::new();
- // Only register ignore rules if this is a directory. If it's a file,
- // then it was explicitly given by the end user, so we always search
- // it.
- if path.is_dir() {
- ig.ignore_hidden(!self.hidden);
- ig.no_ignore(self.no_ignore);
- ig.no_ignore_vcs(self.no_ignore_vcs);
- ig.add_types(self.types.clone());
- if !self.no_ignore_parent {
- try!(ig.push_parents(path));
- }
- if let Some(ref overrides) = self.glob_overrides {
- ig.add_override(overrides.clone());
+ for path in &self.ignore_files {
+ if let Some(err) = wd.add_ignore(path) {
+ eprintln!("{}", err);
}
}
- Ok(walk::Iter::new(ig, wd))
+
+ wd.follow_links(self.follow);
+ wd.hidden(!self.hidden);
+ wd.max_depth(self.maxdepth);
+ wd.overrides(self.glob_overrides.clone());
+ wd.types(self.types.clone());
+ wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
+ wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
+ wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
+ wd.ignore(!self.no_ignore);
+ wd.parents(!self.no_ignore_parent);
+ Walk(wd.build())
}
}
@@ -752,6 +761,34 @@ fn version() -> String {
}
}
+/// A simple wrapper around the ignore::Walk iterator. This will
+/// automatically emit error messages to stderr and will skip directories.
+pub struct Walk(ignore::Walk);
+
+impl Iterator for Walk {
+ type Item = ignore::DirEntry;
+
+ fn next(&mut self) -> Option<ignore::DirEntry> {
+ while let Some(result) = self.0.next() {
+ match result {
+ Ok(dent) => {
+ if let Some(err) = dent.error() {
+ eprintln!("{}", err);
+ }
+ if dent.file_type().map_or(false, |x| x.is_dir()) {
+ continue;
+ }
+ return Some(dent);
+ }
+ Err(err) => {
+ eprintln!("{}", err);
+ }
+ }
+ }
+ None
+ }
+}
+
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
@@ -761,7 +798,7 @@ enum State {
Literal,
}
-/// Unescapes a string given on the command line. It supports a limit set of
+/// Unescapes a string given on the command line. It supports a limited set of
/// escape sequences:
///
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
diff --git a/src/gitignore.rs b/src/gitignore.rs
deleted file mode 100644
index 9daeb3cb..00000000
--- a/src/gitignore.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-/*!
-The gitignore module provides a way of reading a gitignore file and applying
-it to a particular file name to determine whether it should be ignore or not.
-The motivation for this submodule is performance and portability:
-
-1. There is a gitignore crate on crates.io, but it uses the standard `glob`
- crate and checks patterns one-by-one. This is a reasonable implementation,
- but not suitable for the performance we need here.
-2. We could shell out to a `git` sub-command like ls-files or status, but it
- seems better to not rely on the existence of external programs for a search
- tool. Besides, we need to implement this logic anyway to support things like
- an .ignore file.
-
-The key implementation detail here is that a single gitignore file is compiled
-into a single RegexSet, which can be used to report which globs match a
-particular file name. We can then do a quick post-processing step to implement
-additional rules such as whitelists (prefix of `!`) or directory-only globs
-(suffix of `/`).
-*/
-
-// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
-// use this exact implementation because hgignore files are different.
-
-use std::cell::RefCell;
-use std::error::Error as StdError;
-use std::fmt;
-use std::fs::File;
-use std::io::{self, BufRead};
-use std::path::{Path, PathBuf};
-use std::sync::Arc;
-
-use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
-use regex;
-use thread_local::ThreadLocal;
-
-use pathutil::{is_file_name, strip_prefix};
-
-/// Represents an error that can occur when parsing a gitignore file.
-#[derive(Debug)]
-pub enum Error {
- Glob(globset::Error),
- Regex(regex::Error),
- Io(io::Error),
-}
-
-impl StdError for Error {
- fn description(&self) -> &str {
- match *self {
- Error::Glob(ref err) => err.description(),
- Error::Regex(ref err) => err.description(),
- Error::Io(ref err) => err.description(),
- }
- }
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Error::Glob(ref err) => err.fmt(f),
- Error::Regex(ref err) => err.fmt(f),
- Error::Io(ref err) => err.fmt(f),
- }
- }
-}
-
-impl From<globset::Error> for Error {
- fn from(err: globset::Error) -> Error {
- Error::Glob(err)
- }
-}
-
-impl From<regex::Error> for Error {
- fn from(err: regex::Error) -> Error {
- Error::Regex(err)
- }
-}
-
-impl From<io::Error> for Error {
- fn from(err: io::Error) -> Error {
- Error::Io(err)
- }
-}
-
-/// Gitignore is a matcher for the glob patterns in a single gitignore file.
-#[derive(Clone, Debug)]
-pub struct Gitignore {
- set: GlobSet,
- root: PathBuf,
- patterns: Vec<Pattern>,
- num_ignores: u64,
- num_whitelist: u64,
- matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
-}
-
-impl Gitignore {
- /// Create a new gitignore glob matcher from the given root directory and
- /// string containing the contents of a gitignore file.
- #[allow(dead_code)]
- fn from_str<P: AsRef<Path>>(
- root: P,
- gitignore: &str,
- ) -> Result<Gitignore, Error> {
- let mut builder = GitignoreBuilder::new(root);
- try!(builder.add_str(gitignore));
- builder.build()
- }
-
- /// Returns true if and only if the given file path should be ignored
- /// according to the globs in this gitignore. `is_dir` should be true if
- /// the path refers to a directory and false otherwise.
- ///
- /// Before matching path, its prefix (as determined by a common suffix
- /// of the directory containing this gitignore) is stripped. If there is
- /// no common suffix/prefix overlap, then path is assumed to reside in the
- /// same directory as this gitignore file.
- pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
- let mut path = path.as_ref();
- if let Some(p) = strip_prefix("./", path) {
- path = p;
- }
- // Strip any common prefix between the candidate path and the root
- // of the gitignore, to make sure we get relative matching right.
- // BUT, a file name might not have any directory components to it,
- // in which case, we don't want to accidentally strip any part of the
- // file name.
- if !is_file_name(path) {
- if let Some(p) = strip_prefix(&self.root, path) {
- path = p;
- }
- }
- if let Some(p) = strip_prefix("/", path) {
- path = p;
- }
- self.matched_stripped(path, is_dir)
- }
-
- /// Like matched, but takes a path that has already been stripped.
- pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
- let _matches = self.matches.get_default();
- let mut matches = _matches.borrow_mut();
- let candidate = Candidate::new(path);
- self.set.matches_candidate_into(&candidate, &mut *matches);
- for &i in matches.iter().rev() {
- let pat = &self.patterns[i];
- if !pat.only_dir || is_dir {
- return if pat.whitelist {
- Match::Whitelist(pat)
- } else {
- Match::Ignored(pat)
- };
- }
- }
- Match::None
- }
-
- /// Returns the total number of ignore patterns.
- pub fn num_ignores(&self) -> u64 {
- self.num_ignores
- }
-}
-
-/// The result of a glob match.
-///
-/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
-/// a match (whether ignored or whitelisted).
-#[derive(Clone, Debug)]
-pub enum Match<'a> {
- /// The path didn't match any glob in the gitignore file.
- None,
- /// The last glob matched indicates the path should be ignored.
- Ignored(&'a Pattern),
- /// The last glob matched indicates the path should be whitelisted.
- Whitelist(&'a Pattern),
-}
-
-impl<'a> Match<'a> {
- /// Returns true if the match result implies the path should be ignored.
- #[allow(dead_code)]
- pub fn is_ignored(&self) -> bool {
- match *self {
- Match::Ignored(_) => true,
- Match::None | Match::Whitelist(_) => false,
- }
- }
-
- /// Returns true if the match result didn't match any globs.
- pub fn is_none(&self) -> bool {
- match *self {
- Match::None => true,
- Match::Ignored(_) | Match::Whitelist(_) => false,
- }
- }
-
- /// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
- /// becomes Ignored. A non-match remains the same.
- pub fn invert(self) -> Match<'a> {
- match self {
- Match::None => Match::None,
- Match::Ignored(pat) => Match::Whitelist(pat),
- Match::Whitelist(pat) => Match::Ignored(pat),
- }
- }
-}
-
-/// GitignoreBuilder constructs a matcher for a single set of globs from a
-/// .gitignore file.
-pub struct GitignoreBuilder {
- builder: GlobSetBuilder,
- root: PathBuf,
- patterns: Vec<Pattern>,
-}
-
-/// Pattern represents a single pattern in a gitignore file. It doesn't
-/// know how to do glob matching directly, but it does store additional
-/// options on a pattern, such as whether it's whitelisted.
-#[derive(Clone, Debug)]
-pub struct Pattern {
- /// The file path that this pattern was extracted from (may be empty).
- pub from: PathBuf,
- /// The original glob pattern string.
- pub original: String,
- /// The actual glob pattern string used to convert to a regex.
- pub pat: String,
- /// Whether this is a whitelisted pattern or not.
- pub whitelist: bool,
- /// Whether this pattern should only match directories or not.
- pub only_dir: bool,
-}
-
-impl GitignoreBuilder {
- /// Create a new builder for a gitignore file.
- ///
- /// The path given should be the path at which the globs for this gitignore
- /// file should be matched.
- pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
- let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
- GitignoreBuilder {
- builder: GlobSetBuilder::new(),
- root: root.to_path_buf(),
- patterns: vec![],
- }
- }
-
- /// Builds a new matcher from the glob patterns added so far.
- ///
- /// Once a matcher is built, no new glob patterns can be added to it.
- pub fn build(self) -> Result<Gitignore, Error> {
- let nignores = self.patterns.iter().filter(|p| !p.whitelist).count();
- let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count();
- Ok(Gitignore {
- set: try!(self.builder.build()),
- root: self.root,
- patterns: self.patterns,
- num_ignores: nignores as u64,
- num_whitelist: nwhitelist as u64,
- matches: Arc::new(ThreadLocal::default()),
- })
- }
-
- /// Add each pattern line from the file path given.
- pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
- let rdr = io::BufReader::new(try!(File::open(&path)));
- debug!("gitignore: {}", path.as_ref().display());
- for (i, line) in rdr.lines().enumerate() {
- let line = match line {
- Ok(line) => line,
- Err(err) => {
- debug!("error reading line {} in {}: {}",
- i, path.as_ref().display(), err);
- continue;
- }
- };
- if let Err(err) = self.add(&path, &line) {
- debug!("error adding gitignore pattern: '{}': {}", line, err);
- }
- }
- Ok(())
- }
-
- /// Add each pattern line from the string given.
- pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
- for line in gitignore.lines() {
- try!(self.add("", line));
- }
- Ok(())
- }
-
- /// Add a line from a gitignore file to this builder.
- ///
- /// If the line could not be parsed as a glob, then an error is returned.
- pub fn add<P: AsRef<Path>>(
- &mut self,
- from: P,
- mut line: &str,
- ) -> Result<(), Error> {
- if line.starts_with("#") {
- return Ok(());
- }
- if !line.ends_with("\\ ") {
- line = line.trim_right();
- }
- if line.is_empty() {
- return Ok(());
- }
- let mut pat = Pattern {
- from: from.as_ref().to_path_buf(),
- original: line.to_string(),
- pat: String::new(),
- whitelist: false,
- only_dir: false,
- };
- let mut literal_separator = false;
- let has_slash = line.chars().any(|c| c == '/');
- let is_absolute = line.chars().nth(0).unwrap() == '/';
- if line.starts_with("\\!") || line.starts_with("\\#") {
- line = &line[1..];
- } else {
- if line.starts_with("!") {
- pat.whitelist = true;
- line = &line[1..];
- }
- if line.starts_with("/") {
- // `man gitignore` says that if a glob starts with a slash,
- // then the glob can only match the beginning of a path
- // (relative to the location of gitignore). We achieve this by
- // simply banning wildcards from matching /.
- literal_separator = true;
- line = &line[1..];
- }
- }
- // If it ends with a slash, then this should only match directories,
- // but the slash should otherwise not be used while globbing.
- if let Some((i, c)) = line.char_indices().rev().nth(0) {
- if c == '/' {
- pat.only_dir = true;
- line = &line[..i];
- }
- }
- // If there is a literal slash, then we note that so that globbing
- // doesn't let wildcards match slashes.
- pat.pat = line.to_string();
- if has_slash {
- literal_separator = true;
- }
- // If there was a leading slash, then this is a pattern that must
- // match the entire path name. Otherwise, we should let it match
- // anywhere, so use a **/ prefix.
- if !is_absolute {
- // ... but only if we don't already have a **/ prefix.
- if !pat.pat.starts_with("**/") {
- pat.pat = format!("**/{}", pat.pat);
- }
- }
- // If the pattern ends with `/**`, then we should only match everything
- // inside a directory, but not the directory itself. Standard globs
- // will match the directory. So we add `/*` to force the issue.
- if pat.pat.ends_with("/**") {
- pat.pat = format!("{}/*", pat.pat);
- }
- let parsed = try!(
- GlobBuilder::new(&pat.pat)
- .literal_separator(literal_separator)
- .build());
- self.builder.add(parsed);
- self.patterns.push(pat);
- Ok(())
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::Gitignore;
-
- macro_rules! ignored {
- ($name:ident, $root:expr, $gi:expr, $path:expr) => {
- ignored!($name, $root, $gi, $path, false);
- };
- ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
- #[test]
- fn $name() {
- let gi = Gitignore::from_str($root, $gi).unwrap();
- assert!(gi.matched($path, $is_dir).is_ignored());
- }
- };
- }
-
- macro_rules! not_ignored {
- ($name:ident, $root:expr, $gi:expr, $path:expr) => {
- not_ignored!($name, $root, $gi, $path, false);
- };
- ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
- #[test]
- fn $name() {
- let gi = Gitignore::from_str($root, $gi).unwrap();
- assert!(!gi.matched($path, $is_dir).is_ignored());
- }
- };
- }
-
- const ROOT: &'static str = "/home/foobar/rust/rg";
-
- ignored!(ig1, ROOT, "months", "months");
- ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
- ignored!(ig3, ROOT, "*.rs", "src/main.rs");
- ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
- ignored!(ig5, ROOT, "/*.c", "cat-file.c");
- ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
- ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
- ignored!(ig8, ROOT, "foo/", "foo", true);
- ignored!(ig9, ROOT, "**/foo", "foo");
- ignored!(ig10, ROOT, "**/foo", "src/foo");
- ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
- ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
- ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
- ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
- ignored!(ig15, ROOT, "abc/**", "abc/x");
- ignored!(ig16, ROOT, "abc/**", "abc/x/y");
- ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
- ignored!(ig18, ROOT, "a/**/b", "a/b");
- ignored!(ig19, ROOT, "a/**/b", "a/x/b");
- ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
- ignored!(ig21, ROOT, r"\!xy", "!xy");
- ignored!(ig22, ROOT, r"\#foo", "#foo");
- ignored!(ig23, ROOT, "foo", "./foo");
- ignored!(ig24, ROOT, "target", "grep/target");
- ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
- ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
- ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
- ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
- ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
- ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);
-
- not_ignored!(ignot1, ROOT, "amonths", "months");
- not_ignored!(ignot2, ROOT, "monthsa", "months");
- not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
- not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
- not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
- not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
- not_ignored!(ignot7, ROOT, "foo/", "foo", false);
- not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
- not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
- not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
- not_ignored!(ignot11, ROOT, "#foo", "#foo");
- not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
- not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
- not_ignored!(
- ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
- "./third_party/protobuf/csharp/src/packages/repositories.config");
-
- // See: https://github.com/BurntSushi/ripgrep/issues/106
- #[test]
- fn regression_106() {
- Gitignore::from_str("/", " ").unwrap();
- }
-}
diff --git a/src/ignore.rs b/src/ignore.rs
deleted file mode 100644
index a8cbac1a..00000000
--- a/src/ignore.rs
+++ /dev/null
@@ -1,493 +0,0 @@
-/*!
-The ignore module is responsible for managing the state required to determine
-whether a *single* file path should be searched or not.
-
-In general, there are two ways to ignore a particular file:
-
-1. Specify an ignore rule in some "global" configuration, such as a
- $HOME/.ignore or on the command line.
-2. A specific ignore file (like .gitignore) found during directory traversal.
-
-The `IgnoreDir` type handles ignore patterns for any one particular directory
-(including "global" ignore patterns), while the `Ignore` type handles a stack
-of `IgnoreDir`s for use during directory traversal.
-*/
-
-use std::error::Error as StdError;
-use std::ffi::OsString;
-use std::fmt;
-use std::io;
-use std::path::{Path, PathBuf};
-
-use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
-use pathutil::{file_name, is_hidden, strip_prefix};
-use types::Types;
-
-const IGNORE_NAMES: &'static [&'static str] = &[
- ".gitignore",
- ".ignore",
- ".rgignore",
-];
-
-/// Represents an error that can occur when parsing a gitignore file.
-#[derive(Debug)]
-pub enum Error {
- Gitignore(gitignore::Error),
- Io {
- path: PathBuf,
- err: io::Error,
- },
-}
-
-impl Error {
- fn from_io<P: AsRef<Path>>(path: P, err: io::Error) -> Error {
- Error::Io { path: path.as_ref().to_path_buf(), err: err }
- }
-}
-
-impl StdError for Error {
- fn description(&self) -> &str {
- match *self {
- Error::Gitignore(ref err) => err.description(),
- Error::Io { ref err, .. } => err.description(),
- }
- }
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Error::Gitignore(ref err) => err.fmt(f),
- Error::Io { ref path, ref err } => {
- write!(f, "{}: {}", path.display(), err)
- }
- }
- }
-}
-
-impl From<gitignore::Error> for Error {
- fn from(err: gitignore::Error) -> Error {
- Error::Gitignore(err)
- }
-}
-
-/// Ignore represents a collection of ignore patterns organized by directory.
-/// In particular, a stack is maintained, where the top of the stack
-/// corresponds to the current directory being searched and the bottom of the
-/// stack represents the root of a search. Ignore patterns at the top of the
-/// stack take precedence over ignore patterns at the bottom of the stack.
-pub struct Ignore {
- /// A stack of ignore patterns at each directory level of traversal.
- /// A directory that contributes no ignore patterns is `None`.
- stack: Vec<IgnoreDir>,
- /// A stack of parent directories above the root of the current search.
- parent_stack: Vec<IgnoreDir>,
- /// A set of override globs that are always checked first. A match (whether
- /// it's whitelist or blacklist) trumps anything in stack.
- overrides: Overrides,
- /// A file type matcher.
- types: Types,
- /// Whether to ignore hidden files or not.
- ignore_hidden: bool,
- /// When true, don't look at .gitignore or .ignore files for ignore
- /// rules.
- no_ignore: bool,
- /// When true, don't look at .gitignore files for ignore rules.
- no_ignore_vcs: bool,
-}
-
-impl Ignore {
- /// Create an empty set of ignore patterns.
- pub fn new() -> Ignore {
- Ignore {
- stack: vec![],
- parent_stack: vec![],
- overrides: Overrides::new(None),
- types: Types::empty(),
- ignore_hidden: true,
- no_ignore: false,
- no_ignore_vcs: true,
- }
- }
-
- /// Set whether hidden files/folders should be ignored (defaults to true).
- pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
- self.ignore_hidden = yes;
- self
- }
-
- /// When set, ignore files are ignored.
- pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
- self.no_ignore = yes;
- self
- }
-
- /// When set, VCS ignore files are ignored.
- pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore {
- self.no_ignore_vcs = yes;
- self
- }
-
- /// Add a set of globs that overrides all other match logic.
- pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
- self.overrides = Overrides::new(Some(gi));
- self
- }
-
- /// Add a file type matcher. The file type matcher has the lowest
- /// precedence.
- pub fn add_types(&mut self, types: Types) -> &mut Ignore {
- self.types = types;
- self
- }
-
- /// Push parent directories of `path` on to the stack.
- pub fn push_parents<P: AsRef<Path>>(
- &mut self,
- path: P,
- ) -> Result<(), Error> {
- let path = try!(path.as_ref().canonicalize().map_err(|err| {
- Error::from_io(path.as_ref(), err)
- }));
- let mut path = &*path;
- let mut saw_git = path.join(".git").is_dir();
- let mut ignore_names = IGNORE_NAMES.to_vec();
- if self.no_ignore_vcs {
- ignore_names.retain(|&name| name != ".gitignore");
- }
- let mut ignore_dir_results = vec![];
- while let Some(parent) = path.parent() {
- if self.no_ignore {
- ignore_dir_results.push(Ok(IgnoreDir::empty(parent)));
- } else {
- if saw_git {
- ignore_names.retain(|&name| name != ".gitignore");
- } else {
- saw_git = parent.join(".git").is_dir();
- }
- let ignore_dir_result =
- IgnoreDir::with_ignore_names(parent, ignore_names.iter());
- ignore_dir_results.push(ignore_dir_result);
- }
- path = parent;
- }
-
- for ignore_dir_result in ignore_dir_results.into_iter().rev() {
- self.parent_stack.push(try!(ignore_dir_result));
- }
- Ok(())
- }
-
- /// Add a directory to the stack.
- ///
- /// Note that even if this returns an error, the directory is added to the
- /// stack (and therefore should be popped).
- pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
- if self.no_ignore {
- self.stack.push(IgnoreDir::empty(path));
- Ok(())
- } else if self.no_ignore_vcs {
- self.push_ignore_dir(IgnoreDir::without_vcs(path))
- } else {
- self.push_ignore_dir(IgnoreDir::new(path))
- }
- }
-
- /// Pushes the result of building a directory matcher on to the stack.
- ///
- /// If the result given contains an error, then it is returned.
- pub fn push_ignore_dir(
- &mut self,
- result: Result<IgnoreDir, Error>,
- ) -> Result<(), Error> {
- match result {
- Ok(id) => {
- self.stack.push(id);
- Ok(())
- }
- Err(err) => {
- // Don't leave the stack in an inconsistent state.
- self.stack.push(IgnoreDir::empty("error"));
- Err(err)
- }
- }
- }
-
- /// Pop a directory from the stack.
- ///
- /// This panics if the stack is empty.
- pub fn pop(&mut self) {
- self.stack.pop().expect("non-empty stack");
- }
-
- /// Returns true if and only if the given file path should be ignored.
- pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
- let mut path = path.as_ref();
- if let Some(p) = strip_prefix("./", path) {
- path = p;
- }
- let mat = self.overrides.matched(path, is_dir);
- if let Some(is_ignored) = self.ignore_match(path, mat) {
- return is_ignored;
- }
- let mut whitelisted = false;
- if !self.no_ignore {
- for id in self.stack.iter().rev() {
- let mat = id.matched(path, is_dir);
- if let Some(is_ignored) = self.ignore_match(path, mat) {
- if is_ignored {
- return true;
- }
- // If this path is whitelisted by an ignore, then
- // fallthrough and let the file type matcher have a say.
- whitelisted = true;
- break;
- }
- }
- // If the file has been whitelisted, then we have to stop checking
- // parent directories. The only thing that can override a whitelist
- // at this point is a type filter.
- if !whitelisted {
- let mut path = path.to_path_buf();
- for id in self.parent_stack.iter().rev() {
- if let Some(ref dirname) = id.name {
- path = Path::new(dirname).join(path);
- }
- let mat = id.matched(&*path, is_dir);
-