diff options
Diffstat (limited to 'ignore/src/walk.rs')
-rw-r--r-- | ignore/src/walk.rs | 592 |
1 files changed, 592 insertions, 0 deletions
diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs new file mode 100644 index 00000000..0bcc6136 --- /dev/null +++ b/ignore/src/walk.rs @@ -0,0 +1,592 @@ +use std::ffi::OsStr; +use std::fs::{FileType, Metadata}; +use std::io; +use std::path::{Path, PathBuf}; +use std::vec; + +use walkdir::{self, WalkDir, WalkDirIterator}; + +use dir::{Ignore, IgnoreBuilder}; +use gitignore::GitignoreBuilder; +use overrides::Override; +use types::Types; +use {Error, PartialErrorBuilder}; + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each +/// precedence level, more nested ignore files have a higher precedence over +/// less nested ignore files. +/// * Third, if the previous step yields an ignore match, than all matching +/// is stopped and the path is skipped.. If it yields a whitelist match, then +/// process continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. As above, if it's an ignore match, then all matching is stopped +/// and the path is skipped. If it's a whitelist match, then matching +/// continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, if the path has made it this far then it is yielded in the +/// iterator. +pub struct WalkBuilder { + paths: Vec<PathBuf>, + ig_builder: IgnoreBuilder, + parents: bool, + max_depth: Option<usize>, + follow_links: bool, +} + +impl WalkBuilder { + /// Create a new builder for a recursive directory iterator for the + /// directory given. + /// + /// Note that if you want to traverse multiple different directories, it + /// is better to call `add` on this builder than to create multiple + /// `Walk` values. + pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder { + WalkBuilder { + paths: vec![path.as_ref().to_path_buf()], + ig_builder: IgnoreBuilder::new(), + parents: true, + max_depth: None, + follow_links: false, + } + } + + /// Build a new `Walk` iterator. + pub fn build(&self) -> Walk { + let follow_links = self.follow_links; + let max_depth = self.max_depth; + let its = self.paths.iter().map(move |p| { + if p == Path::new("-") { + (p.to_path_buf(), None) + } else { + let mut wd = WalkDir::new(p); + wd = wd.follow_links(follow_links || p.is_file()); + if let Some(max_depth) = max_depth { + wd = wd.max_depth(max_depth); + } + (p.to_path_buf(), Some(WalkEventIter::from(wd))) + } + }).collect::<Vec<_>>().into_iter(); + let ig_root = self.ig_builder.build(); + Walk { + its: its, + it: None, + ig_root: ig_root.clone(), + ig: ig_root.clone(), + parents: self.parents, + } + } + + /// Add a file path to the iterator. + /// + /// Each additional file path added is traversed recursively. This should + /// be preferred over building multiple `Walk` iterators since this + /// enables reusing resources across iteration. + pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// The maximum depth to recurse. + /// + /// The default, `None`, imposes no depth restriction. + pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder { + self.max_depth = depth; + self + } + + /// Whether to follow symbolic links or not. + pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { + self.follow_links = yes; + self + } + + /// Add an ignore file to the matcher. + /// + /// This has lower precedence than all other sources of ignore rules. + /// + /// If there was a problem adding the ignore file, then an error is + /// returned. Note that the error may indicate *partial* failure. For + /// example, if an ignore file contains an invalid glob, all other globs + /// are still applied. + pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> { + let mut builder = GitignoreBuilder::new(""); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => { self.ig_builder.add_ignore(gi); } + Err(err) => { errs.push(err); } + } + errs.into_error_option() + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { + self.ig_builder.overrides(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut WalkBuilder { + self.ig_builder.types(types); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.hidden(yes); + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then the parent directories of each file path given + /// are traversed for ignore files (subject to the ignore settings on + /// this builder). Note that file paths are canonicalized with respect to + /// the current working directory in order to determine parent directories. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { + self.parents = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore(yes); + self + } + + /// Enables reading a global gitignore file, whose path is specified in + /// git's `core.excludesFile` config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_global(yes); + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_ignore(yes); + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_exclude(yes); + self + } +} + +/// Walk is a recursive directory iterator over file paths in a directory. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +pub struct Walk { + its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>, + it: Option<WalkEventIter>, + ig_root: Ignore, + ig: Ignore, + parents: bool, +} + +impl Walk { + /// Creates a new recursive directory iterator for the file path given. + /// + /// Note that this uses default settings, which include respecting + /// `.gitignore` files. To configure the iterator, use `WalkBuilder` + /// instead. + pub fn new<P: AsRef<Path>>(path: P) -> Walk { + WalkBuilder::new(path).build() + } + + fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool { + if ent.depth() == 0 { + // Never skip the root directory. + return false; + } + let m = self.ig.matched(ent.path(), ent.file_type().is_dir()); + if m.is_ignore() { + debug!("ignoring {}: {:?}", ent.path().display(), m); + return true; + } else if m.is_whitelist() { + debug!("whitelisting {}: {:?}", ent.path().display(), m); + } + false + } +} + +impl Iterator for Walk { + type Item = Result<DirEntry, Error>; + + #[inline(always)] + fn next(&mut self) -> Option<Result<DirEntry, Error>> { + loop { + let ev = match self.it.as_mut().and_then(|it| it.next()) { + Some(ev) => ev, + None => { + match self.its.next() { + None => return None, + Some((_, None)) => { + return Some(Ok(DirEntry { + dent: None, + err: None, + })); + } + Some((path, Some(it))) => { + self.it = Some(it); + if self.parents && path.is_dir() { + let (ig, err) = self.ig_root.add_parents(path); + self.ig = ig; + if let Some(err) = err { + return Some(Err(err)); + } + } else { + self.ig = self.ig_root.clone(); + } + } + } + continue; + } + }; + match ev { + Err(err) => { + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { + path: path.to_path_buf(), + err: Box::new(ig_err), + }; + } + return Some(Err(ig_err)); + } + Ok(WalkEvent::Exit) => { + self.ig = self.ig.parent().unwrap(); + } + Ok(WalkEvent::Dir(ent)) => { + if self.skip_entry(&ent) { + self.it.as_mut().unwrap().it.skip_current_dir(); + // Still need to push this on the stack because + // we'll get a WalkEvent::Exit event for this dir. + // We don't care if it errors though. + let (igtmp, _) = self.ig.add_child(ent.path()); + self.ig = igtmp; + continue; + } + let (igtmp, err) = self.ig.add_child(ent.path()); + self.ig = igtmp; + return Some(Ok(DirEntry { dent: Some(ent), err: err })); + } + Ok(WalkEvent::File(ent)) => { + if self.skip_entry(&ent) { + continue; + } + // If this isn't actually a file (e.g., a symlink), + // then skip it. + if !ent.file_type().is_file() { + continue; + } + return Some(Ok(DirEntry { dent: Some(ent), err: None })); + } + } + } + } +} + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. +#[derive(Debug)] +pub struct DirEntry { + dent: Option<walkdir::DirEntry>, + err: Option<Error>, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.as_ref().map_or(Path::new("<stdin>"), |x| x.path()) + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symbolic_link(&self) -> bool { + self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link()) + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_none() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result<Metadata, Error> { + if let Some(dent) = self.dent.as_ref() { + dent.metadata().map_err(|err| Error::WithPath { + path: self.path().to_path_buf(), + err: Box::new(Error::Io(io::Error::from(err))), + }) + } else { + let ioerr = io::Error::new( + io::ErrorKind::Other, "stdin has no metadata"); + Err(Error::WithPath { + path: Path::new("<stdin>").to_path_buf(), + err: Box::new(Error::Io(ioerr)), + }) + } + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> Option<FileType> { + self.dent.as_ref().map(|x| x.file_type()) + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.as_ref().map_or(OsStr::new("<stdin>"), |x| x.file_name()) + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.dent.as_ref().map_or(0, |x| x.depth()) + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } +} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. +struct WalkEventIter { + depth: usize, + it: walkdir::Iter, + next: Option<Result<walkdir::DirEntry, walkdir::Error>>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(walkdir::DirEntry), + File(walkdir::DirEntry), + Exit, +} + +impl From<WalkDir> for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { depth: 0, it: it.into_iter(), next: None } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result<WalkEvent>; + + #[inline(always)] + fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if dent.file_type().is_dir() { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + + use tempdir::TempDir; + + use super::{Walk, WalkBuilder}; + + fn wfile<P: AsRef<Path>>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn mkdirp<P: AsRef<Path>>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn normal_path(unix: &str) -> String { + if cfg!(windows) { + unix.replace("\\", "/") + } else { + unix.to_string() + } + } + + fn walk_collect(prefix: &Path, walk: Walk) -> Vec<String> { + let mut paths = vec![]; + for dent in walk { + let dent = dent.unwrap(); + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn mkpaths(paths: &[&str]) -> Vec<String> { + let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect(); + paths.sort(); + paths + } + + #[test] + fn no_ignores() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a/b/c")); + mkdirp(td.path().join("x/y")); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("x/y/foo"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&[ + "x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c", + ])); + } + + #[test] + fn gitignore() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn explicit_ignore() { + let td = TempDir::new("walk-test-").unwrap(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert!(builder.add_ignore(&igpath).is_none()); + let got = walk_collect(td.path(), builder.build()); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn gitignore_parent() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/bar"), ""); + + let root = td.path().join("a"); + let got = walk_collect(&root, Walk::new(&root)); + assert_eq!(got, mkpaths(&["bar"])); + } +} |