diff options
Diffstat (limited to 'crates/ignore/src/walk.rs')
-rw-r--r-- | crates/ignore/src/walk.rs | 2162 |
1 files changed, 2162 insertions, 0 deletions
diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs new file mode 100644 index 00000000..b2063cde --- /dev/null +++ b/crates/ignore/src/walk.rs @@ -0,0 +1,2162 @@ +use std::cmp; +use std::ffi::OsStr; +use std::fmt; +use std::fs::{self, FileType, Metadata}; +use std::io; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::Arc; +use std::vec; + +use channel::{self, TryRecvError}; +use same_file::Handle; +use walkdir::{self, WalkDir}; + +use dir::{Ignore, IgnoreBuilder}; +use gitignore::GitignoreBuilder; +use overrides::Override; +use types::Types; +use {Error, PartialErrorBuilder}; + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. +#[derive(Clone, Debug)] +pub struct DirEntry { + dent: DirEntryInner, + err: Option<Error>, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.path() + } + + /// The full path that this entry represents. + /// Analogous to [`path`], but moves ownership of the path. + /// + /// [`path`]: struct.DirEntry.html#method.path + pub fn into_path(self) -> PathBuf { + self.dent.into_path() + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symlink(&self) -> bool { + self.dent.path_is_symlink() + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result<Metadata, Error> { + self.dent.metadata() + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. 
+ pub fn file_type(&self) -> Option<FileType> { + self.dent.file_type() + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.file_name() + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.dent.depth() + } + + /// Returns the underlying inode number if one exists. + /// + /// If this entry doesn't have an inode number, then `None` is returned. + #[cfg(unix)] + pub fn ino(&self) -> Option<u64> { + self.dent.ino() + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. Errors related to traversing a directory tree itself are reported + /// as part of yielding the directory entry, and not with this method. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } + + /// Returns true if and only if this entry points to a directory. + pub(crate) fn is_dir(&self) -> bool { + self.dent.is_dir() + } + + fn new_stdin() -> DirEntry { + DirEntry { dent: DirEntryInner::Stdin, err: None } + } + + fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry { + DirEntry { dent: DirEntryInner::Walkdir(dent), err: err } + } + + fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry { + DirEntry { dent: DirEntryInner::Raw(dent), err: err } + } +} + +/// DirEntryInner is the implementation of DirEntry. +/// +/// It specifically represents three distinct sources of directory entries: +/// +/// 1. From the walkdir crate. +/// 2. Special entries that represent things like stdin. +/// 3. From a path. +/// +/// Specifically, (3) has to essentially re-create the DirEntry implementation +/// from WalkDir. 
+#[derive(Clone, Debug)] +enum DirEntryInner { + Stdin, + Walkdir(walkdir::DirEntry), + Raw(DirEntryRaw), +} + +impl DirEntryInner { + fn path(&self) -> &Path { + use self::DirEntryInner::*; + match *self { + Stdin => Path::new("<stdin>"), + Walkdir(ref x) => x.path(), + Raw(ref x) => x.path(), + } + } + + fn into_path(self) -> PathBuf { + use self::DirEntryInner::*; + match self { + Stdin => PathBuf::from("<stdin>"), + Walkdir(x) => x.into_path(), + Raw(x) => x.into_path(), + } + } + + fn path_is_symlink(&self) -> bool { + use self::DirEntryInner::*; + match *self { + Stdin => false, + Walkdir(ref x) => x.path_is_symlink(), + Raw(ref x) => x.path_is_symlink(), + } + } + + fn is_stdin(&self) -> bool { + match *self { + DirEntryInner::Stdin => true, + _ => false, + } + } + + fn metadata(&self) -> Result<Metadata, Error> { + use self::DirEntryInner::*; + match *self { + Stdin => { + let err = Error::Io(io::Error::new( + io::ErrorKind::Other, + "<stdin> has no metadata", + )); + Err(err.with_path("<stdin>")) + } + Walkdir(ref x) => x.metadata().map_err(|err| { + Error::Io(io::Error::from(err)).with_path(x.path()) + }), + Raw(ref x) => x.metadata(), + } + } + + fn file_type(&self) -> Option<FileType> { + use self::DirEntryInner::*; + match *self { + Stdin => None, + Walkdir(ref x) => Some(x.file_type()), + Raw(ref x) => Some(x.file_type()), + } + } + + fn file_name(&self) -> &OsStr { + use self::DirEntryInner::*; + match *self { + Stdin => OsStr::new("<stdin>"), + Walkdir(ref x) => x.file_name(), + Raw(ref x) => x.file_name(), + } + } + + fn depth(&self) -> usize { + use self::DirEntryInner::*; + match *self { + Stdin => 0, + Walkdir(ref x) => x.depth(), + Raw(ref x) => x.depth(), + } + } + + #[cfg(unix)] + fn ino(&self) -> Option<u64> { + use self::DirEntryInner::*; + use walkdir::DirEntryExt; + match *self { + Stdin => None, + Walkdir(ref x) => Some(x.ino()), + Raw(ref x) => Some(x.ino()), + } + } + + /// Returns true if and only if this entry points to a directory. 
+ fn is_dir(&self) -> bool { + self.file_type().map(|ft| ft.is_dir()).unwrap_or(false) + } +} + +/// DirEntryRaw is essentially copied from the walkdir crate so that we can +/// build `DirEntry`s from whole cloth in the parallel iterator. +#[derive(Clone)] +struct DirEntryRaw { + /// The path as reported by the `fs::ReadDir` iterator (even if it's a + /// symbolic link). + path: PathBuf, + /// The file type. Necessary for recursive iteration, so store it. + ty: FileType, + /// Is set when this entry was created from a symbolic link and the user + /// expects the iterator to follow symbolic links. + follow_link: bool, + /// The depth at which this entry was generated relative to the root. + depth: usize, + /// The underlying inode number (Unix only). + #[cfg(unix)] + ino: u64, + /// The underlying metadata (Windows only). We store this on Windows + /// because this comes for free while reading a directory. + #[cfg(windows)] + metadata: fs::Metadata, +} + +impl fmt::Debug for DirEntryRaw { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // Leaving out FileType because it doesn't have a debug impl + // in Rust 1.9. We could add it if we really wanted to by manually + // querying each possibly file type. Meh. 
---AG + f.debug_struct("DirEntryRaw") + .field("path", &self.path) + .field("follow_link", &self.follow_link) + .field("depth", &self.depth) + .finish() + } +} + +impl DirEntryRaw { + fn path(&self) -> &Path { + &self.path + } + + fn into_path(self) -> PathBuf { + self.path + } + + fn path_is_symlink(&self) -> bool { + self.ty.is_symlink() || self.follow_link + } + + fn metadata(&self) -> Result<Metadata, Error> { + self.metadata_internal() + } + + #[cfg(windows)] + fn metadata_internal(&self) -> Result<fs::Metadata, Error> { + if self.follow_link { + fs::metadata(&self.path) + } else { + Ok(self.metadata.clone()) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + #[cfg(not(windows))] + fn metadata_internal(&self) -> Result<fs::Metadata, Error> { + if self.follow_link { + fs::metadata(&self.path) + } else { + fs::symlink_metadata(&self.path) + } + .map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path)) + } + + fn file_type(&self) -> FileType { + self.ty + } + + fn file_name(&self) -> &OsStr { + self.path.file_name().unwrap_or_else(|| self.path.as_os_str()) + } + + fn depth(&self) -> usize { + self.depth + } + + #[cfg(unix)] + fn ino(&self) -> u64 { + self.ino + } + + fn from_entry( + depth: usize, + ent: &fs::DirEntry, + ) -> Result<DirEntryRaw, Error> { + let ty = ent.file_type().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { depth: depth, err: Box::new(err) } + })?; + DirEntryRaw::from_entry_os(depth, ent, ty) + } + + #[cfg(windows)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + let md = ent.metadata().map_err(|err| { + let err = Error::Io(io::Error::from(err)).with_path(ent.path()); + Error::WithDepth { depth: depth, err: Box::new(err) } + })?; + Ok(DirEntryRaw { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_entry_os( 
+ depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + use std::os::unix::fs::DirEntryExt; + + Ok(DirEntryRaw { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + ino: ent.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32). + #[cfg(not(any(windows, unix)))] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> Result<DirEntryRaw, Error> { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } + + #[cfg(windows)] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + let md = + fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth: depth, + metadata: md, + }) + } + + #[cfg(unix)] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + use std::os::unix::fs::MetadataExt; + + let md = + fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?; + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: link, + depth: depth, + ino: md.ino(), + }) + } + + // Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32). + #[cfg(not(any(windows, unix)))] + fn from_path( + depth: usize, + pb: PathBuf, + link: bool, + ) -> Result<DirEntryRaw, Error> { + Err(Error::Io(io::Error::new( + io::ErrorKind::Other, + "unsupported platform", + ))) + } +} + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. 
This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each precedence +/// level, more nested ignore files have a higher precedence than less nested +/// ignore files. +/// * Third, if the previous step yields an ignore match, then all matching +/// is stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. 
As above, if it yields an ignore match, then all matching is +/// stopped and the path is skipped. If it yields a whitelist match, then +/// matching continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, unless the path is a directory, the size of the file is compared +/// against the max filesize limit. If it exceeds the limit, it is skipped. +/// * Seventh, if the path has made it this far then it is yielded in the +/// iterator. +#[derive(Clone)] +pub struct WalkBuilder { + paths: Vec<PathBuf>, + ig_builder: IgnoreBuilder, + max_depth: Option<usize>, + max_filesize: Option<u64>, + follow_links: bool, + same_file_system: bool, + sorter: Option<Sorter>, + threads: usize, + skip: Option<Arc<Handle>>, +} + +#[derive(Clone)] +enum Sorter { + ByName( + Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>, + ), + ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>), +} + +impl fmt::Debug for WalkBuilder { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("WalkBuilder") + .field("paths", &self.paths) + .field("ig_builder", &self.ig_builder) + .field("max_depth", &self.max_depth) + .field("max_filesize", &self.max_filesize) + .field("follow_links", &self.follow_links) + .field("threads", &self.threads) + .field("skip", &self.skip) + .finish() + } +} + +impl WalkBuilder { + /// Create a new builder for a recursive directory iterator for the + /// directory given. + /// + /// Note that if you want to traverse multiple different directories, it + /// is better to call `add` on this builder than to create multiple + /// `Walk` values. 
+ pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder { + WalkBuilder { + paths: vec![path.as_ref().to_path_buf()], + ig_builder: IgnoreBuilder::new(), + max_depth: None, + max_filesize: None, + follow_links: false, + same_file_system: false, + sorter: None, + threads: 0, + skip: None, + } + } + + /// Build a new `Walk` iterator. + pub fn build(&self) -> Walk { + let follow_links = self.follow_links; + let max_depth = self.max_depth; + let sorter = self.sorter.clone(); + let its = self + .paths + .iter() + .map(move |p| { + if p == Path::new("-") { + (p.to_path_buf(), None) + } else { + let mut wd = WalkDir::new(p); + wd = wd.follow_links(follow_links || p.is_file()); + wd = wd.same_file_system(self.same_file_system); + if let Some(max_depth) = max_depth { + wd = wd.max_depth(max_depth); + } + if let Some(ref sorter) = sorter { + match sorter.clone() { + Sorter::ByName(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.file_name(), b.file_name()) + }); + } + Sorter::ByPath(cmp) => { + wd = wd.sort_by(move |a, b| { + cmp(a.path(), b.path()) + }); + } + } + } + (p.to_path_buf(), Some(WalkEventIter::from(wd))) + } + }) + .collect::<Vec<_>>() + .into_iter(); + let ig_root = self.ig_builder.build(); + Walk { + its: its, + it: None, + ig_root: ig_root.clone(), + ig: ig_root.clone(), + max_filesize: self.max_filesize, + skip: self.skip.clone(), + } + } + + /// Build a new `WalkParallel` iterator. + /// + /// Note that this *doesn't* return something that implements `Iterator`. + /// Instead, the returned value must be run with a closure. e.g., + /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`. 
+ pub fn build_parallel(&self) -> WalkParallel { + WalkParallel { + paths: self.paths.clone().into_iter(), + ig_root: self.ig_builder.build(), + max_depth: self.max_depth, + max_filesize: self.max_filesize, + follow_links: self.follow_links, + same_file_system: self.same_file_system, + threads: self.threads, + skip: self.skip.clone(), + } + } + + /// Add a file path to the iterator. + /// + /// Each additional file path added is traversed recursively. This should + /// be preferred over building multiple `Walk` iterators since this + /// enables reusing resources across iteration. + pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// The maximum depth to recurse. + /// + /// The default, `None`, imposes no depth restriction. + pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder { + self.max_depth = depth; + self + } + + /// Whether to follow symbolic links or not. + pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { + self.follow_links = yes; + self + } + + /// Whether to ignore files above the specified limit. + pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder { + self.max_filesize = filesize; + self + } + + /// The number of threads to use for traversal. + /// + /// Note that this only has an effect when using `build_parallel`. + /// + /// The default setting is `0`, which chooses the number of threads + /// automatically using heuristics. + pub fn threads(&mut self, n: usize) -> &mut WalkBuilder { + self.threads = n; + self + } + + /// Add a global ignore file to the matcher. + /// + /// This has lower precedence than all other sources of ignore rules. + /// + /// If there was a problem adding the ignore file, then an error is + /// returned. Note that the error may indicate *partial* failure. For + /// example, if an ignore file contains an invalid glob, all other globs + /// are still applied. 
+ pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> { + let mut builder = GitignoreBuilder::new(""); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push(builder.add(path)); + match builder.build() { + Ok(gi) => { + self.ig_builder.add_ignore(gi); + } + Err(err) => { + errs.push(err); + } + } + errs.into_error_option() + } + + /// Add a custom ignore file name + /// + /// These ignore files have higher precedence than all other ignore files. + /// + /// When specifying multiple names, earlier names have lower precedence than + /// later names. + pub fn add_custom_ignore_filename<S: AsRef<OsStr>>( + &mut self, + file_name: S, + ) -> &mut WalkBuilder { + self.ig_builder.add_custom_ignore_filename(file_name); + self + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { + self.ig_builder.overrides(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut WalkBuilder { + self.ig_builder.types(types); + self + } + + /// Enables all the standard ignore filters. + /// + /// This toggles, as a group, all the filters that are enabled by default: + /// + /// - [hidden()](#method.hidden) + /// - [parents()](#method.parents) + /// - [ignore()](#method.ignore) + /// - [git_ignore()](#method.git_ignore) + /// - [git_global()](#method.git_global) + /// - [git_exclude()](#method.git_exclude) + /// + /// They may still be toggled individually after calling this function. + /// + /// This is (by definition) enabled by default. + pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder { + self.hidden(yes) + .parents(yes) + .ignore(yes) + .git_ignore(yes) + .git_global(yes) + .git_exclude(yes) + } + + /// Enables ignoring hidden files. 
+ /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.hidden(yes); + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then .gitignore files in parent directories of each + /// file path given are respected. Otherwise, they are ignored. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.parents(yes); + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore(yes); + self + } + + /// Enables reading a global gitignore file, whose path is specified in + /// git's `core.excludesFile` config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + /// + /// This is enabled by default. + pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_global(yes); + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_ignore(yes); + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. 
+ pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_exclude(yes); + self + } + + /// Whether a git repository is required to apply git-related ignore + /// rules (global rules, .gitignore and local exclude rules). + /// + /// When disabled, git-related ignore rules are applied even when searching + /// outside a git repository. + pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.require_git(yes); + self + } + + /// Process ignore files case insensitively + /// + /// This is disabled by default. + pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore_case_insensitive(yes); + self + } + + /// Set a function for sorting directory entries by their path. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// entries from the same directory. + /// + /// This is like `sort_by_file_name`, except the comparator accepts + /// a `&Path` instead of the base file name, which permits it to sort by + /// more criteria. + /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_name`. + /// + /// Note that this is not used in the parallel iterator. + pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder + where + F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static, + { + self.sorter = Some(Sorter::ByPath(Arc::new(cmp))); + self + } + + /// Set a function for sorting directory entries by file name. + /// + /// If a compare function is set, the resulting iterator will return all + /// paths in sorted order. The compare function will be called to compare + /// names from entries from the same directory using only the name of the + /// entry. + /// + /// This method will override any previous sorter set by this method or + /// by `sort_by_file_path`. 
+ /// + /// Note that this is not used in the parallel iterator. + pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder + where + F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static, + { + self.sorter = Some(Sorter::ByName(Arc::new(cmp))); + self + } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder { + self.same_file_system = yes; + self + } + + /// Do not yield directory entries that are believed to correspond to + /// stdout. + /// + /// This is useful when a command is invoked via shell redirection to a + /// file that is also being read. For example, `grep -r foo ./ > results` + /// might end up trying to search `results` even though it is also writing + /// to it, which could cause an unbounded feedback loop. Setting this + /// option prevents this from happening by skipping over the `results` + /// file. + /// + /// This is disabled by default. + pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder { + if yes { + self.skip = stdout_handle().map(Arc::new); + } else { + self.skip = None; + } + self + } +} + +/// Walk is a recursive directory iterator over file paths in one or more +/// directories. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. 
pub struct Walk {
    /// Root paths still to be walked. The iterator is `None` for the stdin
    /// path.
    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
    /// Iterator over the root path currently being walked, if any.
    it: Option<WalkEventIter>,
    /// Ignore matcher that every root path starts from.
    ig_root: Ignore,
    /// Ignore matcher for the directory currently being visited.
    ig: Ignore,
    /// Files larger than this are skipped; `None` disables the check.
    max_filesize: Option<u64>,
    /// Handle that entries are compared against so stdout can be skipped.
    skip: Option<Arc<Handle>>,
}

impl Walk {
    /// Creates a new recursive directory iterator for the file path given.
    ///
    /// Note that this uses default settings, which include respecting
    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
    /// instead.
    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
        WalkBuilder::new(path).build()
    }

    /// Decides whether `ent` should be left out of the iteration.
    ///
    /// Entries at depth 0 are never skipped. Otherwise an entry is skipped
    /// when it matches the `skip` handle, when the ignore matcher rejects it,
    /// or, for non-directories, when the file size check rejects it.
    ///
    /// An error is returned only if comparing the entry with the `skip`
    /// handle fails.
    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
        if ent.depth() == 0 {
            return Ok(false);
        }

        if let Some(ref stdout) = self.skip {
            if path_equals(ent, stdout)? {
                return Ok(true);
            }
        }
        if should_skip_entry(&self.ig, ent) {
            return Ok(true);
        }
        if let Some(max_filesize) = self.max_filesize {
            if !ent.is_dir() {
                // A metadata error is passed on as `None` rather than
                // reported here.
                return Ok(skip_filesize(
                    max_filesize,
                    ent.path(),
                    &ent.metadata().ok(),
                ));
            }
        }
        Ok(false)
    }
}

impl Iterator for Walk {
    type Item = Result<DirEntry, Error>;

    #[inline(always)]
    fn next(&mut self) -> Option<Result<DirEntry, Error>> {
        loop {
            let ev = match self.it.as_mut().and_then(|it| it.next()) {
                Some(ev) => ev,
                None => {
                    // The current root is exhausted (or none was started
                    // yet), so move on to the next root path.
                    match self.its.next() {
                        None => return None,
                        Some((_, None)) => {
                            return Some(Ok(DirEntry::new_stdin()));
                        }
                        Some((path, Some(it))) => {
                            self.it = Some(it);
                            if path.is_dir() {
                                let (ig, err) = self.ig_root.add_parents(path);
                                self.ig = ig;
                                if let Some(err) = err {
                                    return Some(Err(err));
                                }
                            } else {
                                self.ig = self.ig_root.clone();
                            }
                        }
                    }
                    continue;
                }
            };
            match ev {
                Err(err) => {
                    return Some(Err(Error::from_walkdir(err)));
                }
                Ok(WalkEvent::Exit) => {
                    // Leaving a directory: go back to its parent's matcher.
                    self.ig = self.ig.parent().unwrap();
                }
                Ok(WalkEvent::Dir(ent)) => {
                    let mut ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        self.it.as_mut().unwrap().it.skip_current_dir();
                        // Still need to push this on the stack because
                        // we'll get a WalkEvent::Exit event for this dir.
                        // We don't care if it errors though.
                        let (igtmp, _) = self.ig.add_child(ent.path());
                        self.ig = igtmp;
                        continue;
                    }
                    let (igtmp, err) = self.ig.add_child(ent.path());
                    self.ig = igtmp;
                    // Attach any error from adding the child matcher to the
                    // entry instead of failing the iteration.
                    ent.err = err;
                    return Some(Ok(ent));
                }
                Ok(WalkEvent::File(ent)) => {
                    let ent = DirEntry::new_walkdir(ent, None);
                    let should_skip = match self.skip_entry(&ent) {
                        Err(err) => return Some(Err(err)),
                        Ok(should_skip) => should_skip,
                    };
                    if should_skip {
                        continue;
                    }
                    return Some(Ok(ent));
                }
            }
        }
    }
}

/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    depth: usize,
    it: walkdir::IntoIter,
    next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
}

#[derive(Debug)]
enum WalkEvent {
    Dir(walkdir::DirEntry),
    File(walkdir::DirEntry),
    Exit,
}

impl From<WalkDir> for WalkEventIter {
    fn from(it: WalkDir) -> WalkEventIter {
        WalkEventIter { depth: 0, it: it.into_iter(), next: None }