summaryrefslogtreecommitdiffstats
path: root/crates/ignore/src/walk.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ignore/src/walk.rs')
-rw-r--r--crates/ignore/src/walk.rs2162
1 files changed, 2162 insertions, 0 deletions
diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs
new file mode 100644
index 00000000..b2063cde
--- /dev/null
+++ b/crates/ignore/src/walk.rs
@@ -0,0 +1,2162 @@
+use std::cmp;
+use std::ffi::OsStr;
+use std::fmt;
+use std::fs::{self, FileType, Metadata};
+use std::io;
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::vec;
+
+use channel::{self, TryRecvError};
+use same_file::Handle;
+use walkdir::{self, WalkDir};
+
+use dir::{Ignore, IgnoreBuilder};
+use gitignore::GitignoreBuilder;
+use overrides::Override;
+use types::Types;
+use {Error, PartialErrorBuilder};
+
+/// A directory entry with a possible error attached.
+///
+/// The error typically refers to a problem parsing ignore files in a
+/// particular directory.
+#[derive(Clone, Debug)]
+pub struct DirEntry {
+ dent: DirEntryInner,
+ err: Option<Error>,
+}
+
+impl DirEntry {
+ /// The full path that this entry represents.
+ pub fn path(&self) -> &Path {
+ self.dent.path()
+ }
+
+ /// The full path that this entry represents.
+ /// Analogous to [`path`], but moves ownership of the path.
+ ///
+ /// [`path`]: struct.DirEntry.html#method.path
+ pub fn into_path(self) -> PathBuf {
+ self.dent.into_path()
+ }
+
+ /// Whether this entry corresponds to a symbolic link or not.
+ pub fn path_is_symlink(&self) -> bool {
+ self.dent.path_is_symlink()
+ }
+
+ /// Returns true if and only if this entry corresponds to stdin.
+ ///
+ /// i.e., The entry has depth 0 and its file name is `-`.
+ pub fn is_stdin(&self) -> bool {
+ self.dent.is_stdin()
+ }
+
+ /// Return the metadata for the file that this entry points to.
+ pub fn metadata(&self) -> Result<Metadata, Error> {
+ self.dent.metadata()
+ }
+
+ /// Return the file type for the file that this entry points to.
+ ///
+ /// This entry doesn't have a file type if it corresponds to stdin.
+ pub fn file_type(&self) -> Option<FileType> {
+ self.dent.file_type()
+ }
+
+ /// Return the file name of this entry.
+ ///
+ /// If this entry has no file name (e.g., `/`), then the full path is
+ /// returned.
+ pub fn file_name(&self) -> &OsStr {
+ self.dent.file_name()
+ }
+
+ /// Returns the depth at which this entry was created relative to the root.
+ pub fn depth(&self) -> usize {
+ self.dent.depth()
+ }
+
+ /// Returns the underlying inode number if one exists.
+ ///
+ /// If this entry doesn't have an inode number, then `None` is returned.
+ #[cfg(unix)]
+ pub fn ino(&self) -> Option<u64> {
+ self.dent.ino()
+ }
+
+ /// Returns an error, if one exists, associated with processing this entry.
+ ///
+ /// An example of an error is one that occurred while parsing an ignore
+ /// file. Errors related to traversing a directory tree itself are reported
+ /// as part of yielding the directory entry, and not with this method.
+ pub fn error(&self) -> Option<&Error> {
+ self.err.as_ref()
+ }
+
+ /// Returns true if and only if this entry points to a directory.
+ pub(crate) fn is_dir(&self) -> bool {
+ self.dent.is_dir()
+ }
+
+ fn new_stdin() -> DirEntry {
+ DirEntry { dent: DirEntryInner::Stdin, err: None }
+ }
+
+ fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
+ DirEntry { dent: DirEntryInner::Walkdir(dent), err: err }
+ }
+
+ fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
+ DirEntry { dent: DirEntryInner::Raw(dent), err: err }
+ }
+}
+
+/// The representation behind `DirEntry`.
+///
+/// A directory entry can originate from three different places:
+///
+/// 1. The walkdir crate.
+/// 2. A special marker for things like stdin.
+/// 3. A plain path.
+///
+/// Case (3) essentially has to rebuild the DirEntry implementation found in
+/// WalkDir.
+#[derive(Clone, Debug)]
+enum DirEntryInner {
+    Stdin,
+    Walkdir(walkdir::DirEntry),
+    Raw(DirEntryRaw),
+}
+
+impl DirEntryInner {
+    /// Borrows the path of the entry; stdin is reported as `<stdin>`.
+    fn path(&self) -> &Path {
+        match self {
+            DirEntryInner::Stdin => Path::new("<stdin>"),
+            DirEntryInner::Walkdir(dent) => dent.path(),
+            DirEntryInner::Raw(dent) => dent.path(),
+        }
+    }
+
+    /// Consumes the entry and hands back its path as an owned value.
+    fn into_path(self) -> PathBuf {
+        match self {
+            DirEntryInner::Stdin => PathBuf::from("<stdin>"),
+            DirEntryInner::Walkdir(dent) => dent.into_path(),
+            DirEntryInner::Raw(dent) => dent.into_path(),
+        }
+    }
+
+    /// Reports whether the entry is a symbolic link; stdin never is.
+    fn path_is_symlink(&self) -> bool {
+        match self {
+            DirEntryInner::Stdin => false,
+            DirEntryInner::Walkdir(dent) => dent.path_is_symlink(),
+            DirEntryInner::Raw(dent) => dent.path_is_symlink(),
+        }
+    }
+
+    /// Returns true only for the stdin marker.
+    fn is_stdin(&self) -> bool {
+        if let DirEntryInner::Stdin = *self {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Looks up the metadata of the entry.
+    ///
+    /// The stdin marker has no metadata, so it always produces an I/O error
+    /// tagged with the `<stdin>` path.
+    fn metadata(&self) -> Result<Metadata, Error> {
+        match self {
+            DirEntryInner::Stdin => {
+                let io_err = io::Error::new(
+                    io::ErrorKind::Other,
+                    "<stdin> has no metadata",
+                );
+                Err(Error::Io(io_err).with_path("<stdin>"))
+            }
+            DirEntryInner::Walkdir(dent) => match dent.metadata() {
+                Ok(md) => Ok(md),
+                Err(err) => {
+                    // Convert the walkdir error into an I/O error and note
+                    // which path it belongs to.
+                    let io_err = io::Error::from(err);
+                    Err(Error::Io(io_err).with_path(dent.path()))
+                }
+            },
+            DirEntryInner::Raw(dent) => dent.metadata(),
+        }
+    }
+
+    /// Returns the file type, or `None` for the stdin marker.
+    fn file_type(&self) -> Option<FileType> {
+        match self {
+            DirEntryInner::Stdin => None,
+            DirEntryInner::Walkdir(dent) => Some(dent.file_type()),
+            DirEntryInner::Raw(dent) => Some(dent.file_type()),
+        }
+    }
+
+    /// Returns the file name; stdin is reported as `<stdin>`.
+    fn file_name(&self) -> &OsStr {
+        match self {
+            DirEntryInner::Stdin => OsStr::new("<stdin>"),
+            DirEntryInner::Walkdir(dent) => dent.file_name(),
+            DirEntryInner::Raw(dent) => dent.file_name(),
+        }
+    }
+
+    /// Returns the depth of the entry; stdin always sits at depth 0.
+    fn depth(&self) -> usize {
+        match self {
+            DirEntryInner::Stdin => 0,
+            DirEntryInner::Walkdir(dent) => dent.depth(),
+            DirEntryInner::Raw(dent) => dent.depth(),
+        }
+    }
+
+    /// Returns the inode number, or `None` for the stdin marker.
+    #[cfg(unix)]
+    fn ino(&self) -> Option<u64> {
+        // Brings `ino()` into scope for walkdir's entry type.
+        use walkdir::DirEntryExt;
+
+        match self {
+            DirEntryInner::Stdin => None,
+            DirEntryInner::Walkdir(dent) => Some(dent.ino()),
+            DirEntryInner::Raw(dent) => Some(dent.ino()),
+        }
+    }
+
+    /// Returns true if and only if this entry points to a directory.
+    ///
+    /// An entry without a file type (stdin) is not a directory.
+    fn is_dir(&self) -> bool {
+        match self.file_type() {
+            Some(ft) => ft.is_dir(),
+            None => false,
+        }
+    }
+}
+
+/// A near copy of the walkdir crate's entry type, kept here so the parallel
+/// iterator can construct `DirEntry` values entirely on its own.
+#[derive(Clone)]
+struct DirEntryRaw {
+    /// The path exactly as the `fs::ReadDir` iterator reported it, even
+    /// when it names a symbolic link.
+    path: PathBuf,
+    /// The file type. Recursive iteration needs it, so it is kept around.
+    ty: FileType,
+    /// Set when the entry came from a symbolic link and the caller asked
+    /// the iterator to follow symbolic links.
+    follow_link: bool,
+    /// How deep this entry was generated relative to the root.
+    depth: usize,
+    /// The underlying inode number (Unix only).
+    #[cfg(unix)]
+    ino: u64,
+    /// The underlying metadata (Windows only). It is stored on Windows
+    /// because reading a directory there provides it for free.
+    #[cfg(windows)]
+    metadata: fs::Metadata,
+}
+
+impl fmt::Debug for DirEntryRaw {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // The `ty` field is deliberately left out: per the original
+        // author's note, FileType has no debug impl in Rust 1.9. It could
+        // be added by manually querying each possible file type. ---AG
+        let mut dbg = f.debug_struct("DirEntryRaw");
+        dbg.field("path", &self.path);
+        dbg.field("follow_link", &self.follow_link);
+        dbg.field("depth", &self.depth);
+        dbg.finish()
+    }
+}
+
+impl DirEntryRaw {
+    /// Borrows the stored path.
+    fn path(&self) -> &Path {
+        self.path.as_path()
+    }
+
+    /// Consumes the entry and returns the stored path.
+    fn into_path(self) -> PathBuf {
+        self.path
+    }
+
+    /// Reports whether the entry is a symbolic link, either by its own
+    /// file type or because it was reached by following a link.
+    fn path_is_symlink(&self) -> bool {
+        self.follow_link || self.ty.is_symlink()
+    }
+
+    /// Returns the metadata for this entry via the platform-specific
+    /// helper.
+    fn metadata(&self) -> Result<Metadata, Error> {
+        self.metadata_internal()
+    }
+
+    /// Windows: re-query the file system only when following a link;
+    /// otherwise hand out a copy of the metadata captured at creation.
+    #[cfg(windows)]
+    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
+        let md = if self.follow_link {
+            fs::metadata(&self.path)
+        } else {
+            Ok(self.metadata.clone())
+        };
+        match md {
+            Ok(md) => Ok(md),
+            Err(err) => Err(Error::Io(err).with_path(&self.path)),
+        }
+    }
+
+    /// Non-Windows: `fs::metadata` when following a link, and
+    /// `fs::symlink_metadata` otherwise.
+    #[cfg(not(windows))]
+    fn metadata_internal(&self) -> Result<fs::Metadata, Error> {
+        let md = if self.follow_link {
+            fs::metadata(&self.path)
+        } else {
+            fs::symlink_metadata(&self.path)
+        };
+        match md {
+            Ok(md) => Ok(md),
+            Err(err) => Err(Error::Io(err).with_path(&self.path)),
+        }
+    }
+
+    /// Returns the file type recorded when the entry was created.
+    fn file_type(&self) -> FileType {
+        self.ty
+    }
+
+    /// Returns the final path component, or the whole path when there is
+    /// no such component.
+    fn file_name(&self) -> &OsStr {
+        match self.path.file_name() {
+            Some(name) => name,
+            None => self.path.as_os_str(),
+        }
+    }
+
+    /// Returns the depth recorded when the entry was created.
+    fn depth(&self) -> usize {
+        self.depth
+    }
+
+    /// Returns the inode number recorded when the entry was created.
+    #[cfg(unix)]
+    fn ino(&self) -> u64 {
+        self.ino
+    }
+
+    /// Builds an entry from a `fs::DirEntry` found at the given depth.
+    ///
+    /// A failure to read the file type is reported as an I/O error that
+    /// carries both the entry's path and its depth.
+    fn from_entry(
+        depth: usize,
+        ent: &fs::DirEntry,
+    ) -> Result<DirEntryRaw, Error> {
+        let ty = match ent.file_type() {
+            Ok(ty) => ty,
+            Err(err) => {
+                let err = Error::Io(err).with_path(ent.path());
+                return Err(Error::WithDepth { depth, err: Box::new(err) });
+            }
+        };
+        DirEntryRaw::from_entry_os(depth, ent, ty)
+    }
+
+    /// Windows: also captures the entry's metadata up front.
+    #[cfg(windows)]
+    fn from_entry_os(
+        depth: usize,
+        ent: &fs::DirEntry,
+        ty: fs::FileType,
+    ) -> Result<DirEntryRaw, Error> {
+        let metadata = match ent.metadata() {
+            Ok(md) => md,
+            Err(err) => {
+                let err = Error::Io(err).with_path(ent.path());
+                return Err(Error::WithDepth { depth, err: Box::new(err) });
+            }
+        };
+        Ok(DirEntryRaw {
+            path: ent.path(),
+            ty,
+            follow_link: false,
+            depth,
+            metadata,
+        })
+    }
+
+    /// Unix: also captures the entry's inode number.
+    #[cfg(unix)]
+    fn from_entry_os(
+        depth: usize,
+        ent: &fs::DirEntry,
+        ty: fs::FileType,
+    ) -> Result<DirEntryRaw, Error> {
+        use std::os::unix::fs::DirEntryExt;
+
+        let ino = ent.ino();
+        Ok(DirEntryRaw {
+            path: ent.path(),
+            ty,
+            follow_link: false,
+            depth,
+            ino,
+        })
+    }
+
+    // Stub that keeps the crate compiling on non-standard platforms
+    // (e.g. wasm32); it always fails.
+    #[cfg(not(any(windows, unix)))]
+    fn from_entry_os(
+        depth: usize,
+        ent: &fs::DirEntry,
+        ty: fs::FileType,
+    ) -> Result<DirEntryRaw, Error> {
+        let unsupported =
+            io::Error::new(io::ErrorKind::Other, "unsupported platform");
+        Err(Error::Io(unsupported))
+    }
+
+    /// Windows: builds an entry from a path, storing `link` as the
+    /// follow-link flag. The metadata is always read with `fs::metadata`.
+    #[cfg(windows)]
+    fn from_path(
+        depth: usize,
+        pb: PathBuf,
+        link: bool,
+    ) -> Result<DirEntryRaw, Error> {
+        let metadata = match fs::metadata(&pb) {
+            Ok(md) => md,
+            Err(err) => return Err(Error::Io(err).with_path(&pb)),
+        };
+        let ty = metadata.file_type();
+        Ok(DirEntryRaw { path: pb, ty, follow_link: link, depth, metadata })
+    }
+
+    /// Unix: builds an entry from a path, storing `link` as the
+    /// follow-link flag. The metadata is always read with `fs::metadata`.
+    #[cfg(unix)]
+    fn from_path(
+        depth: usize,
+        pb: PathBuf,
+        link: bool,
+    ) -> Result<DirEntryRaw, Error> {
+        use std::os::unix::fs::MetadataExt;
+
+        let md = match fs::metadata(&pb) {
+            Ok(md) => md,
+            Err(err) => return Err(Error::Io(err).with_path(&pb)),
+        };
+        let (ty, ino) = (md.file_type(), md.ino());
+        Ok(DirEntryRaw { path: pb, ty, follow_link: link, depth, ino })
+    }
+
+    // Stub that keeps the crate compiling on non-standard platforms
+    // (e.g. wasm32); it always fails.
+    #[cfg(not(any(windows, unix)))]
+    fn from_path(
+        depth: usize,
+        pb: PathBuf,
+        link: bool,
+    ) -> Result<DirEntryRaw, Error> {
+        let unsupported =
+            io::Error::new(io::ErrorKind::Other, "unsupported platform");
+        Err(Error::Io(unsupported))
+    }
+}
+
+/// WalkBuilder builds a recursive directory iterator.
+///
+/// The builder supports a large number of configurable options. This includes
+/// specific glob overrides, file type matching, toggling whether hidden
+/// files are ignored or not, and of course, support for respecting gitignore
+/// files.
+///
+/// By default, all ignore files found are respected. This includes `.ignore`,
+/// `.gitignore`, `.git/info/exclude` and even your global gitignore
+/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
+///
+/// Some standard recursive directory options are also supported, such as
+/// limiting the recursive depth or whether to follow symbolic links (disabled
+/// by default).
+///
+/// # Ignore rules
+///
+/// There are many rules that influence whether a particular file or directory
+/// is skipped by this iterator. Those rules are documented here. Note that
+/// the rules assume a default configuration.
+///
+/// * First, glob overrides are checked. If a path matches a glob override,
+/// then matching stops. The path is then only skipped if the glob that matched
+/// the path is an ignore glob. (An override glob is a whitelist glob unless it
+/// starts with a `!`, in which case it is an ignore glob.)
+/// * Second, ignore files are checked. Ignore files currently only come from
+/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
+/// global gitignore file), plain `.ignore` files, which have the same format
+/// as gitignore files, or explicitly added ignore files. The precedence order
+/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
+/// finally explicitly added ignore files. Note that precedence between
+/// different types of ignore files is not impacted by the directory hierarchy;
+/// any `.ignore` file overrides all `.gitignore` files. Within each precedence
+/// level, more nested ignore files have a higher precedence than less nested
+/// ignore files.
+/// * Third, if the previous step yields an ignore match, then all matching
+/// is stopped and the path is skipped. If it yields a whitelist match, then
+/// matching continues. A whitelist match can be overridden by a later matcher.
+/// * Fourth, unless the path is a directory, the file type matcher is run on
+/// the path. As above, if it yields an ignore match, then all matching is
+/// stopped and the path is skipped. If it yields a whitelist match, then
+/// matching continues.
+/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
+/// path is skipped.
+/// * Sixth, unless the path is a directory, the size of the file is compared
+/// against the max filesize limit. If it exceeds the limit, it is skipped.
+/// * Seventh, if the path has made it this far then it is yielded in the
+/// iterator.
+#[derive(Clone)]
+pub struct WalkBuilder {
+ paths: Vec<PathBuf>, // root paths to traverse; `build` treats the path `-` as stdin
+ ig_builder: IgnoreBuilder, // accumulates every ignore-related setting; built into the root matcher
+ max_depth: Option<usize>, // `None` imposes no depth restriction
+ max_filesize: Option<u64>, // `None` imposes no file size limit
+ follow_links: bool, // whether symbolic links are followed (off by default)
+ same_file_system: bool, // whether traversal stays on the root path's file system
+ sorter: Option<Sorter>, // optional comparator; consumed by `build` only, not `build_parallel`
+ threads: usize, // thread count for `build_parallel`; 0 means choose automatically
+ skip: Option<Arc<Handle>>, // set by `skip_stdout(true)` from `stdout_handle()`; entries matching it are skipped
+}
+
+#[derive(Clone)]
+enum Sorter { // shared comparator that `WalkBuilder::build` hands to walkdir's `sort_by`
+ ByName( // compares two entries by file name only (set via `sort_by_file_name`)
+ Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>,
+ ),
+ ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>), // compares two entries by path (set via `sort_by_file_path`)
+}
+
+impl fmt::Debug for WalkBuilder {
+    /// Formats every configuration field of the builder.
+    ///
+    /// The sorter wraps closures, which cannot be printed, so only the kind
+    /// of comparator that is configured (if any) is reported.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // `Sorter` has no `Debug` impl; reduce it to a printable label.
+        let sorter = self.sorter.as_ref().map(|s| match *s {
+            Sorter::ByName(_) => "ByName",
+            Sorter::ByPath(_) => "ByPath",
+        });
+        f.debug_struct("WalkBuilder")
+            .field("paths", &self.paths)
+            .field("ig_builder", &self.ig_builder)
+            .field("max_depth", &self.max_depth)
+            .field("max_filesize", &self.max_filesize)
+            .field("follow_links", &self.follow_links)
+            .field("same_file_system", &self.same_file_system)
+            .field("sorter", &sorter)
+            .field("threads", &self.threads)
+            .field("skip", &self.skip)
+            .finish()
+    }
+}
+
+impl WalkBuilder {
+    /// Creates a builder for a recursive directory iterator rooted at the
+    /// given directory.
+    ///
+    /// To traverse several different directories, prefer calling `add` on
+    /// this builder over creating multiple `Walk` values.
+    pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
+        let root = path.as_ref().to_path_buf();
+        WalkBuilder {
+            paths: vec![root],
+            ig_builder: IgnoreBuilder::new(),
+            max_depth: None,
+            max_filesize: None,
+            follow_links: false,
+            same_file_system: false,
+            sorter: None,
+            threads: 0,
+            skip: None,
+        }
+    }
+
+    /// Builds a new `Walk` iterator from the current configuration.
+    pub fn build(&self) -> Walk {
+        let mut roots = Vec::with_capacity(self.paths.len());
+        for p in &self.paths {
+            // The path `-` stands for stdin and gets no directory walker.
+            if p == Path::new("-") {
+                roots.push((p.to_path_buf(), None));
+                continue;
+            }
+            // A root that is a file is always walked with link following
+            // enabled, whatever the builder setting says.
+            let mut wd = WalkDir::new(p)
+                .follow_links(self.follow_links || p.is_file())
+                .same_file_system(self.same_file_system);
+            if let Some(limit) = self.max_depth {
+                wd = wd.max_depth(limit);
+            }
+            match self.sorter {
+                None => {}
+                Some(Sorter::ByName(ref cmp)) => {
+                    let cmp = cmp.clone();
+                    wd = wd.sort_by(move |a, b| {
+                        cmp(a.file_name(), b.file_name())
+                    });
+                }
+                Some(Sorter::ByPath(ref cmp)) => {
+                    let cmp = cmp.clone();
+                    wd = wd.sort_by(move |a, b| cmp(a.path(), b.path()));
+                }
+            }
+            roots.push((p.to_path_buf(), Some(WalkEventIter::from(wd))));
+        }
+        let ig_root = self.ig_builder.build();
+        Walk {
+            its: roots.into_iter(),
+            it: None,
+            ig: ig_root.clone(),
+            ig_root,
+            max_filesize: self.max_filesize,
+            skip: self.skip.clone(),
+        }
+    }
+
+    /// Builds a new `WalkParallel` iterator.
+    ///
+    /// Be aware that the returned value does *not* implement `Iterator`.
+    /// It has to be driven with a closure instead. e.g.,
+    /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
+    pub fn build_parallel(&self) -> WalkParallel {
+        let paths = self.paths.clone().into_iter();
+        let ig_root = self.ig_builder.build();
+        WalkParallel {
+            paths,
+            ig_root,
+            max_depth: self.max_depth,
+            max_filesize: self.max_filesize,
+            follow_links: self.follow_links,
+            same_file_system: self.same_file_system,
+            threads: self.threads,
+            skip: self.skip.clone(),
+        }
+    }
+
+    /// Adds another file path to iterate over.
+    ///
+    /// Every added path is traversed recursively. Prefer this over building
+    /// several `Walk` iterators, because it lets resources be reused across
+    /// the iteration.
+    pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
+        let extra = path.as_ref().to_path_buf();
+        self.paths.push(extra);
+        self
+    }
+
+    /// Sets the maximum depth to recurse.
+    ///
+    /// The default, `None`, places no restriction on depth.
+    pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
+        self.max_depth = depth;
+        self
+    }
+
+    /// Sets whether symbolic links are followed.
+    pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.follow_links = yes;
+        self
+    }
+
+    /// Sets the file size limit; files above it are ignored.
+    ///
+    /// `None` disables the limit.
+    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
+        self.max_filesize = filesize;
+        self
+    }
+
+    /// Sets the number of threads used for traversal.
+    ///
+    /// This only matters when `build_parallel` is used.
+    ///
+    /// The default is `0`, which picks the number of threads automatically
+    /// using heuristics.
+    pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
+        self.threads = n;
+        self
+    }
+
+    /// Adds a global ignore file to the matcher.
+    ///
+    /// Its rules rank below every other source of ignore rules.
+    ///
+    /// An error is returned if adding the ignore file ran into a problem.
+    /// The error may describe a *partial* failure: for instance, when an
+    /// ignore file contains an invalid glob, all of its other globs are
+    /// still applied.
+    pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
+        let mut gi_builder = GitignoreBuilder::new("");
+        let mut problems = PartialErrorBuilder::default();
+        problems.maybe_push(gi_builder.add(path));
+        match gi_builder.build() {
+            Err(err) => {
+                problems.push(err);
+            }
+            Ok(gi) => {
+                self.ig_builder.add_ignore(gi);
+            }
+        }
+        problems.into_error_option()
+    }
+
+    /// Registers a custom ignore file name.
+    ///
+    /// Files with such names take precedence over all other ignore files.
+    ///
+    /// When several names are registered, names added later take
+    /// precedence over names added earlier.
+    pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
+        &mut self,
+        file_name: S,
+    ) -> &mut WalkBuilder {
+        self.ig_builder.add_custom_ignore_filename(file_name);
+        self
+    }
+
+    /// Installs an override matcher.
+    ///
+    /// No override matcher is used by default.
+    ///
+    /// Any previously installed override matcher is replaced.
+    pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
+        self.ig_builder.overrides(overrides);
+        self
+    }
+
+    /// Installs a file type matcher.
+    ///
+    /// No file type matcher is used by default.
+    ///
+    /// Any previously installed file type matcher is replaced.
+    pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
+        self.ig_builder.types(types);
+        self
+    }
+
+    /// Switches all of the standard ignore filters on or off together.
+    ///
+    /// The group consists of every filter that is enabled by default:
+    ///
+    /// - [hidden()](#method.hidden)
+    /// - [parents()](#method.parents)
+    /// - [ignore()](#method.ignore)
+    /// - [git_ignore()](#method.git_ignore)
+    /// - [git_global()](#method.git_global)
+    /// - [git_exclude()](#method.git_exclude)
+    ///
+    /// Each of them can still be toggled on its own after this call.
+    ///
+    /// This is (by definition) enabled by default.
+    pub fn standard_filters(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.hidden(yes)
+            .parents(yes)
+            .ignore(yes)
+            .git_ignore(yes)
+            .git_global(yes)
+            .git_exclude(yes)
+    }
+
+    /// Sets whether hidden files are ignored.
+    ///
+    /// This is enabled by default.
+    pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.hidden(yes);
+        self
+    }
+
+    /// Sets whether ignore files in parent directories are read.
+    ///
+    /// When enabled, .gitignore files found in the parent directories of
+    /// each given file path are respected. When disabled, they are ignored.
+    ///
+    /// This is enabled by default.
+    pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.parents(yes);
+        self
+    }
+
+    /// Sets whether `.ignore` files are read.
+    ///
+    /// `.ignore` files follow the same semantics as `gitignore` files and
+    /// are supported by search tools such as ripgrep and The Silver
+    /// Searcher.
+    ///
+    /// This is enabled by default.
+    pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.ignore(yes);
+        self
+    }
+
+    /// Sets whether a global gitignore file is read. Its path is taken from
+    /// git's `core.excludesFile` config option.
+    ///
+    /// Git's config file lives at `$HOME/.gitconfig`. If `$HOME/.gitconfig`
+    /// does not exist or does not specify `core.excludesFile`, then
+    /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
+    /// set or is empty, then `$HOME/.config/git/ignore` is used instead.
+    ///
+    /// This is enabled by default.
+    pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_global(yes);
+        self
+    }
+
+    /// Sets whether `.gitignore` files are read.
+    ///
+    /// `.gitignore` files are matched as described in the `gitignore` man
+    /// page.
+    ///
+    /// This is enabled by default.
+    pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_ignore(yes);
+        self
+    }
+
+    /// Sets whether `.git/info/exclude` files are read.
+    ///
+    /// `.git/info/exclude` files are matched as described in the
+    /// `gitignore` man page.
+    ///
+    /// This is enabled by default.
+    pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.git_exclude(yes);
+        self
+    }
+
+    /// Sets whether a git repository must be present for git-related ignore
+    /// rules (global rules, .gitignore and local exclude rules) to apply.
+    ///
+    /// When disabled, git-related ignore rules apply even when searching
+    /// outside a git repository.
+    pub fn require_git(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.require_git(yes);
+        self
+    }
+
+    /// Sets whether ignore files are processed case insensitively.
+    ///
+    /// This is disabled by default.
+    pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.ig_builder.ignore_case_insensitive(yes);
+        self
+    }
+
+    /// Installs a function that orders directory entries by their path.
+    ///
+    /// With a compare function installed, the resulting iterator yields all
+    /// paths in sorted order. The function is called to compare entries
+    /// that belong to the same directory.
+    ///
+    /// This works like `sort_by_file_name`, but the comparator receives a
+    /// `&Path` rather than the base file name, so it can sort by more
+    /// criteria.
+    ///
+    /// Any sorter set earlier by this method or by `sort_by_file_name` is
+    /// replaced.
+    ///
+    /// Note that this is not used in the parallel iterator.
+    pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
+    where
+        F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
+    {
+        let by_path = Sorter::ByPath(Arc::new(cmp));
+        self.sorter = Some(by_path);
+        self
+    }
+
+    /// Installs a function that orders directory entries by file name.
+    ///
+    /// With a compare function installed, the resulting iterator yields all
+    /// paths in sorted order. The function is called to compare entries
+    /// that belong to the same directory, and it only sees the name of each
+    /// entry.
+    ///
+    /// Any sorter set earlier by this method or by `sort_by_file_path` is
+    /// replaced.
+    ///
+    /// Note that this is not used in the parallel iterator.
+    pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
+    where
+        F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
+    {
+        let by_name = Sorter::ByName(Arc::new(cmp));
+        self.sorter = Some(by_name);
+        self
+    }
+
+    /// Sets whether traversal stays within a single file system.
+    ///
+    /// When enabled, the traversal does not descend into directories that
+    /// live on a different file system than the root path.
+    ///
+    /// At present this is only supported on Unix and Windows. On any other
+    /// platform, enabling it makes the traversal return an error right away
+    /// without yielding any entries.
+    pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.same_file_system = yes;
+        self
+    }
+
+    /// Sets whether directory entries believed to correspond to stdout are
+    /// withheld.
+    ///
+    /// This helps when a command's output is redirected by the shell into a
+    /// file that is also being read. For example, `grep -r foo ./ > results`
+    /// might end up trying to search `results` even though it is also
+    /// writing to it, which could cause an unbounded feedback loop. With
+    /// this option enabled, the `results` file is skipped and the loop
+    /// cannot occur.
+    ///
+    /// This is disabled by default.
+    pub fn skip_stdout(&mut self, yes: bool) -> &mut WalkBuilder {
+        self.skip = if yes { stdout_handle().map(Arc::new) } else { None };
+        self
+    }
+}
+
+/// Walk is a recursive directory iterator over file paths in one or more
+/// directories.
+///
+/// Only file and directory paths matching the rules are returned. By default,
+/// ignore files like `.gitignore` are respected. The precise matching rules
+/// and their precedence are explained in the documentation for `WalkBuilder`.
+pub struct Walk {
+    /// The remaining roots, each paired with its walker. A root without a
+    /// walker stands for stdin.
+    its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
+    /// The walker of the root currently being traversed, if any.
+    it: Option<WalkEventIter>,
+    /// The matcher produced by the builder, before any root-specific state.
+    ig_root: Ignore,
+    /// The matcher for the directory currently being traversed.
+    ig: Ignore,
+    /// Files larger than this limit are skipped; `None` means no limit.
+    max_filesize: Option<u64>,
+    /// When set, entries that resolve to this handle are skipped.
+    skip: Option<Arc<Handle>>,
+}
+
+impl Walk {
+    /// Creates a new recursive directory iterator for the file path given.
+    ///
+    /// Note that this uses default settings, which include respecting
+    /// `.gitignore` files. To configure the iterator, use `WalkBuilder`
+    /// instead.
+    pub fn new<P: AsRef<Path>>(path: P) -> Walk {
+        WalkBuilder::new(path).build()
+    }
+
+    /// Decides whether `ent` should be withheld from the caller.
+    ///
+    /// Entries at depth 0 (the roots themselves) are never skipped. Below
+    /// that, an entry is skipped when it matches the `skip` handle, when the
+    /// current ignore matcher rejects it, or when it is not a directory and
+    /// exceeds the file size limit.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if comparing the entry against the `skip` handle
+    /// fails.
+    fn skip_entry(&self, ent: &DirEntry) -> Result<bool, Error> {
+        if ent.depth() == 0 {
+            return Ok(false);
+        }
+
+        if let Some(ref stdout) = self.skip {
+            if path_equals(ent, stdout)? {
+                return Ok(true);
+            }
+        }
+        if should_skip_entry(&self.ig, ent) {
+            return Ok(true);
+        }
+        if let Some(max_filesize) = self.max_filesize {
+            if !ent.is_dir() {
+                // A metadata failure is passed on as `None`, leaving the
+                // decision to `skip_filesize`.
+                return Ok(skip_filesize(
+                    max_filesize,
+                    ent.path(),
+                    &ent.metadata().ok(),
+                ));
+            }
+        }
+        Ok(false)
+    }
+}
+
+impl Iterator for Walk { // yields entries that survive `skip_entry`, plus any traversal errors
+ type Item = Result<DirEntry, Error>;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<Result<DirEntry, Error>> {
+ loop {
+ let ev = match self.it.as_mut().and_then(|it| it.next()) { // pull the next event from the current root's walker, if one is active
+ Some(ev) => ev,
+ None => { // no active walker, or it is exhausted: move on to the next root
+ match self.its.next() {
+ None => return None, // no roots left, so iteration is finished
+ Some((_, None)) => { // a root without a walker stands for stdin
+ return Some(Ok(DirEntry::new_stdin()));
+ }
+ Some((path, Some(it))) => {
+ self.it = Some(it);
+ if path.is_dir() { // directory roots derive their matcher from the root matcher via `add_parents`
+ let (ig, err) = self.ig_root.add_parents(path);
+ self.ig = ig; // the new matcher is installed even when `add_parents` reported an error
+ if let Some(err) = err {
+ return Some(Err(err));
+ }
+ } else { // every other root starts from a fresh copy of the root matcher
+ self.ig = self.ig_root.clone();
+ }
+ }
+ }
+ continue; // loop again so the newly installed walker is polled
+ }
+ };
+ match ev {
+ Err(err) => {
+ return Some(Err(Error::from_walkdir(err)));
+ }
+ Ok(WalkEvent::Exit) => { // a directory is finished: return to its parent's matcher
+ self.ig = self.ig.parent().unwrap(); // panics if the matcher has no parent; relies on each Exit pairing with an earlier `add_child`
+ }
+ Ok(WalkEvent::Dir(ent)) => {
+ let mut ent = DirEntry::new_walkdir(ent, None);
+ let should_skip = match self.skip_entry(&ent) {
+ Err(err) => return Some(Err(err)),
+ Ok(should_skip) => should_skip,
+ };
+ if should_skip {
+ self.it.as_mut().unwrap().it.skip_current_dir(); // ask walkdir not to descend into this directory
+ // Still need to push this on the stack because
+ // we'll get a WalkEvent::Exit event for this dir.
+ // We don't care if it errors though.
+ let (igtmp, _) = self.ig.add_child(ent.path());
+ self.ig = igtmp;
+ continue;
+ }
+ let (igtmp, err) = self.ig.add_child(ent.path()); // push the matcher for this directory before it is entered
+ self.ig = igtmp;
+ ent.err = err; // any error from `add_child` travels with the yielded entry
+ return Some(Ok(ent));
+ }
+ Ok(WalkEvent::File(ent)) => {
+ let ent = DirEntry::new_walkdir(ent, None);
+ let should_skip = match self.skip_entry(&ent) {
+ Err(err) => return Some(Err(err)),
+ Ok(should_skip) => should_skip,
+ };
+ if should_skip {
+ continue;
+ }
+ return Some(Ok(ent));
+ }
+ }
+ }
+ }
+}
+
+/// WalkEventIter transforms a WalkDir iterator into an iterator that more
+/// accurately describes the directory tree. Namely, it emits events that are
+/// one of three types: directory, file or "exit." An "exit" event means that
+/// the entire contents of a directory have been enumerated.
+struct WalkEventIter {
+ depth: usize, // initialised to 0 by `From<WalkDir>`; presumably the current traversal depth — TODO confirm against the Iterator impl
+ it: walkdir::IntoIter, // the wrapped walkdir iterator; `Walk::next` calls `skip_current_dir` on it directly
+ next: Option<Result<walkdir::DirEntry, walkdir::Error>>, // initialised to `None`; presumably a buffered look-ahead item — TODO confirm
+}
+
+#[derive(Debug)]
+enum WalkEvent { // one step of the traversal, as consumed by `Walk::next`
+ Dir(walkdir::DirEntry), // a directory entry; `Walk::next` pushes a child ignore matcher for it
+ File(walkdir::DirEntry), // a file entry; `Walk::next` yields it unless `skip_entry` rejects it
+ Exit, // the entire contents of a directory have been enumerated
+}
+
+impl From<WalkDir> for WalkEventIter {
+ fn from(it: WalkDir) -> WalkEventIter {
+ WalkEventIter { depth: 0, it: it.into_iter(), next: None }