summaryrefslogtreecommitdiffstats
path: root/ignore
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2016-11-05 21:44:15 -0400
committerAndrew Gallant <jamslam@gmail.com>2016-11-05 21:45:55 -0400
commitb272be25fa7b616689384d2e4bec3e01715e2477 (patch)
tree8743d1ec86294f81da800c024f094a4846158527 /ignore
parent1aeae3e22da4e9ffa78ece485369e7a24744b2d4 (diff)
Add parallel recursive directory iterator.
This adds a new walk type in the `ignore` crate, `WalkParallel`, which provides a way for recursively iterating over a set of paths in parallel while respecting various ignore rules. The API is a bit strange, as a closure producing a closure isn't something one often sees, but it does seem to work well. This also allowed us to simplify much of the worker logic in ripgrep proper, where MultiWorker is now gone.
Diffstat (limited to 'ignore')
-rw-r--r--ignore/Cargo.toml1
-rw-r--r--ignore/examples/walk.rs98
-rw-r--r--ignore/src/dir.rs12
-rw-r--r--ignore/src/lib.rs62
-rw-r--r--ignore/src/walk.rs1017
5 files changed, 1051 insertions, 139 deletions
diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml
index 1d336441..635299f9 100644
--- a/ignore/Cargo.toml
+++ b/ignore/Cargo.toml
@@ -18,6 +18,7 @@ name = "ignore"
bench = false
[dependencies]
+crossbeam = "0.2"
globset = { version = "0.1.1", path = "../globset" }
lazy_static = "0.2"
log = "0.3"
diff --git a/ignore/examples/walk.rs b/ignore/examples/walk.rs
index 0ce0a086..0ff4ea94 100644
--- a/ignore/examples/walk.rs
+++ b/ignore/examples/walk.rs
@@ -1,28 +1,92 @@
-/*
+#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
+
+extern crate crossbeam;
extern crate ignore;
extern crate walkdir;
use std::env;
use std::io::{self, Write};
-use std::os::unix::ffi::OsStrExt;
+use std::path::Path;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread;
-use ignore::ignore::IgnoreBuilder;
+use crossbeam::sync::MsQueue;
+use ignore::WalkBuilder;
use walkdir::WalkDir;
fn main() {
- let path = env::args().nth(1).unwrap();
- let ig = IgnoreBuilder::new().build();
- let wd = WalkDir::new(path);
- let walker = ignore::walk::Iter::new(ig, wd);
-
- let mut stdout = io::BufWriter::new(io::stdout());
- // let mut count = 0;
- for dirent in walker {
- // count += 1;
- stdout.write(dirent.path().as_os_str().as_bytes()).unwrap();
- stdout.write(b"\n").unwrap();
+ let mut path = env::args().nth(1).unwrap();
+ let mut parallel = false;
+ let mut simple = false;
+ let queue: Arc<MsQueue<Option<DirEntry>>> = Arc::new(MsQueue::new());
+ if path == "parallel" {
+ path = env::args().nth(2).unwrap();
+ parallel = true;
+ } else if path == "walkdir" {
+ path = env::args().nth(2).unwrap();
+ simple = true;
+ }
+
+ let stdout_queue = queue.clone();
+ let stdout_thread = thread::spawn(move || {
+ let mut stdout = io::BufWriter::new(io::stdout());
+ while let Some(dent) = stdout_queue.pop() {
+ write_path(&mut stdout, dent.path());
+ }
+ });
+
+ if parallel {
+ let walker = WalkBuilder::new(path).threads(6).build_parallel();
+ walker.run(|| {
+ let queue = queue.clone();
+ Box::new(move |result| {
+ use ignore::WalkState::*;
+
+ queue.push(Some(DirEntry::Y(result.unwrap())));
+ Continue
+ })
+ });
+ } else if simple {
+ let mut stdout = io::BufWriter::new(io::stdout());
+ let walker = WalkDir::new(path);
+ for result in walker {
+ queue.push(Some(DirEntry::X(result.unwrap())));
+ }
+ } else {
+ let mut stdout = io::BufWriter::new(io::stdout());
+ let walker = WalkBuilder::new(path).build();
+ for result in walker {
+ queue.push(Some(DirEntry::Y(result.unwrap())));
+ }
+ }
+ queue.push(None);
+ stdout_thread.join().unwrap();
+}
+
+enum DirEntry {
+ X(walkdir::DirEntry),
+ Y(ignore::DirEntry),
+}
+
+impl DirEntry {
+ fn path(&self) -> &Path {
+ match *self {
+ DirEntry::X(ref x) => x.path(),
+ DirEntry::Y(ref y) => y.path(),
+ }
}
- // println!("{}", count);
}
-*/
-fn main() {}
+
+#[cfg(unix)]
+fn write_path<W: Write>(mut wtr: W, path: &Path) {
+ use std::os::unix::ffi::OsStrExt;
+ wtr.write(path.as_os_str().as_bytes()).unwrap();
+ wtr.write(b"\n").unwrap();
+}
+
+#[cfg(not(unix))]
+fn write_path<W: Write>(mut wtr: W, path: &Path) {
+ wtr.write(path.to_string_lossy().as_bytes()).unwrap();
+ wtr.write(b"\n").unwrap();
+}
diff --git a/ignore/src/dir.rs b/ignore/src/dir.rs
index 6ac00627..496664f3 100644
--- a/ignore/src/dir.rs
+++ b/ignore/src/dir.rs
@@ -137,6 +137,11 @@ impl Ignore {
self.0.parent.is_none()
}
+ /// Returns true if this matcher was added via the `add_parents` method.
+ pub fn is_absolute_parent(&self) -> bool {
+ self.0.is_absolute_parent
+ }
+
/// Return this matcher's parent, if one exists.
pub fn parent(&self) -> Option<Ignore> {
self.0.parent.clone()
@@ -376,7 +381,7 @@ impl Ignore {
}
/// Returns an iterator over parent ignore matchers, including this one.
- fn parents(&self) -> Parents {
+ pub fn parents(&self) -> Parents {
Parents(Some(self))
}
@@ -387,7 +392,10 @@ impl Ignore {
}
}
-struct Parents<'a>(Option<&'a Ignore>);
+/// An iterator over all parents of an ignore matcher, including itself.
+///
+/// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher.
+pub struct Parents<'a>(Option<&'a Ignore>);
impl<'a> Iterator for Parents<'a> {
type Item = &'a Ignore;
diff --git a/ignore/src/lib.rs b/ignore/src/lib.rs
index a3aa0c8f..d489712d 100644
--- a/ignore/src/lib.rs
+++ b/ignore/src/lib.rs
@@ -44,6 +44,7 @@ for result in WalkBuilder::new("./").hidden(false).build() {
See the documentation for `WalkBuilder` for many other options.
*/
+extern crate crossbeam;
extern crate globset;
#[macro_use]
extern crate lazy_static;
@@ -61,7 +62,7 @@ use std::fmt;
use std::io;
use std::path::{Path, PathBuf};
-pub use walk::{DirEntry, Walk, WalkBuilder};
+pub use walk::{DirEntry, Walk, WalkBuilder, WalkParallel, WalkState};
mod dir;
pub mod gitignore;
@@ -80,6 +81,12 @@ pub enum Error {
WithLineNumber { line: u64, err: Box<Error> },
/// An error associated with a particular file path.
WithPath { path: PathBuf, err: Box<Error> },
+ /// An error associated with a particular directory depth when recursively
+ /// walking a directory.
+ WithDepth { depth: usize, err: Box<Error> },
+ /// An error that occurs when a file loop is detected when traversing
+ /// symbolic links.
+ Loop { ancestor: PathBuf, child: PathBuf },
/// An error that occurs when doing I/O, such as reading an ignore file.
Io(io::Error),
/// An error that occurs when trying to parse a glob.
@@ -101,6 +108,7 @@ impl Error {
Error::Partial(_) => true,
Error::WithLineNumber { ref err, .. } => err.is_partial(),
Error::WithPath { ref err, .. } => err.is_partial(),
+ Error::WithDepth { ref err, .. } => err.is_partial(),
_ => false,
}
}
@@ -111,6 +119,8 @@ impl Error {
Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
Error::WithLineNumber { ref err, .. } => err.is_io(),
Error::WithPath { ref err, .. } => err.is_io(),
+ Error::WithDepth { ref err, .. } => err.is_io(),
+ Error::Loop { .. } => false,
Error::Io(_) => true,
Error::Glob(_) => false,
Error::UnrecognizedFileType(_) => false,
@@ -118,6 +128,16 @@ impl Error {
}
}
+ /// Returns a depth associated with recursively walking a directory (if
+ /// this error was generated from a recursive directory iterator).
+ pub fn depth(&self) -> Option<usize> {
+ match *self {
+ Error::WithPath { ref err, .. } => err.depth(),
+ Error::WithDepth { depth, .. } => Some(depth),
+ _ => None,
+ }
+ }
+
/// Turn an error into a tagged error with the given file path.
fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
Error::WithPath {
@@ -126,6 +146,14 @@ impl Error {
}
}
+ /// Turn an error into a tagged error with the given depth.
+ fn with_depth(self, depth: usize) -> Error {
+ Error::WithDepth {
+ depth: depth,
+ err: Box::new(self),
+ }
+ }
+
/// Turn an error into a tagged error with the given file path and line
/// number. If path is empty, then it is omitted from the error.
fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
@@ -146,6 +174,8 @@ impl error::Error for Error {
Error::Partial(_) => "partial error",
Error::WithLineNumber { ref err, .. } => err.description(),
Error::WithPath { ref err, .. } => err.description(),
+ Error::WithDepth { ref err, .. } => err.description(),
+ Error::Loop { .. } => "file system loop found",
Error::Io(ref err) => err.description(),
Error::Glob(ref msg) => msg,
Error::UnrecognizedFileType(_) => "unrecognized file type",
@@ -168,6 +198,12 @@ impl fmt::Display for Error {
Error::WithPath { ref path, ref err } => {
write!(f, "{}: {}", path.display(), err)
}
+ Error::WithDepth { ref err, .. } => err.fmt(f),
+ Error::Loop { ref ancestor, ref child } => {
+ write!(f, "File system loop found: \
+ {} points to an ancestor {}",
+ child.display(), ancestor.display())
+ }
Error::Io(ref err) => err.fmt(f),
Error::Glob(ref msg) => write!(f, "{}", msg),
Error::UnrecognizedFileType(ref ty) => {
@@ -187,6 +223,30 @@ impl From<io::Error> for Error {
}
}
+impl From<walkdir::Error> for Error {
+ fn from(err: walkdir::Error) -> Error {
+ let depth = err.depth();
+ if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
+ return Error::WithDepth {
+ depth: depth,
+ err: Box::new(Error::Loop {
+ ancestor: anc.to_path_buf(),
+ child: child.to_path_buf(),
+ }),
+ };
+ }
+ let path = err.path().map(|p| p.to_path_buf());
+ let mut ig_err = Error::Io(io::Error::from(err));
+ if let Some(path) = path {
+ ig_err = Error::WithPath {
+ path: path,
+ err: Box::new(ig_err),
+ };
+ }
+ ig_err
+ }
+}
+
#[derive(Debug, Default)]
struct PartialErrorBuilder(Vec<Error>);
diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs
index 0bcc6136..a1ac2de5 100644
--- a/ignore/src/walk.rs
+++ b/ignore/src/walk.rs
@@ -1,10 +1,15 @@
use std::ffi::OsStr;
-use std::fs::{FileType, Metadata};
+use std::fmt;
+use std::fs::{self, FileType, Metadata};
use std::io;
use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::thread;
use std::vec;
-use walkdir::{self, WalkDir, WalkDirIterator};
+use crossbeam::sync::MsQueue;
+use walkdir::{self, WalkDir, WalkDirIterator, is_same_file};
use dir::{Ignore, IgnoreBuilder};
use gitignore::GitignoreBuilder;
@@ -12,6 +17,278 @@ use overrides::Override;
use types::Types;
use {Error, PartialErrorBuilder};
+/// A directory entry with a possible error attached.
+///
+/// The error typically refers to a problem parsing ignore files in a
+/// particular directory.
+#[derive(Debug)]
+pub struct DirEntry {
+ dent: DirEntryInner,
+ err: Option<Error>,
+}
+
+impl DirEntry {
+ /// The full path that this entry represents.
+ pub fn path(&self) -> &Path {
+ self.dent.path()
+ }
+
+ /// Whether this entry corresponds to a symbolic link or not.
+ pub fn path_is_symbolic_link(&self) -> bool {
+ self.dent.path_is_symbolic_link()
+ }
+
+ /// Returns true if and only if this entry corresponds to stdin.
+ ///
+ /// i.e., The entry has depth 0 and its file name is `-`.
+ pub fn is_stdin(&self) -> bool {
+ self.dent.is_stdin()
+ }
+
+ /// Return the metadata for the file that this entry points to.
+ pub fn metadata(&self) -> Result<Metadata, Error> {
+ self.dent.metadata()
+ }
+
+ /// Return the file type for the file that this entry points to.
+ ///
+ /// This entry doesn't have a file type if it corresponds to stdin.
+ pub fn file_type(&self) -> Option<FileType> {
+ self.dent.file_type()
+ }
+
+ /// Return the file name of this entry.
+ ///
+ /// If this entry has no file name (e.g., `/`), then the full path is
+ /// returned.
+ pub fn file_name(&self) -> &OsStr {
+ self.dent.file_name()
+ }
+
+ /// Returns the depth at which this entry was created relative to the root.
+ pub fn depth(&self) -> usize {
+ self.dent.depth()
+ }
+
+ /// Returns an error, if one exists, associated with processing this entry.
+ ///
+ /// An example of an error is one that occurred while parsing an ignore
+ /// file.
+ pub fn error(&self) -> Option<&Error> {
+ self.err.as_ref()
+ }
+
+ fn new_stdin() -> DirEntry {
+ DirEntry {
+ dent: DirEntryInner::Stdin,
+ err: None,
+ }
+ }
+
+ fn new_walkdir(dent: walkdir::DirEntry, err: Option<Error>) -> DirEntry {
+ DirEntry {
+ dent: DirEntryInner::Walkdir(dent),
+ err: err,
+ }
+ }
+
+ fn new_raw(dent: DirEntryRaw, err: Option<Error>) -> DirEntry {
+ DirEntry {
+ dent: DirEntryInner::Raw(dent),
+ err: err,
+ }
+ }
+}
+
+/// DirEntryInner is the implementation of DirEntry.
+///
+/// It specifically represents three distinct sources of directory entries:
+///
+/// 1. From the walkdir crate.
+/// 2. Special entries that represent things like stdin.
+/// 3. From a path.
+///
+/// Specifically, (3) has to essentially re-create the DirEntry implementation
+/// from WalkDir.
+#[derive(Debug)]
+enum DirEntryInner {
+ Stdin,
+ Walkdir(walkdir::DirEntry),
+ Raw(DirEntryRaw),
+}
+
+impl DirEntryInner {
+ fn path(&self) -> &Path {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => Path::new("<stdin>"),
+ Walkdir(ref x) => x.path(),
+ Raw(ref x) => x.path(),
+ }
+ }
+
+ fn path_is_symbolic_link(&self) -> bool {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => false,
+ Walkdir(ref x) => x.path_is_symbolic_link(),
+ Raw(ref x) => x.path_is_symbolic_link(),
+ }
+ }
+
+ fn is_stdin(&self) -> bool {
+ match *self {
+ DirEntryInner::Stdin => true,
+ _ => false,
+ }
+ }
+
+ fn metadata(&self) -> Result<Metadata, Error> {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => {
+ let err = Error::Io(io::Error::new(
+ io::ErrorKind::Other, "<stdin> has no metadata"));
+ Err(err.with_path("<stdin>"))
+ }
+ Walkdir(ref x) => {
+ x.metadata().map_err(|err| {
+ Error::Io(io::Error::from(err)).with_path(x.path())
+ })
+ }
+ Raw(ref x) => x.metadata(),
+ }
+ }
+
+ fn file_type(&self) -> Option<FileType> {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => None,
+ Walkdir(ref x) => Some(x.file_type()),
+ Raw(ref x) => Some(x.file_type()),
+ }
+ }
+
+ fn file_name(&self) -> &OsStr {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => OsStr::new("<stdin>"),
+ Walkdir(ref x) => x.file_name(),
+ Raw(ref x) => x.file_name(),
+ }
+ }
+
+ fn depth(&self) -> usize {
+ use self::DirEntryInner::*;
+ match *self {
+ Stdin => 0,
+ Walkdir(ref x) => x.depth(),
+ Raw(ref x) => x.depth(),
+ }
+ }
+}
+
+/// DirEntryRaw is essentially copied from the walkdir crate so that we can
+/// build `DirEntry`s from whole cloth in the parallel iterator.
+struct DirEntryRaw {
+ /// The path as reported by the `fs::ReadDir` iterator (even if it's a
+ /// symbolic link).
+ path: PathBuf,
+ /// The file type. Necessary for recursive iteration, so store it.
+ ty: FileType,
+ /// Is set when this entry was created from a symbolic link and the user
+ /// expects the iterator to follow symbolic links.
+ follow_link: bool,
+ /// The depth at which this entry was generated relative to the root.
+ depth: usize,
+}
+
+impl fmt::Debug for DirEntryRaw {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ // Leaving out FileType because it doesn't have a debug impl
+ // in Rust 1.9. We could add it if we really wanted to by manually
+ // querying each possibly file type. Meh. ---AG
+ f.debug_struct("DirEntryRaw")
+ .field("path", &self.path)
+ .field("follow_link", &self.follow_link)
+ .field("depth", &self.depth)
+ .finish()
+ }
+}
+
+impl DirEntryRaw {
+ fn path(&self) -> &Path {
+ &self.path
+ }
+
+ fn path_is_symbolic_link(&self) -> bool {
+ self.ty.is_symlink() || self.follow_link
+ }
+
+ fn metadata(&self) -> Result<Metadata, Error> {
+ if self.follow_link {
+ fs::metadata(&self.path)
+ } else {
+ fs::symlink_metadata(&self.path)
+ }.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
+ }
+
+ fn file_type(&self) -> FileType {
+ self.ty
+ }
+
+ fn file_name(&self) -> &OsStr {
+ self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
+ }
+
+ fn depth(&self) -> usize {
+ self.depth
+ }
+
+ fn from_entry(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ) -> Result<DirEntryRaw, Error> {
+ let ty = try!(ent.file_type().map_err(|err| {
+ let err = Error::Io(io::Error::from(err)).with_path(ent.path());
+ Error::WithDepth {
+ depth: depth,
+ err: Box::new(err),
+ }
+ }));
+ Ok(DirEntryRaw {
+ path: ent.path(),
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ })
+ }
+
+ fn from_link(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> {
+ let md = try!(fs::metadata(&pb).map_err(|err| {
+ Error::Io(err).with_path(&pb)
+ }));
+ Ok(DirEntryRaw {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: true,
+ depth: depth,
+ })
+ }
+
+ fn from_path(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> {
+ let md = try!(fs::symlink_metadata(&pb).map_err(|err| {
+ Error::Io(err).with_path(&pb)
+ }));
+ Ok(DirEntryRaw {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: false,
+ depth: depth,
+ })
+ }
+}
+
/// WalkBuilder builds a recursive directory iterator.
///
/// The builder supports a large number of configurable options. This includes
@@ -58,12 +335,14 @@ use {Error, PartialErrorBuilder};
/// path is skipped.
/// * Sixth, if the path has made it this far then it is yielded in the
/// iterator.
+#[derive(Clone, Debug)]
pub struct WalkBuilder {
paths: Vec<PathBuf>,
ig_builder: IgnoreBuilder,
parents: bool,
max_depth: Option<usize>,
follow_links: bool,
+ threads: usize,
}
impl WalkBuilder {
@@ -80,6 +359,7 @@ impl WalkBuilder {
parents: true,
max_depth: None,
follow_links: false,
+ threads: 0,
}
}
@@ -109,6 +389,22 @@ impl WalkBuilder {
}
}
+ /// Build a new `WalkParallel` iterator.
+ ///
+ /// Note that this *doesn't* return something that implements `Iterator`.
+ /// Instead, the returned value must be run with a closure. e.g.,
+ /// `builder.build_parallel().run(|| |path| println!("{:?}", path))`.
+ pub fn build_parallel(&self) -> WalkParallel {
+ WalkParallel {
+ paths: self.paths.clone().into_iter(),
+ ig_root: self.ig_builder.build(),
+ max_depth: self.max_depth,
+ follow_links: self.follow_links,
+ parents: self.parents,
+ threads: self.threads,
+ }
+ }
+
/// Add a file path to the iterator.
///
/// Each additional file path added is traversed recursively. This should
@@ -133,6 +429,17 @@ impl WalkBuilder {
self
}
+ /// The number of threads to use for traversal.
+ ///
+ /// Note that this only has an effect when using `build_parallel`.
+ ///
+ /// The default setting is `0`, which chooses the number of threads
+ /// automatically using heuristics.
+ pub fn threads(&mut self, n: usize) -> &mut WalkBuilder {
+ self.threads = n;
+ self
+ }
+
/// Add an ignore file to the matcher.
///
/// This has lower precedence than all other sources of ignore rules.
@@ -239,7 +546,8 @@ impl WalkBuilder {
}
}
-/// Walk is a recursive directory iterator over file paths in a directory.
+/// Walk is a recursive directory iterator over file paths in one or more
+/// directories.
///
/// Only file and directory paths matching the rules are returned. By default,
/// ignore files like `.gitignore` are respected. The precise matching rules
@@ -264,17 +572,9 @@ impl Walk {
fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool {
if ent.depth() == 0 {
- // Never skip the root directory.
return false;
}
- let m = self.ig.matched(ent.path(), ent.file_type().is_dir());
- if m.is_ignore() {
- debug!("ignoring {}: {:?}", ent.path().display(), m);
- return true;
- } else if m.is_whitelist() {
- debug!("whitelisting {}: {:?}", ent.path().display(), m);
- }
- false
+ skip_path(&self.ig, ent.path(), ent.file_type().is_dir())
}
}
@@ -290,10 +590,7 @@ impl Iterator for Walk {
match self.its.next() {
None => return None,
Some((_, None)) => {
- return Some(Ok(DirEntry {
- dent: None,
- err: None,
- }));
+ return Some(Ok(DirEntry::new_stdin()));
}
Some((path, Some(it))) => {
self.it = Some(it);
@@ -313,15 +610,7 @@ impl Iterator for Walk {
};
match ev {
Err(err) => {
- let path = err.path().map(|p| p.to_path_buf());
- let mut ig_err = Error::Io(io::Error::from(err));
- if let Some(path) = path {
- ig_err = Error::WithPath {
- path: path.to_path_buf(),
- err: Box::new(ig_err),
- };
- }
- return Some(Err(ig_err));
+ return Some(Err(Error::from(err)));
}
Ok(WalkEvent::Exit) => {
self.ig = self.ig.parent().unwrap();
@@ -338,98 +627,19 @@ impl Iterator for Walk {
}
let (igtmp, err) = self.ig.add_child(ent.path());
self.ig = igtmp;
- return Some(Ok(DirEntry { dent: Some(ent), err: err }));
+ return Some(Ok(DirEntry::new_walkdir(ent, err)));
}
Ok(WalkEvent::File(ent)) => {
if self.skip_entry(&ent) {
continue;
}
- // If this isn't actually a file (e.g., a symlink),
- // then skip it.
- if !ent.file_type().is_file() {
- continue;
- }
- return Some(Ok(DirEntry { dent: Some(ent), err: None }));
+ return Some(Ok(DirEntry::new_walkdir(ent, None)));
}
}
}
}
}
-/// A directory entry with a possible error attached.
-///
-/// The error typically refers to a problem parsing ignore files in a
-/// particular directory.
-#[derive(Debug)]
-pub struct DirEntry {
- dent: Option<walkdir::DirEntry>,
- err: Option<Error>,
-}
-
-impl DirEntry {
- /// The full path that this entry represents.
- pub fn path(&self) -> &Path {
- self.dent.as_ref().map_or(Path::new("<stdin>"), |x| x.path())
- }
-
- /// Whether this entry corresponds to a symbolic link or not.
- pub fn path_is_symbolic_link(&self) -> bool {
- self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link())
- }
-
- /// Returns true if and only if this entry corresponds to stdin.
- ///
- /// i.e., The entry has depth 0 and its file name is `-`.
- pub fn is_stdin(&self) -> bool {
- self.dent.is_none()
- }
-
- /// Return the metadata for the file that this entry points to.
- pub fn metadata(&self) -> Result<Metadata, Error> {
- if let Some(dent) = self.dent.as_ref() {
- dent.metadata().map_err(|err| Error::WithPath {
- path: self.path().to_path_buf(),
- err: Box::new(Error::Io(io::Error::from(err))),
- })
- } else {
- let ioerr = io::Error::new(
- io::ErrorKind::Other, "stdin has no metadata");
- Err(Error::WithPath {
- path: Path::new("<stdin>").to_path_buf(),
- err: Box::new(Error::Io(ioerr)),
- })
- }
- }
-
- /// Return the file type for the file that this entry points to.
- ///
- /// This entry doesn't have a file type if it corresponds to stdin.
- pub fn file_type(&self) -> Option<FileType> {
- self.dent.as_ref().map(|x| x.file_type())
- }
-
- /// Return the file name of this entry.
- ///
- /// If this entry has no file name (e.g., `/`), then the full path is
- /// returned.
- pub fn file_name(&self) -> &OsStr {
- self.dent.as_ref().map_or(OsStr::new("<stdin>"), |x| x.file_name())
- }
-
- /// Returns the depth at which this entry was created relative to the root.
- pub fn depth(&self) -> usize {
- self.dent.as_ref().map_or(0, |x| x.depth())
- }
-
- /// Returns an error, if one exists, associated with processing this entry.
- ///
- /// An example of an error is one that occurred while parsing an ignore
- /// file.
- pub fn error(&self) -> Option<&Error> {
- self.err.as_ref()
- }
-}
-
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
@@ -485,21 +695,497 @@ impl Iterator for WalkEventIter {
}
}
+/// WalkState is used in the parallel recursive directory iterator to indicate
+/// whether walking should continue as normal, skip descending into a
+/// particular directory or quit the walk entirely.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum WalkState {
+ /// Continue walking as normal.
+ Continue,
+ /// If the directory entry given is a directory, don't descend into it.
+ /// In all other cases, this has no effect.
+ Skip,
+ /// Quit the entire iterator as soon as possible.
+ ///
+ /// Note that this is an inherently asynchronous action. It is possible
+ /// for more entries to be yielded even after instructing the iterator
+ /// to quit.
+ Quit,
+}
+
+impl WalkState {
+ fn is_quit(&self) -> bool {
+ *self == WalkState::Quit
+ }
+}
+
+/// WalkParallel is a parallel recursive directory iterator over files paths
+/// in one or more directories.
+///
+/// Only file and directory paths matching the rules are returned. By default,
+/// ignore files like `.gitignore` are respected. The precise matching rules
+/// and precedence is explained in the documentation for `WalkBuilder`.
+///
+/// Unlike `Walk`, this uses multiple threads for traversing a directory.
+pub struct WalkParallel {
+ paths: vec::IntoIter<PathBuf>,
+ ig_root: Ignore,
+ parents: bool,
+ max_depth: Option<usize>,
+ follow_links: bool,
+ threads: usize,
+}
+
+impl WalkParallel {
+ /// Execute the parallel recursive directory iterator. `mkf` is called
+ /// for each thread used for iteration. The function produced by `mkf`
+ /// is then in turn called for each visited file path.
+ pub fn run<F>(
+ self,
+ mut mkf: F,
+ ) where F: FnMut() -> Box<FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static> {
+ let mut f = mkf();
+ let threads = self.threads();
+ let queue = Arc::new(MsQueue::new());
+ let mut any_dirs = false;
+ // Send the initial set of root paths to the pool of workers.
+ // Note that we only send directories. For files, we send to them the
+ // callback directly.
+ for path in self.paths {
+ if path == Path::new("-") {
+ if f(Ok(DirEntry::new_stdin())).is_quit() {
+ return;
+ }
+ continue;
+ }
+ let dent = match DirEntryRaw::from_path(0, path) {
+ Ok(dent) => DirEntry::new_raw(dent, None),
+ Err(err) => {
+ if f(Err(err)).is_quit() {
+ return;
+ }
+ continue;
+ }
+ };
+ if !dent.file_type().map_or(false, |t| t.is_dir()) {
+ if f(Ok(dent)).is_quit() {
+ return;
+ }
+ } else {
+ any_dirs = true;
+ queue.push(Message::Work(Work {
+ dent: dent,
+ ignore: self.ig_root.clone(),
+ }));
+ }
+ }
+ // ... but there's no need to start workers if we don't need them.
+ if !any_dirs {
+ return;
+ }
+ // Create the workers and then wait for them to finish.
+ let num_waiting = Arc::new(AtomicUsize::new(0));
+ let num_quitting = Arc::new(AtomicUsize::new(0));
+ let mut handles = vec![];
+ for _ in 0..threads {
+ let worker = Worker {
+ f: mkf(),
+ queue: queue.clone(),
+ quit_now: Arc::new(AtomicBool::new(false)),
+ is_waiting: false,
+ is_quitting: false,
+ num_waiting: num_waiting.clone(),
+ num_quitting: num_quitting.clone(),
+ threads: threads,
+ parents: self.parents,
+ max_depth: self.max_depth,
+ follow_links: self.follow_links,
+ };
+ handles.push(thread::spawn(|| worker.run()));
+ }
+ for handle in handles {
+ handle.join().unwrap();
+ }
+ }
+
+ fn threads(&self) -> usize {
+ if self.threads == 0 {
+ 2
+ } else {
+ self.threads
+ }
+ }
+}
+
+/// Message is the set of instructions that a worker knows how to process.
+enum Message {
+ /// A work item corresponds to a directory that should be descended into.
+ /// Work items for entries that should be skipped or ignored should not
+ /// be produced.
+ Work(Work),
+ /// This instruction indicates that the worker should start quitting.
+ Quit,
+}
+
+/// A unit of work for each worker to process.
+///
+/// Each unit of work corresponds to a directory that should be descended
+/// into.
+struct Work {
+ /// The directory entry.
+ dent: DirEntry,
+ /// Any ignore matchers that have been built for this directory's parents.
+ ignore: Ignore,
+}
+
+impl Work {
+ /// Adds ignore rules for parent directories.
+ ///
+ /// Note that this only applies to entries at depth 0. On all other
+ /// entries, this is a no-op.
+ fn add_parents(&mut self) -> Option<Error> {
+ if self.dent.depth() > 0 {
+ return None;
+ }
+ // At depth 0, the path of this entry is a root path, so we can
+ // use it directly to add parent ignore rules.
+ let (ig, err) = self.ignore.add_parents(self.dent.path());
+ self.ignore = ig;
+ err
+ }
+
+ /// Reads the directory contents of this work item and adds ignore
+ /// rules for this directory.
+ ///
+ /// If there was a problem with reading the directory contents, then
+ /// an error is returned. If there was a problem reading the ignore
+ /// rules for this directory, then the error is attached to this
+ /// work item's directory entry.
+ fn read_dir(&mut self) -> Result<fs::ReadDir, Error> {
+ let readdir = match fs::read_dir(self.dent.path()) {
+ Ok(readdir) => readdir,
+ Err(err) => {
+ let err = Error::from(err)