diff options
-rw-r--r-- | Cargo.lock | 2 | ||||
-rw-r--r-- | complete/_rg | 4 | ||||
-rw-r--r-- | ignore/Cargo.toml | 5 | ||||
-rw-r--r-- | ignore/src/lib.rs | 2 | ||||
-rw-r--r-- | ignore/src/walk.rs | 172 | ||||
-rw-r--r-- | src/app.rs | 28 | ||||
-rw-r--r-- | src/args.rs | 1 |
7 files changed, 194 insertions, 20 deletions
@@ -248,7 +248,7 @@ dependencies = [ "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "walkdir 2.2.5 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/complete/_rg b/complete/_rg index fcb5bbf9..77f8394c 100644 --- a/complete/_rg +++ b/complete/_rg @@ -81,6 +81,10 @@ _rg() { {-H,--with-filename}'[show file name for matches]' "--no-filename[don't show file name for matches]" + + '(file-system)' # File system options + "--one-file-system[don't descend into directories on other file systems]" + $no'--no-one-file-system[descend into directories on other file systems]' + + '(fixed)' # Fixed-string options {-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]' $no"--no-fixed-strings[don't treat pattern as literal string]" diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml index 859ac349..c2a47b90 100644 --- a/ignore/Cargo.toml +++ b/ignore/Cargo.toml @@ -28,9 +28,8 @@ same-file = "1" thread_local = "0.3.2" walkdir = "2.2.2" -[target.'cfg(windows)'.dependencies.winapi] -version = "0.3" -features = ["std", "winnt"] +[target.'cfg(windows)'.dependencies.winapi-util] +version = "0.1.1" [dev-dependencies] tempdir = "0.3.5" diff --git a/ignore/src/lib.rs b/ignore/src/lib.rs index 190794f5..ee313685 100644 --- a/ignore/src/lib.rs +++ b/ignore/src/lib.rs @@ -60,7 +60,7 @@ extern crate tempdir; extern crate thread_local; extern crate walkdir; #[cfg(windows)] -extern crate winapi; +extern crate winapi_util; use std::error; use std::fmt; diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs index 70bbdc20..6cbd76d6 100644 --- a/ignore/src/walk.rs +++ b/ignore/src/walk.rs @@ -452,6 +452,7 @@ pub struct WalkBuilder { max_depth: Option<usize>, max_filesize: Option<u64>, follow_links: bool, + same_file_system: bool, sorter: Option<Arc< Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static >>, @@ -485,6 +486,7 @@ impl WalkBuilder { max_depth: None, max_filesize: None, follow_links: false, + same_file_system: false, sorter: None, threads: 0, } @@ -501,6 +503,7 @@ impl WalkBuilder { } else { let mut wd = WalkDir::new(p); wd = wd.follow_links(follow_links || p.is_file()); + wd = wd.same_file_system(self.same_file_system); if let Some(max_depth) = max_depth { wd = wd.max_depth(max_depth); } @@ -535,6 +538,7 @@ impl WalkBuilder { max_depth: self.max_depth, max_filesize: self.max_filesize, follow_links: self.follow_links, + same_file_system: self.same_file_system, threads: self.threads, } } @@ -736,6 +740,19 @@ impl WalkBuilder { self.sorter = Some(Arc::new(cmp)); self } + + /// Do not cross file system boundaries. + /// + /// When this option is enabled, directory traversal will not descend into + /// directories that are on a different file system from the root path. + /// + /// Currently, this option is only supported on Unix and Windows. If this + /// option is used on an unsupported platform, then directory traversal + /// will immediately return an error and will not yield any entries. + pub fn same_file_system(&mut self, yes: bool) -> &mut WalkBuilder { + self.same_file_system = yes; + self + } } /// Walk is a recursive directory iterator over file paths in one or more @@ -935,6 +952,7 @@ pub struct WalkParallel { max_filesize: Option<u64>, max_depth: Option<usize>, follow_links: bool, + same_file_system: bool, threads: usize, } @@ -949,24 +967,42 @@ impl WalkParallel { let mut f = mkf(); let threads = self.threads(); // TODO: Figure out how to use a bounded channel here. With an - // unbounded channel, the workers can run away and will up memory + // unbounded channel, the workers can run away and fill up memory // with all of the file paths. But a bounded channel doesn't work since // our producers are also are consumers, so they end up getting stuck. // // We probably need to rethink parallel traversal completely to fix - // this. + // this. The best case scenario would be finding a way to use rayon + // to do this. let (tx, rx) = channel::unbounded(); let mut any_work = false; // Send the initial set of root paths to the pool of workers. // Note that we only send directories. For files, we send to them the // callback directly. for path in self.paths { - let dent = + let (dent, root_device) = if path == Path::new("-") { - DirEntry::new_stdin() + (DirEntry::new_stdin(), None) } else { + let root_device = + if !self.same_file_system { + None + } else { + match device_num(&path) { + Ok(root_device) => Some(root_device), + Err(err) => { + let err = Error::Io(err).with_path(path); + if f(Err(err)).is_quit() { + return; + } + continue; + } + } + }; match DirEntryRaw::from_path(0, path, false) { - Ok(dent) => DirEntry::new_raw(dent, None), + Ok(dent) => { + (DirEntry::new_raw(dent, None), root_device) + } Err(err) => { if f(Err(err)).is_quit() { return; @@ -978,6 +1014,7 @@ impl WalkParallel { tx.send(Message::Work(Work { dent: dent, ignore: self.ig_root.clone(), + root_device: root_device, })); any_work = true; } @@ -1042,6 +1079,9 @@ struct Work { dent: DirEntry, /// Any ignore matchers that have been built for this directory's parents. ignore: Ignore, + /// The root device number. When present, only files with the same device + /// number should be considered. + root_device: Option<u64>, } impl Work { @@ -1163,6 +1203,23 @@ impl Worker { continue; } }; + let descend = + if let Some(root_device) = work.root_device { + match is_same_file_system(root_device, work.dent.path()) { + Ok(true) => true, + Ok(false) => false, + Err(err) => { + if (self.f)(Err(err)).is_quit() { + self.quit_now(); + return; + } + false + } + } + } else { + true + }; + let depth = work.dent.depth(); match (self.f)(Ok(work.dent)) { WalkState::Continue => {} @@ -1172,11 +1229,20 @@ impl Worker { return; } } + if !descend { + continue; + } if self.max_depth.map_or(false, |max| depth >= max) { continue; } for result in readdir { - if self.run_one(&work.ignore, depth + 1, result).is_quit() { + let state = self.run_one( + &work.ignore, + depth + 1, + work.root_device, + result, + ); + if state.is_quit() { self.quit_now(); return; } @@ -1200,6 +1266,7 @@ impl Worker { &mut self, ig: &Ignore, depth: usize, + root_device: Option<u64>, result: Result<fs::DirEntry, io::Error>, ) -> WalkState { let fs_dent = match result { @@ -1232,16 +1299,22 @@ impl Worker { let is_dir = dent.is_dir(); let max_size = self.max_filesize; let should_skip_path = skip_path(ig, dent.path(), is_dir); - let should_skip_filesize = if !is_dir && max_size.is_some() { - skip_filesize(max_size.unwrap(), dent.path(), &dent.metadata().ok()) - } else { - false - }; + let should_skip_filesize = + if !is_dir && max_size.is_some() { + skip_filesize( + max_size.unwrap(), + dent.path(), + &dent.metadata().ok(), + ) + } else { + false + }; if !should_skip_path && !should_skip_filesize { self.tx.send(Message::Work(Work { dent: dent, ignore: ig.clone(), + root_device: root_device, })); } WalkState::Continue @@ -1412,7 +1485,11 @@ fn skip_filesize( } } -fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool { +fn skip_path( + ig: &Ignore, + path: &Path, + is_dir: bool, +) -> bool { let m = ig.matched(path, is_dir); if m.is_ignore() { debug!("ignoring {}: {:?}", path.display(), m); @@ -1425,6 +1502,37 @@ fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool { } } +/// Returns true if and only if the given path is on the same device as the +/// given root device. +fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> { + let dent_device = device_num(path) + .map_err(|err| Error::Io(err).with_path(path))?; + Ok(root_device == dent_device) +} + +#[cfg(unix)] +fn device_num<P: AsRef<Path>>(path: P)-> io::Result<u64> { + use std::os::unix::fs::MetadataExt; + + path.as_ref().metadata().map(|md| md.dev()) +} + + #[cfg(windows)] +fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> { + use winapi_util::{Handle, file}; + + let h = Handle::from_path_any(path)?; + file::information(h).map(|info| info.volume_serial_number()) +} + +#[cfg(not(any(unix, windows)))] +fn device_num<P: AsRef<Path>>(_: P)-> io::Result<u64> { + Err(io::Error::new( + io::ErrorKind::Other, + "walkdir: same_file_system option not supported on this platform", + )) +} + #[cfg(test)] mod tests { use std::fs::{self, File}; @@ -1434,7 +1542,7 @@ mod tests { use tempdir::TempDir; - use super::{DirEntry, WalkBuilder, WalkState}; + use super::{DirEntry, WalkBuilder, WalkState, device_num}; fn wfile<P: AsRef<Path>>(path: P, contents: &str) { let mut file = File::create(path).unwrap(); @@ -1525,9 +1633,9 @@ mod tests { expected: &[&str], ) { let got = walk_collect(prefix, builder); - assert_eq!(got, mkpaths(expected)); + assert_eq!(got, mkpaths(expected), "single threaded"); let got = walk_collect_parallel(prefix, builder); - assert_eq!(got, mkpaths(expected)); + assert_eq!(got, mkpaths(expected), "parallel"); } #[test] @@ -1741,4 +1849,38 @@ mod tests { "a", "a/b", ]); } + + // It's a little tricky to test the 'same_file_system' option since + // we need an environment with more than one file system. We adopt a + // heuristic where /sys is typically a distinct volume on Linux and roll + // with that. + #[test] + #[cfg(target_os = "linux")] + fn same_file_system() { + // If for some reason /sys doesn't exist or isn't a directory, just + // skip this test. + if !Path::new("/sys").is_dir() { + return; + } + + // If our test directory actually isn't a different volume from /sys, + // then this test is meaningless and we shouldn't run it. + let td = TempDir::new("walk-test-").unwrap(); + if device_num(td.path()).unwrap() == device_num("/sys").unwrap() { + return; + } + + mkdirp(td.path().join("same_file")); + symlink("/sys", td.path().join("same_file").join("alink")); + + // Create a symlink to sys and enable following symlinks. If the + // same_file_system option doesn't work, then this probably will hit a + // permission error. Otherwise, it should just skip over the symlink + // completely. + let mut builder = WalkBuilder::new(td.path()); + builder.follow_links(true).same_file_system(true); + assert_paths(td.path(), &builder, &[ + "same_file", "same_file/alink", + ]); + } } @@ -586,6 +586,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> { flag_no_pcre2_unicode(&mut args); flag_null(&mut args); flag_null_data(&mut args); + flag_one_file_system(&mut args); flag_only_matching(&mut args); flag_path_separator(&mut args); flag_passthru(&mut args); @@ -1647,6 +1648,33 @@ Using this flag implies -a/--text. args.push(arg); } +fn flag_one_file_system(args: &mut Vec<RGArg>) { + const SHORT: &str = + "Do not descend into directories on other file systems."; + const LONG: &str = long!("\ +When enabled, ripgrep will not cross file system boundaries relative to where +the search started from. + +Note that this applies to each path argument given to ripgrep. For example, in +the command 'rg --one-file-system /foo/bar /quux/baz', ripgrep will search both +'/foo/bar' and '/quux/baz' even if they are on different file systems, but will +not cross a file system boundary when traversing each path's directory tree. + +This is similar to find's '-xdev' or '-mount' flag. + +This flag can be disabled with --no-one-file-system. +"); + let arg = RGArg::switch("one-file-system") + .help(SHORT).long_help(LONG) + .overrides("no-one-file-system"); + args.push(arg); + + let arg = RGArg::switch("no-one-file-system") + .hidden() + .overrides("one-file-system"); + args.push(arg); +} + fn flag_only_matching(args: &mut Vec<RGArg>) { const SHORT: &str = "Print only matches parts of a line."; const LONG: &str = long!("\ diff --git a/src/args.rs b/src/args.rs index 2343102e..6e79cb0a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -663,6 +663,7 @@ impl ArgMatches { .follow_links(self.is_present("follow")) .max_filesize(self.max_file_size()?) .threads(self.threads()?) + .same_file_system(self.is_present("one-file-system")) .overrides(self.overrides()?) .types(self.types()?) .hidden(!self.hidden()) |