diff options
author | Andrew Gallant <jamslam@gmail.com> | 2017-01-08 10:27:30 -0500 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2017-01-09 16:12:08 -0500 |
commit | 461e0c4e33b38532545af4f97ec92bd367963017 (patch) | |
tree | 26bbdd3340c1d9b29f90ac7856b18d5d4bb3ebe0 /ignore | |
parent | 82df3b768587e2839db849e6eed2db518a728778 (diff) |
Don't search stdout redirected file.
When running ripgrep like this:
    rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (compare the inode of the redirection
target with the inode of the file you're about to search), there are a few
problems with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
Previously this information wasn't exposed, but now it is (through both the
`ignore` and `walkdir` crates).
Fixes #286
Diffstat (limited to 'ignore')
-rw-r--r-- | ignore/Cargo.toml | 2 | ||||
-rw-r--r-- | ignore/src/walk.rs | 71 |
2 files changed, 71 insertions, 2 deletions
diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml index dc903783..238fe44e 100644 --- a/ignore/Cargo.toml +++ b/ignore/Cargo.toml @@ -25,7 +25,7 @@ log = "0.3" memchr = "1" regex = "0.2.0" thread_local = "0.3.2" -walkdir = "1" +walkdir = "1.0.7" [dev-dependencies] tempdir = "0.3.5" diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs index e1dd2052..860f26d8 100644 --- a/ignore/src/walk.rs +++ b/ignore/src/walk.rs @@ -72,6 +72,14 @@ impl DirEntry { self.dent.depth() } + /// Returns the underlying inode number if one exists. + /// + /// If this entry doesn't have an inode number, then `None` is returned. + #[cfg(unix)] + pub fn ino(&self) -> Option<u64> { + self.dent.ino() + } + /// Returns an error, if one exists, associated with processing this entry. /// /// An example of an error is one that occurred while parsing an ignore @@ -188,6 +196,16 @@ impl DirEntryInner { Raw(ref x) => x.depth(), } } + + #[cfg(unix)] + fn ino(&self) -> Option<u64> { + use self::DirEntryInner::*; + match *self { + Stdin => None, + Walkdir(ref x) => Some(x.ino()), + Raw(ref x) => Some(x.ino()), + } + } } /// DirEntryRaw is essentially copied from the walkdir crate so that we can @@ -203,6 +221,9 @@ struct DirEntryRaw { follow_link: bool, /// The depth at which this entry was generated relative to the root. depth: usize, + /// The underlying inode number (Unix only). 
+ #[cfg(unix)] + ino: u64, } impl fmt::Debug for DirEntryRaw { @@ -247,6 +268,11 @@ impl DirEntryRaw { self.depth } + #[cfg(unix)] + fn ino(&self) -> u64 { + self.ino + } + fn from_entry( depth: usize, ent: &fs::DirEntry, @@ -258,15 +284,57 @@ impl DirEntryRaw { err: Box::new(err), } })); - Ok(DirEntryRaw { + Ok(DirEntryRaw::from_entry_os(depth, ent, ty)) + } + + #[cfg(not(unix))] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> DirEntryRaw { + DirEntryRaw { path: ent.path(), ty: ty, follow_link: false, depth: depth, + } + } + + #[cfg(unix)] + fn from_entry_os( + depth: usize, + ent: &fs::DirEntry, + ty: fs::FileType, + ) -> DirEntryRaw { + use std::os::unix::fs::DirEntryExt; + + DirEntryRaw { + path: ent.path(), + ty: ty, + follow_link: false, + depth: depth, + ino: ent.ino(), + } + } + + #[cfg(not(unix))] + fn from_link(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> { + let md = try!(fs::metadata(&pb).map_err(|err| { + Error::Io(err).with_path(&pb) + })); + Ok(DirEntryRaw { + path: pb, + ty: md.file_type(), + follow_link: true, + depth: depth, }) } + #[cfg(unix)] fn from_link(depth: usize, pb: PathBuf) -> Result<DirEntryRaw, Error> { + use std::os::unix::fs::MetadataExt; + let md = try!(fs::metadata(&pb).map_err(|err| { Error::Io(err).with_path(&pb) })); @@ -275,6 +343,7 @@ impl DirEntryRaw { ty: md.file_type(), follow_link: true, depth: depth, + ino: md.ino(), }) } } |