From a28e664abd0d8912e4a2d85039fe282b9d37b994 Mon Sep 17 00:00:00 2001 From: Richard Khoury Date: Mon, 17 Aug 2020 09:08:31 +1000 Subject: ignore: check ignore rules before issuing stat calls This seems like an obvious optimization but becomes critical when filesystem operations even as simple as stat can result in significant overheads; an example of this was a bespoke filesystem layer in Windows that hosted files remotely and would download them on-demand when particular filesystem operations occurred. Users of this system who ensured correct file-type fileters were being used could still get unnecessary file access resulting in large downloads. Fixes #1657, Closes #1660 --- CHANGELOG.md | 5 +++++ crates/ignore/src/walk.rs | 25 ++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e98fc5e9..a8c47c1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,11 @@ Security fixes: This is the public facing issue tracking CVE-2021-3013. ripgrep's README now contains a section describing how to report a vulnerability. +Performance improvements: + +* [PERF #1657](https://github.com/BurntSushi/ripgrep/discussions/1657): + Check if a file should be ignored first before issuing stat calls. + Feature enhancements: * Added or improved file type filtering for ASP, Bazel, dvc, FlatBuffers, diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs index 31c1d9f1..4d4054a9 100644 --- a/crates/ignore/src/walk.rs +++ b/crates/ignore/src/walk.rs @@ -934,15 +934,23 @@ impl Walk { if ent.depth() == 0 { return Ok(false); } - + // We ensure that trivial skipping is done before any other potentially + // expensive operations (stat, filesystem other) are done. This seems + // like an obvious optimization but becomes critical when filesystem + // operations even as simple as stat can result in significant + // overheads; an example of this was a bespoke filesystem layer in + // Windows that hosted files remotely and would download them on-demand + // when particular filesystem operations occurred. Users of this system + // who ensured correct file-type fileters were being used could still + // get unnecessary file access resulting in large downloads. + if should_skip_entry(&self.ig, ent) { + return Ok(true); + } if let Some(ref stdout) = self.skip { if path_equals(ent, stdout)? { return Ok(true); } } - if should_skip_entry(&self.ig, ent) { - return Ok(true); - } if self.max_filesize.is_some() && !ent.is_dir() { return Ok(skip_filesize( self.max_filesize.unwrap(), @@ -1549,6 +1557,11 @@ impl<'s> Worker<'s> { } } } + // N.B. See analogous call in the single-threaded implementation about + // why it's important for this to come before the checks below. + if should_skip_entry(ig, &dent) { + return WalkState::Continue; + } if let Some(ref stdout) = self.skip { let is_stdout = match path_equals(&dent, stdout) { Ok(is_stdout) => is_stdout, @@ -1558,7 +1571,6 @@ impl<'s> Worker<'s> { return WalkState::Continue; } } - let should_skip_path = should_skip_entry(ig, &dent); let should_skip_filesize = if self.max_filesize.is_some() && !dent.is_dir() { skip_filesize( @@ -1575,8 +1587,7 @@ impl<'s> Worker<'s> { } else { false }; - if !should_skip_path && !should_skip_filesize && !should_skip_filtered - { + if !should_skip_filesize && !should_skip_filtered { self.send(Work { dent, ignore: ig.clone(), root_device }); } WalkState::Continue -- cgit v1.2.3