summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Khoury <richen.khoury@gmail.com>2020-08-17 09:08:31 +1000
committerAndrew Gallant <jamslam@gmail.com>2021-05-31 21:51:18 -0400
commita28e664abd0d8912e4a2d85039fe282b9d37b994 (patch)
tree5640047f814d31879bd4005ff2be6da11290e9b1
parent0ca96e004c539e44242607fee5c4661edf27a5cd (diff)
ignore: check ignore rules before issuing stat calls
This seems like an obvious optimization but becomes critical when filesystem operations even as simple as stat can result in significant overheads; an example of this was a bespoke filesystem layer in Windows that hosted files remotely and would download them on-demand when particular filesystem operations occurred. Users of this system who ensured correct file-type fileters were being used could still get unnecessary file access resulting in large downloads. Fixes #1657, Closes #1660
-rw-r--r--CHANGELOG.md5
-rw-r--r--crates/ignore/src/walk.rs25
2 files changed, 23 insertions, 7 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e98fc5e9..a8c47c1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,11 @@ Security fixes:
This is the public facing issue tracking CVE-2021-3013. ripgrep's README
now contains a section describing how to report a vulnerability.
+Performance improvements:
+
+* [PERF #1657](https://github.com/BurntSushi/ripgrep/discussions/1657):
+ Check if a file should be ignored first before issuing stat calls.
+
Feature enhancements:
* Added or improved file type filtering for ASP, Bazel, dvc, FlatBuffers,
diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs
index 31c1d9f1..4d4054a9 100644
--- a/crates/ignore/src/walk.rs
+++ b/crates/ignore/src/walk.rs
@@ -934,15 +934,23 @@ impl Walk {
if ent.depth() == 0 {
return Ok(false);
}
-
+ // We ensure that trivial skipping is done before any other potentially
+ // expensive operations (stat, filesystem other) are done. This seems
+ // like an obvious optimization but becomes critical when filesystem
+ // operations even as simple as stat can result in significant
+ // overheads; an example of this was a bespoke filesystem layer in
+ // Windows that hosted files remotely and would download them on-demand
+ // when particular filesystem operations occurred. Users of this system
+ // who ensured correct file-type fileters were being used could still
+ // get unnecessary file access resulting in large downloads.
+ if should_skip_entry(&self.ig, ent) {
+ return Ok(true);
+ }
if let Some(ref stdout) = self.skip {
if path_equals(ent, stdout)? {
return Ok(true);
}
}
- if should_skip_entry(&self.ig, ent) {
- return Ok(true);
- }
if self.max_filesize.is_some() && !ent.is_dir() {
return Ok(skip_filesize(
self.max_filesize.unwrap(),
@@ -1549,6 +1557,11 @@ impl<'s> Worker<'s> {
}
}
}
+ // N.B. See analogous call in the single-threaded implementation about
+ // why it's important for this to come before the checks below.
+ if should_skip_entry(ig, &dent) {
+ return WalkState::Continue;
+ }
if let Some(ref stdout) = self.skip {
let is_stdout = match path_equals(&dent, stdout) {
Ok(is_stdout) => is_stdout,
@@ -1558,7 +1571,6 @@ impl<'s> Worker<'s> {
return WalkState::Continue;
}
}
- let should_skip_path = should_skip_entry(ig, &dent);
let should_skip_filesize =
if self.max_filesize.is_some() && !dent.is_dir() {
skip_filesize(
@@ -1575,8 +1587,7 @@ impl<'s> Worker<'s> {
} else {
false
};
- if !should_skip_path && !should_skip_filesize && !should_skip_filtered
- {
+ if !should_skip_filesize && !should_skip_filtered {
self.send(Work { dent, ignore: ig.clone(), root_device });
}
WalkState::Continue