summaryrefslogtreecommitdiffstats
path: root/crates/core/args.rs
diff options
context:
space:
mode:
authornguyenvukhang <brew4k@gmail.com>2022-11-28 12:36:15 +0800
committerAndrew Gallant <jamslam@gmail.com>2023-07-09 10:14:03 -0400
commit6abb962f0d655a186972462d88d2de4e5caf4a33 (patch)
tree3a3970ac6cf20c6600c7517b8bb72b56ed660462 /crates/core/args.rs
parent6d95c130d5fb56a8641092a4808f33640bfa935c (diff)
cli: fix non-path sorting behavior
Previously, sorting worked by sorting the parents and then sorting the children within each parent. This was done during traversal, but it only works when sorting parents preserves the overall order. This generally only works for '--sort path' in ascending order. This commit fixes the rest of the sorting behavior by collecting all of the paths to search and then sorting them before searching. We only collect all of the paths when sorting was requested. Fixes #2243, Closes #2361
Diffstat (limited to 'crates/core/args.rs')
-rw-r--r--crates/core/args.rs145
1 files changed, 81 insertions, 64 deletions
diff --git a/crates/core/args.rs b/crates/core/args.rs
index 97347755..dc4cadb8 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -41,7 +41,7 @@ use crate::path_printer::{PathPrinter, PathPrinterBuilder};
use crate::search::{
PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
};
-use crate::subject::SubjectBuilder;
+use crate::subject::{Subject, SubjectBuilder};
use crate::Result;
/// The command that ripgrep should execute based on the command line
@@ -324,6 +324,46 @@ impl Args {
.build())
}
+ /// Returns true if and only if `stat`-related sorting is required
+ pub fn needs_stat_sort(&self) -> bool {
+ return self.matches().sort_by().map_or(
+ false,
+ |sort_by| match sort_by.kind {
+ SortByKind::LastModified
+ | SortByKind::Created
+ | SortByKind::LastAccessed => sort_by.check().is_ok(),
+ _ => false,
+ },
+ );
+ }
+
+ /// Sort subjects if a sorter is specified, but only if the sort requires
+ /// stat calls. Non-stat related sorts are handled during file traversal
+ ///
+ /// This function assumes that it is known that a stat-related sort is
+ /// required, and does not check for it again.
+ ///
+ /// It is important that that precondition is fulfilled, since this function
+ /// consumes the subjects iterator, and is therefore a blocking function.
+ pub fn sort_by_stat<I>(&self, subjects: I) -> Vec<Subject>
+ where
+ I: Iterator<Item = Subject>,
+ {
+ let sorter = match self.matches().sort_by() {
+ Ok(v) => v,
+ Err(_) => return subjects.collect(),
+ };
+ use SortByKind::*;
+ let mut keyed = match sorter.kind {
+ LastModified => load_timestamps(subjects, |m| m.modified()),
+ LastAccessed => load_timestamps(subjects, |m| m.accessed()),
+ Created => load_timestamps(subjects, |m| m.created()),
+ _ => return subjects.collect(),
+ };
+ keyed.sort_by(|a, b| sort_by_option(&a.0, &b.0, sorter.reverse));
+ keyed.into_iter().map(|v| v.1).collect()
+ }
+
/// Return a parallel walker that may use additional threads.
pub fn walker_parallel(&self) -> Result<WalkParallel> {
Ok(self
@@ -404,44 +444,23 @@ impl SortBy {
Ok(())
}
- fn configure_walk_builder(self, builder: &mut WalkBuilder) {
- // This isn't entirely optimal. In particular, we will wind up issuing
- // a stat for many files redundantly. Aside from having potentially
- // inconsistent results with respect to sorting, this is also slow.
- // We could fix this here at the expense of memory by caching stat
- // calls. A better fix would be to find a way to push this down into
- // directory traversal itself, but that's a somewhat nasty change.
+ /// Load sorters only if they are applicable at the walk stage.
+ ///
+ /// In particular, sorts that involve `stat` calls are not loaded because
+ /// the walk inherently assumes that parent directories are aware of all its
+ /// decendent properties, but `stat` does not work that way.
+ fn configure_builder_sort(self, builder: &mut WalkBuilder) {
+ use SortByKind::*;
match self.kind {
- SortByKind::None => {}
- SortByKind::Path => {
- if self.reverse {
- builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
- } else {
- builder.sort_by_file_name(|a, b| a.cmp(b));
- }
- }
- SortByKind::LastModified => {
- builder.sort_by_file_path(move |a, b| {
- sort_by_metadata_time(a, b, self.reverse, |md| {
- md.modified()
- })
- });
- }
- SortByKind::LastAccessed => {
- builder.sort_by_file_path(move |a, b| {
- sort_by_metadata_time(a, b, self.reverse, |md| {
- md.accessed()
- })
- });
+ Path if self.reverse => {
+ builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
}
- SortByKind::Created => {
- builder.sort_by_file_path(move |a, b| {
- sort_by_metadata_time(a, b, self.reverse, |md| {
- md.created()
- })
- });
+ Path => {
+ builder.sort_by_file_name(|a, b| a.cmp(b));
}
- }
+ // these use `stat` calls and will be sorted in Args::sort_by_stat()
+ LastModified | LastAccessed | Created | None => {}
+ };
}
}
@@ -876,9 +895,7 @@ impl ArgMatches {
if !self.no_ignore() && !self.no_ignore_dot() {
builder.add_custom_ignore_filename(".rgignore");
}
- let sortby = self.sort_by()?;
- sortby.check()?;
- sortby.configure_walk_builder(&mut builder);
+ self.sort_by()?.configure_builder_sort(&mut builder);
Ok(builder)
}
}
@@ -1740,32 +1757,18 @@ fn u64_to_usize(arg_name: &str, value: Option<u64>) -> Result<Option<usize>> {
}
}
-/// Builds a comparator for sorting two files according to a system time
-/// extracted from the file's metadata.
-///
-/// If there was a problem extracting the metadata or if the time is not
-/// available, then both entries compare equal.
-fn sort_by_metadata_time<G>(
- p1: &Path,
- p2: &Path,
+/// Sorts by an optional parameter.
+//
+/// If parameter is found to be `None`, both entries compare equal.
+fn sort_by_option<T: Ord>(
+ p1: &Option<T>,
+ p2: &Option<T>,
reverse: bool,
- get_time: G,
-) -> cmp::Ordering
-where
- G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
-{
- let t1 = match p1.metadata().and_then(|md| get_time(&md)) {
- Ok(t) => t,
- Err(_) => return cmp::Ordering::Equal,
- };
- let t2 = match p2.metadata().and_then(|md| get_time(&md)) {
- Ok(t) => t,
- Err(_) => return cmp::Ordering::Equal,
- };
- if reverse {
- t1.cmp(&t2).reverse()
- } else {
- t1.cmp(&t2)
+) -> cmp::Ordering {
+ match (p1, p2, reverse) {
+ (Some(p1), Some(p2), true) => p1.cmp(&p2).reverse(),
+ (Some(p1), Some(p2), false) => p1.cmp(&p2),
+ _ => cmp::Ordering::Equal,
}
}
@@ -1819,3 +1822,17 @@ fn current_dir() -> Result<PathBuf> {
)
.into())
}
+
+/// Tries to assign a timestamp to every `Subject` in the vector to help with
+/// sorting Subjects by time.
+fn load_timestamps<G>(
+ subjects: impl Iterator<Item = Subject>,
+ get_time: G,
+) -> Vec<(Option<SystemTime>, Subject)>
+where
+ G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
+{
+ subjects
+ .map(|s| (s.path().metadata().and_then(|m| get_time(&m)).ok(), s))
+ .collect()
+}