diff options
author | Canop <cano.petrole@gmail.com> | 2020-11-30 22:23:07 +0100 |
---|---|---|
committer | Canop <cano.petrole@gmail.com> | 2020-11-30 22:23:07 +0100 |
commit | 4f0632a74bfc11e3eebcc04ae6fede1a690ea606 (patch) | |
tree | 5e40ca065448352f683af0d850fb9312de9b7f43 | |
parent | 2fd09de426e8f7623180a834244330a0ec3f069a (diff) |
better cache management for dir size computation
Makes it faster in 2 common cases:
- computing the root size when its children are already computed
- going up one level in the tree
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | src/app/app.rs | 2 | ||||
-rw-r--r-- | src/display/col.rs | 2 | ||||
-rw-r--r-- | src/file_sum/mod.rs | 2 | ||||
-rw-r--r-- | src/file_sum/sum_computation.rs | 83 | ||||
-rw-r--r-- | src/icon/vscode.rs | 1 | ||||
-rw-r--r-- | src/tree/tree.rs | 2 | ||||
-rw-r--r-- | website/docs/img/20201130-sdp.png | bin | 0 -> 151532 bytes | |||
-rw-r--r-- | website/docs/index.md | 2 |
10 files changed, 76 insertions, 21 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b72613..bba6247 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ### next - when sizes are displayed (eg on `br -s`) show size of root line and root filesystem info +- modified size cache management makes some size computations faster <a name="v1.0.7"></a> ### v1.0.7 - 2020-11-27 @@ -127,7 +127,7 @@ And you can add shortcuts, for example a <kbd>ctrl</kbd> sequence or a function If you want to display *sizes*, *dates* and *permissions*, do `br -sdp` which gets you this: -![replace ls](website/docs/img/20200704-sdp.png) +![replace ls](website/docs/img/20201130-sdp.png) You may also toggle options with a few keystrokes while inside broot. For example hitting a space, a `d` then enter shows you the dates. Or a space, then `h` then enter and you see hidden files. diff --git a/src/app/app.rs b/src/app/app.rs index 0ec3750..4b19ec0 100644 --- a/src/app/app.rs +++ b/src/app/app.rs @@ -516,7 +516,7 @@ impl App { loop { if !self.quitting { self.display_panels(w, &skin, con)?; - if self.do_pending_tasks(con, &mut dam)? { + if time!(Debug, "pending_tasks", self.do_pending_tasks(con, &mut dam)?) 
{ let other_path = self.get_other_panel_path(); self.mut_panel().refresh_input_status(&other_path, con); self.display_panels(w, &skin, con)?; diff --git a/src/display/col.rs b/src/display/col.rs index 799207c..68446b2 100644 --- a/src/display/col.rs +++ b/src/display/col.rs @@ -99,8 +99,8 @@ pub type Cols = [Col;COLS_COUNT]; pub static DEFAULT_COLS: Cols = [ Col::Mark, Col::Git, - Col::Permission, Col::Date, + Col::Permission, Col::Size, Col::Count, Col::Branch, diff --git a/src/file_sum/mod.rs b/src/file_sum/mod.rs index 9cbd983..ffe2415 100644 --- a/src/file_sum/mod.rs +++ b/src/file_sum/mod.rs @@ -68,7 +68,7 @@ impl FileSum { Debug, "sum computation", path, - sum_computation::compute_dir_sum(path, dam), + sum_computation::compute_dir_sum(path, &mut sum_cache, dam), ); if let Some(sum) = sum { sum_cache.insert(PathBuf::from(path), sum); diff --git a/src/file_sum/sum_computation.rs b/src/file_sum/sum_computation.rs index f3e3cbe..6f65e87 100644 --- a/src/file_sum/sum_computation.rs +++ b/src/file_sum/sum_computation.rs @@ -4,6 +4,7 @@ use { crossbeam::channel, rayon::{ThreadPool, ThreadPoolBuilder}, std::{ + collections::HashMap, convert::TryInto, fs, path::{Path, PathBuf}, @@ -40,7 +41,7 @@ const THREADS_COUNT: usize = 6; /// varying depending on the OS: /// On unix, the computation is done on blocks of 512 bytes /// see https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.blocks -pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> { +pub fn compute_dir_sum(path: &Path, cache: &mut HashMap<PathBuf, FileSum>, dam: &Dam) -> Option<FileSum> { //debug!("compute size of dir {:?} --------------- ", path); lazy_static! 
{ @@ -52,18 +53,64 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> { #[cfg(unix)] let nodes = Arc::new(Mutex::new(HashSet::<NodeId>::default())); + // busy is the number of directories which are either being processed or queued + // We use this count to determine when threads can stop waiting for tasks + let mut busy = 0; + let mut sum = compute_file_sum(path); + // this MPMC channel contains the directory paths which must be handled. // A None means there's nothing left and the thread may send its result and stop let (dirs_sender, dirs_receiver) = channel::unbounded(); + // the first level is managed a little differently: we look at the cache + // before adding. This enables faster computations in two cases: + // - for the root line (assuming it's computed after the content) + // - when we navigate up the tree + if let Ok(entries) = fs::read_dir(path) { + for e in entries.flatten() { + if let Ok(md) = e.metadata() { + if md.is_dir() { + let entry_path = e.path(); + // we check the cache + if let Some(entry_sum) = cache.get(&entry_path) { + sum += *entry_sum; + continue; + } + // we add the directory to the channel of dirs needing + // processing + busy += 1; + dirs_sender.send(Some(entry_path)).unwrap(); + } else { + + #[cfg(unix)] + if md.nlink() > 1 { + let mut nodes = nodes.lock().unwrap(); + let node_id = NodeId { + inode: md.ino(), + dev: md.dev(), + }; + if !nodes.insert(node_id) { + // it was already in the set + continue; + } + } + + } + sum += md_sum(&md); + } + } + } + + if busy == 0 { + return Some(sum); + } + + let busy = Arc::new(AtomicIsize::new(busy)); + // this MPMC channel is here for the threads to send their results // at end of computation let (thread_sum_sender, thread_sum_receiver) = channel::bounded(THREADS_COUNT); - // busy is the number of directories which are either being processed or queued - // We use this count to determine when threads can stop waiting for tasks - let busy = Arc::new(AtomicIsize::new(1)); - 
dirs_sender.send(Some(PathBuf::from(path))).unwrap(); // Each thread does a summation without merge and the data are merged // at the end (this avoids waiting for a mutex during computation) @@ -105,16 +152,7 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> { } } - - #[cfg(unix)] - let size = md.blocks() * 512; - - #[cfg(not(unix))] - let size = md.len(); - - let seconds = extract_seconds(&md); - let entry_sum = FileSum::new(size, false, 1, seconds); - thread_sum += entry_sum; + thread_sum += md_sum(&md); } else { // we can't measure much but we can count the file thread_sum.incr(); @@ -136,7 +174,6 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> { }); } // Wait for the threads to finish and consolidate their results - let mut sum = compute_file_sum(path); for _ in 0..THREADS_COUNT { match thread_sum_receiver.recv() { Ok(thread_sum) => { @@ -179,11 +216,13 @@ pub fn compute_file_sum(path: &Path) -> FileSum { } #[cfg(unix)] +#[inline(always)] fn extract_seconds(md: &fs::Metadata) -> u32 { md.mtime().try_into().unwrap_or(0) } #[cfg(not(unix))] +#[inline(always)] fn extract_seconds(md: &fs::Metadata) -> u32 { if let Ok(st) = md.modified() { if let Ok(d) = st.duration_since(std::time::UNIX_EPOCH) { @@ -195,3 +234,15 @@ fn extract_seconds(md: &fs::Metadata) -> u32 { 0 } + +#[inline(always)] +fn md_sum(md: &fs::Metadata) -> FileSum { + #[cfg(unix)] + let size = md.blocks() * 512; + + #[cfg(not(unix))] + let size = md.len(); + + let seconds = extract_seconds(&md); + FileSum::new(size, false, 1, seconds) +} diff --git a/src/icon/vscode.rs b/src/icon/vscode.rs index 2241bd1..8869a00 100644 --- a/src/icon/vscode.rs +++ b/src/icon/vscode.rs @@ -13,6 +13,7 @@ pub struct VSCodeIconPlugin { } impl VSCodeIconPlugin { + #[allow(dead_code)] fn sanity_check( part_to_icon_name_map: &HashMap<&str, &str>, icon_name_to_icon_codepoint_map: &HashMap<&str, u32>, diff --git a/src/tree/tree.rs b/src/tree/tree.rs index c7670e1..977d6ad 100644 --- 
a/src/tree/tree.rs +++ b/src/tree/tree.rs @@ -366,6 +366,8 @@ impl Tree { /// To compute the size of all of them, this should be called until /// has_dir_missing_sum returns false pub fn fetch_some_missing_dir_sum(&mut self, dam: &Dam) { + // we prefer to compute the root directory last: its computation + // is faster when its first level children are already computed for i in (0..self.lines.len()).rev() { if self.lines[i].sum.is_none() && self.lines[i].line_type == TreeLineType::Dir { self.lines[i].sum = FileSum::from_dir(&self.lines[i].path, dam); diff --git a/website/docs/img/20201130-sdp.png b/website/docs/img/20201130-sdp.png Binary files differ new file mode 100644 index 0000000..4be4127 --- /dev/null +++ b/website/docs/img/20201130-sdp.png diff --git a/website/docs/index.md b/website/docs/index.md index c1d5bb9..d1fe444 100644 --- a/website/docs/index.md +++ b/website/docs/index.md @@ -119,7 +119,7 @@ And you can add shortcuts, for example a <kbd>ctrl</kbd> sequence or a function If you want to display *sizes*, *dates* and *permissions*, do `br -sdp` which gets you this: -![replace ls](img/20200704-sdp.png) +![replace ls](img/20201130-sdp.png) You may also toggle options with a few keystrokes while inside broot. For example hitting a space, a `d` then enter shows you the dates. Or a space, then `h` then enter and you see hidden files. |