summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Canop <cano.petrole@gmail.com> 2020-11-30 22:23:07 +0100
committer: Canop <cano.petrole@gmail.com> 2020-11-30 22:23:07 +0100
commit: 4f0632a74bfc11e3eebcc04ae6fede1a690ea606 (patch)
tree: 5e40ca065448352f683af0d850fb9312de9b7f43
parent: 2fd09de426e8f7623180a834244330a0ec3f069a (diff)
better cache management for dir size computation
Makes it faster in 2 common cases:
- computing the root size when its children are already computed
- going up one level in the tree
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- README.md 2
-rw-r--r-- src/app/app.rs 2
-rw-r--r-- src/display/col.rs 2
-rw-r--r-- src/file_sum/mod.rs 2
-rw-r--r-- src/file_sum/sum_computation.rs 83
-rw-r--r-- src/icon/vscode.rs 1
-rw-r--r-- src/tree/tree.rs 2
-rw-r--r-- website/docs/img/20201130-sdp.png bin 0 -> 151532 bytes
-rw-r--r-- website/docs/index.md 2
10 files changed, 76 insertions, 21 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b72613..bba6247 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
### next
- when sizes are displayed (eg on `br -s`) show size of root line and root filesystem info
+- modified size cache management makes some size computations faster
<a name="v1.0.7"></a>
### v1.0.7 - 2020-11-27
diff --git a/README.md b/README.md
index 3232184..124faa0 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,7 @@ And you can add shortcuts, for example a <kbd>ctrl</kbd> sequence or a function
If you want to display *sizes*, *dates* and *permissions*, do `br -sdp` which gets you this:
-![replace ls](website/docs/img/20200704-sdp.png)
+![replace ls](website/docs/img/20201130-sdp.png)
You may also toggle options with a few keystrokes while inside broot. For example hitting a space, a `d` then enter shows you the dates. Or a space, then `h` then enter and you see hidden files.
diff --git a/src/app/app.rs b/src/app/app.rs
index 0ec3750..4b19ec0 100644
--- a/src/app/app.rs
+++ b/src/app/app.rs
@@ -516,7 +516,7 @@ impl App {
loop {
if !self.quitting {
self.display_panels(w, &skin, con)?;
- if self.do_pending_tasks(con, &mut dam)? {
+ if time!(Debug, "pending_tasks", self.do_pending_tasks(con, &mut dam)?) {
let other_path = self.get_other_panel_path();
self.mut_panel().refresh_input_status(&other_path, con);
self.display_panels(w, &skin, con)?;
diff --git a/src/display/col.rs b/src/display/col.rs
index 799207c..68446b2 100644
--- a/src/display/col.rs
+++ b/src/display/col.rs
@@ -99,8 +99,8 @@ pub type Cols = [Col;COLS_COUNT];
pub static DEFAULT_COLS: Cols = [
Col::Mark,
Col::Git,
- Col::Permission,
Col::Date,
+ Col::Permission,
Col::Size,
Col::Count,
Col::Branch,
diff --git a/src/file_sum/mod.rs b/src/file_sum/mod.rs
index 9cbd983..ffe2415 100644
--- a/src/file_sum/mod.rs
+++ b/src/file_sum/mod.rs
@@ -68,7 +68,7 @@ impl FileSum {
Debug,
"sum computation",
path,
- sum_computation::compute_dir_sum(path, dam),
+ sum_computation::compute_dir_sum(path, &mut sum_cache, dam),
);
if let Some(sum) = sum {
sum_cache.insert(PathBuf::from(path), sum);
diff --git a/src/file_sum/sum_computation.rs b/src/file_sum/sum_computation.rs
index f3e3cbe..6f65e87 100644
--- a/src/file_sum/sum_computation.rs
+++ b/src/file_sum/sum_computation.rs
@@ -4,6 +4,7 @@ use {
crossbeam::channel,
rayon::{ThreadPool, ThreadPoolBuilder},
std::{
+ collections::HashMap,
convert::TryInto,
fs,
path::{Path, PathBuf},
@@ -40,7 +41,7 @@ const THREADS_COUNT: usize = 6;
/// varying depending on the OS:
/// On unix, the computation is done on blocks of 512 bytes
/// see https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.blocks
-pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> {
+pub fn compute_dir_sum(path: &Path, cache: &mut HashMap<PathBuf, FileSum>, dam: &Dam) -> Option<FileSum> {
//debug!("compute size of dir {:?} --------------- ", path);
lazy_static! {
@@ -52,18 +53,64 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> {
#[cfg(unix)]
let nodes = Arc::new(Mutex::new(HashSet::<NodeId>::default()));
+ // busy is the number of directories which are either being processed or queued
+ // We use this count to determine when threads can stop waiting for tasks
+ let mut busy = 0;
+ let mut sum = compute_file_sum(path);
+
// this MPMC channel contains the directory paths which must be handled.
// A None means there's nothing left and the thread may send its result and stop
let (dirs_sender, dirs_receiver) = channel::unbounded();
+ // the first level is managed a little differently: we look at the cache
+ // before adding. This enables faster computations in two cases:
+ // - for the root line (assuming it's computed after the content)
+ // - when we navigate up the tree
+ if let Ok(entries) = fs::read_dir(path) {
+ for e in entries.flatten() {
+ if let Ok(md) = e.metadata() {
+ if md.is_dir() {
+ let entry_path = e.path();
+ // we check the cache
+ if let Some(entry_sum) = cache.get(&entry_path) {
+ sum += *entry_sum;
+ continue;
+ }
+ // we add the directory to the channel of dirs needing
+ // processing
+ busy += 1;
+ dirs_sender.send(Some(entry_path)).unwrap();
+ } else {
+
+ #[cfg(unix)]
+ if md.nlink() > 1 {
+ let mut nodes = nodes.lock().unwrap();
+ let node_id = NodeId {
+ inode: md.ino(),
+ dev: md.dev(),
+ };
+ if !nodes.insert(node_id) {
+ // it was already in the set
+ continue;
+ }
+ }
+
+ }
+ sum += md_sum(&md);
+ }
+ }
+ }
+
+ if busy == 0 {
+ return Some(sum);
+ }
+
+ let busy = Arc::new(AtomicIsize::new(busy));
+
// this MPMC channel is here for the threads to send their results
// at end of computation
let (thread_sum_sender, thread_sum_receiver) = channel::bounded(THREADS_COUNT);
- // busy is the number of directories which are either being processed or queued
- // We use this count to determine when threads can stop waiting for tasks
- let busy = Arc::new(AtomicIsize::new(1));
- dirs_sender.send(Some(PathBuf::from(path))).unwrap();
// Each thread does a summation without merge and the data are merged
// at the end (this avoids waiting for a mutex during computation)
@@ -105,16 +152,7 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> {
}
}
-
- #[cfg(unix)]
- let size = md.blocks() * 512;
-
- #[cfg(not(unix))]
- let size = md.len();
-
- let seconds = extract_seconds(&md);
- let entry_sum = FileSum::new(size, false, 1, seconds);
- thread_sum += entry_sum;
+ thread_sum += md_sum(&md);
} else {
// we can't measure much but we can count the file
thread_sum.incr();
@@ -136,7 +174,6 @@ pub fn compute_dir_sum(path: &Path, dam: &Dam) -> Option<FileSum> {
});
}
// Wait for the threads to finish and consolidate their results
- let mut sum = compute_file_sum(path);
for _ in 0..THREADS_COUNT {
match thread_sum_receiver.recv() {
Ok(thread_sum) => {
@@ -179,11 +216,13 @@ pub fn compute_file_sum(path: &Path) -> FileSum {
}
#[cfg(unix)]
+#[inline(always)]
fn extract_seconds(md: &fs::Metadata) -> u32 {
md.mtime().try_into().unwrap_or(0)
}
#[cfg(not(unix))]
+#[inline(always)]
fn extract_seconds(md: &fs::Metadata) -> u32 {
if let Ok(st) = md.modified() {
if let Ok(d) = st.duration_since(std::time::UNIX_EPOCH) {
@@ -195,3 +234,15 @@ fn extract_seconds(md: &fs::Metadata) -> u32 {
0
}
+
+#[inline(always)]
+fn md_sum(md: &fs::Metadata) -> FileSum {
+ #[cfg(unix)]
+ let size = md.blocks() * 512;
+
+ #[cfg(not(unix))]
+ let size = md.len();
+
+ let seconds = extract_seconds(&md);
+ FileSum::new(size, false, 1, seconds)
+}
diff --git a/src/icon/vscode.rs b/src/icon/vscode.rs
index 2241bd1..8869a00 100644
--- a/src/icon/vscode.rs
+++ b/src/icon/vscode.rs
@@ -13,6 +13,7 @@ pub struct VSCodeIconPlugin {
}
impl VSCodeIconPlugin {
+ #[allow(dead_code)]
fn sanity_check(
part_to_icon_name_map: &HashMap<&str, &str>,
icon_name_to_icon_codepoint_map: &HashMap<&str, u32>,
diff --git a/src/tree/tree.rs b/src/tree/tree.rs
index c7670e1..977d6ad 100644
--- a/src/tree/tree.rs
+++ b/src/tree/tree.rs
@@ -366,6 +366,8 @@ impl Tree {
/// To compute the size of all of them, this should be called until
/// has_dir_missing_sum returns false
pub fn fetch_some_missing_dir_sum(&mut self, dam: &Dam) {
+ // we prefer to compute the root directory last: its computation
+ // is faster when its first level children are already computed
for i in (0..self.lines.len()).rev() {
if self.lines[i].sum.is_none() && self.lines[i].line_type == TreeLineType::Dir {
self.lines[i].sum = FileSum::from_dir(&self.lines[i].path, dam);
diff --git a/website/docs/img/20201130-sdp.png b/website/docs/img/20201130-sdp.png
new file mode 100644
index 0000000..4be4127
--- /dev/null
+++ b/website/docs/img/20201130-sdp.png
Binary files differ
diff --git a/website/docs/index.md b/website/docs/index.md
index c1d5bb9..d1fe444 100644
--- a/website/docs/index.md
+++ b/website/docs/index.md
@@ -119,7 +119,7 @@ And you can add shortcuts, for example a <kbd>ctrl</kbd> sequence or a function
If you want to display *sizes*, *dates* and *permissions*, do `br -sdp` which gets you this:
-![replace ls](img/20200704-sdp.png)
+![replace ls](img/20201130-sdp.png)
You may also toggle options with a few keystrokes while inside broot. For example hitting a space, a `d` then enter shows you the dates. Or a space, then `h` then enter and you see hidden files.