summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Nguyen <benjamin.van.nguyen@gmail.com>2023-11-16 20:49:22 -0800
committerBenjamin Nguyen <benjamin.van.nguyen@gmail.com>2023-11-16 20:49:22 -0800
commit1d47c8e71e96179ee9366ec72c40a6ab79188a71 (patch)
treeec9f5952424b1477588444384f12953e67fb40d9
parent3c68b84a392715a8ecfc210ce0728e1648cbd5f2 (diff)
parallel traversal
-rw-r--r--Cargo.toml8
-rw-r--r--src/main.rs19
-rw-r--r--src/tree/mod.rs147
-rw-r--r--src/tree/traverse.rs (renamed from src/tree/parallel.rs)22
4 files changed, 51 insertions, 145 deletions
diff --git a/Cargo.toml b/Cargo.toml
index bddb952..d372746 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,7 +15,7 @@ keywords = ["tree", "find", "ls", "du", "commandline"]
exclude = ["assets/*", "scripts/*", "example/*"]
readme = "README.md"
license = "MIT"
-rust-version = "1.73.0"
+rust-version = "1.74.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -23,6 +23,12 @@ rust-version = "1.73.0"
name = "erd"
path = "src/main.rs"
+[lints.clippy]
+cast_precision_loss = "allow"
+struct_excessive_bools = "allow"
+wildcard_imports = "allow"
+obfuscated_if_else = "allow"
+
[dependencies]
ahash = "0.8.6"
ansi_term = "0.12.1"
diff --git a/src/main.rs b/src/main.rs
index 40924a0..b4b785a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,10 +1,4 @@
#![cfg_attr(windows, feature(windows_by_handle))]
-#![allow(
- clippy::cast_precision_loss,
- clippy::struct_excessive_bools,
- clippy::wildcard_imports,
- clippy::obfuscated_if_else
-)]
use log::Log;
use std::process::ExitCode;
@@ -43,13 +37,14 @@ fn run() -> error::Result<()> {
.then_some(logging::LoggityLog::init())
.transpose()?;
- let file_tree = if ctx.threads > 1 {
- FileTree::init_parallel(&ctx)?
- } else {
- FileTree::init(&ctx)?
- };
+ let file_tree = FileTree::init(&ctx)?;
- //println!("{}", file_tree[file_tree.root_id()].get().size().value());
+ for node in file_tree.traverse() {
+ if let indextree::NodeEdge::Start(id) = node {
+ let node = file_tree[id].get();
+ println!("{} -> {}", node.path().display(), node.size().value());
+ }
+ }
if let Some(logger) = logger {
logger.flush();
diff --git a/src/tree/mod.rs b/src/tree/mod.rs
index 4b3ff80..1d3c2c1 100644
--- a/src/tree/mod.rs
+++ b/src/tree/mod.rs
@@ -1,16 +1,10 @@
use crate::{error::prelude::*, file::File, user::Context};
use ahash::{HashMap, HashSet};
-use ignore::Walk;
use indextree::{Arena, NodeId, Traverse};
-use std::{
- convert::TryFrom,
- fs,
- ops::Deref,
- path::PathBuf,
-};
+use std::{fs, ops::Deref, path::PathBuf};
-/// Utilities for parallel traversal and associated event publishing/consuming.
-mod parallel;
+/// Serial and parallel traversal routines.
+mod traverse;
/// Concerned with initializing [`Walk`] and [`WalkParallel`] from the user [`Context`].
mod walker;
@@ -25,14 +19,8 @@ pub struct FileTree {
/// Errors associated with [`FileTree`].
#[derive(Debug, thiserror::Error)]
pub enum TreeError {
- #[error("Failed to query the root directory")]
- RootDirMissing,
-
- #[error("Expected ancestor node to exist in arena")]
- ParentNode,
-
- #[error("Failed to compute directory size")]
- MissingDirSize,
+ #[error("Failed to extrapolate the root directory")]
+ RootDir,
}
impl FileTree {
@@ -44,108 +32,12 @@ impl FileTree {
self.root_id
}
- /// Initializes a [`FileTree`] completely on one thread.
- pub fn init(ctx: &Context) -> Result<Self> {
- let mut walker = Walk::try_from(ctx)?;
-
- let root_entry = walker
- .next()
- .ok_or(TreeError::RootDirMissing)
- .into_report(ErrorCategory::Internal)?;
-
- let root_node = root_entry
- .into_report(ErrorCategory::Internal)
- .and_then(|data| File::init(data, ctx).into_report(ErrorCategory::Internal))?;
-
- let mut arena = Arena::new();
- let root_node_id = arena.new_node(root_node);
- let mut current_dir_id = root_node_id;
-
- let mut dirsize_map = HashMap::default();
- let mut dir_stack = vec![];
-
- dirsize_map.insert(root_node_id, 0);
-
- // To prevent two or more files with the same underlying inode from
- // counted more than once which would lead to inaccurate disk usage.
- let mut inode_set = HashSet::default();
-
- for dent in walker {
- let node = match dent
- .into_report(ErrorCategory::Warning)
- .and_then(|data| File::init(data, ctx).into_report(ErrorCategory::Warning))
- {
- Ok(data) => data,
- Err(e) => {
- log::error!("{e}");
- continue;
- },
- };
-
- let size = match node.inode() {
- Ok(inode) => inode_set
- .insert(inode)
- .then_some(node.size().value())
- .unwrap_or(0),
- Err(e) => {
- log::error!("{e}");
- node.size().value()
- },
- };
-
- // Check if new node is directory before we transfer ownership to `arena`.
- let is_dir = node.file_type().is_some_and(|ft| ft.is_dir());
-
- let new_node_id = arena.new_node(node);
-
- current_dir_id.append(new_node_id, &mut arena);
-
- let parent_dir_id = new_node_id
- .ancestors(&arena)
- .nth(1) // skip self
- .ok_or(TreeError::ParentNode)
- .into_report(ErrorCategory::Internal)
- .context(error_source!())?;
-
- if let Some(parent_size) = dirsize_map.get_mut(&parent_dir_id) {
- *parent_size += size;
- } else {
- dirsize_map.insert(parent_dir_id, size);
- }
-
- if is_dir {
- dir_stack.push(new_node_id);
- current_dir_id = new_node_id;
- }
- }
-
- while let Some(node_id) = dir_stack.pop() {
- let node_size = dirsize_map
- .remove(&node_id)
- .ok_or(TreeError::MissingDirSize)
- .into_report(ErrorCategory::Internal)
- .context(error_source!())?;
-
- let parent_size = node_id
- .ancestors(&arena)
- .nth(1) // skip self
- .and_then(|parent_dir_id| dirsize_map.get_mut(&parent_dir_id))
- .ok_or(TreeError::ParentNode)
- .into_report(ErrorCategory::Internal)
- .context(error_source!())?;
-
- *parent_size += node_size;
- }
-
- Ok(Self::new(root_node_id, arena))
- }
-
/// Like [`FileTree::init`] but leverages parallelism for disk-reads and [`File`] initialization.
- pub fn init_parallel(ctx: &Context) -> Result<Self> {
+ pub fn init(ctx: &Context) -> Result<Self> {
let mut arena = Arena::new();
let mut branches = HashMap::<PathBuf, Vec<NodeId>>::default();
- parallel::run(ctx, |file| {
+ traverse::run(ctx, |file| {
let node_id = arena.new_node(file);
let file = arena[node_id].get();
let file_path = file.path();
@@ -172,19 +64,19 @@ impl FileTree {
.parent()
.and_then(|p| branches.get(p))
.and_then(|b| (b.len() == 1).then(|| b[0]))
- .ok_or(TreeError::RootDirMissing)
+ .ok_or(TreeError::RootDir)
.into_report(ErrorCategory::Internal)?;
- let mut dfs_queue = vec![root_id];
+ let mut dfs_stack = vec![root_id];
let mut inode_set = HashSet::default();
- 'outer: while let Some(node_id) = dfs_queue.last() {
+ 'outer: while let Some(node_id) = dfs_stack.last() {
let current_id = *node_id;
let current_node_path = arena[current_id].get().path();
let Some(children) = branches.get_mut(current_node_path) else {
- dfs_queue.pop();
+ dfs_stack.pop();
continue;
};
@@ -204,31 +96,34 @@ impl FileTree {
err
);
continue;
- }
+ },
};
(size, is_dir, inode)
};
if child_is_dir {
- dfs_queue.push(child_node_id);
+ dfs_stack.push(child_node_id);
continue 'outer;
}
if inode_set.insert(child_inode) {
*arena[current_id].get_mut().size_mut() += child_size;
}
+
+ *arena[current_id].get_mut().size_mut() += inode_set
+ .insert(child_inode)
+ .then_some(child_size)
+ .unwrap_or(0);
}
- dfs_queue.pop();
+ dfs_stack.pop();
if let Some(parent_id) = current_id.ancestors(&arena).skip(1).nth(0) {
- let current_dir_size = {
- arena[current_id].get().size().value()
- };
+ let current_dir_size = { arena[current_id].get().size().value() };
*arena[parent_id].get_mut().size_mut() += current_dir_size;
}
}
-
+
Ok(Self { root_id, arena })
}
diff --git a/src/tree/parallel.rs b/src/tree/traverse.rs
index 3a611f9..153ea6e 100644
--- a/src/tree/parallel.rs
+++ b/src/tree/traverse.rs
@@ -7,6 +7,16 @@ use std::{
thread,
};
+/// Errors that may arise whe reading from Disk.
+#[derive(Debug, thiserror::Error)]
+pub enum TraverseError {
+ #[error("Failed to query the root directory")]
+ RootDirMissing,
+}
+
+/// Parallel traversal algorithm. `op` takes in a single argument which is the [`File`] that is
+/// retrieved from disk, returning a [`Result`]. If `op` returns an `Err` then traversal will
+/// immediately conclude.
pub fn run<F>(ctx: &Context, mut op: F) -> Result<()>
where
F: FnMut(File) -> Result<()> + Send,
@@ -38,6 +48,12 @@ where
Ok(())
}
+pub enum TraversalState {
+ Error(Error),
+ Ongoing(File),
+ Done,
+}
+
pub struct Visitor<'a> {
tx: Sender<TraversalState>,
ctx: &'a Context,
@@ -48,12 +64,6 @@ pub struct VisitorBuilder<'a> {
ctx: &'a Context,
}
-pub enum TraversalState {
- Error(Error),
- Ongoing(File),
- Done,
-}
-
impl<'a> VisitorBuilder<'a> {
pub fn new(tx: Sender<TraversalState>, ctx: &'a Context) -> Self {
Self { tx, ctx }