diff options
author | Benjamin Nguyen <benjamin.van.nguyen@gmail.com> | 2023-11-16 20:49:22 -0800 |
---|---|---|
committer | Benjamin Nguyen <benjamin.van.nguyen@gmail.com> | 2023-11-16 20:49:22 -0800 |
commit | 1d47c8e71e96179ee9366ec72c40a6ab79188a71 (patch) | |
tree | ec9f5952424b1477588444384f12953e67fb40d9 | |
parent | 3c68b84a392715a8ecfc210ce0728e1648cbd5f2 (diff) |
parallel traversal
-rw-r--r-- | Cargo.toml | 8 | ||||
-rw-r--r-- | src/main.rs | 19 | ||||
-rw-r--r-- | src/tree/mod.rs | 147 | ||||
-rw-r--r-- | src/tree/traverse.rs (renamed from src/tree/parallel.rs) | 22 |
4 files changed, 51 insertions, 145 deletions
@@ -15,7 +15,7 @@ keywords = ["tree", "find", "ls", "du", "commandline"] exclude = ["assets/*", "scripts/*", "example/*"] readme = "README.md" license = "MIT" -rust-version = "1.73.0" +rust-version = "1.74.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -23,6 +23,12 @@ rust-version = "1.73.0" name = "erd" path = "src/main.rs" +[lints.clippy] +cast_precision_loss = "allow" +struct_excessive_bools = "allow" +wildcard_imports = "allow" +obfuscated_if_else = "allow" + [dependencies] ahash = "0.8.6" ansi_term = "0.12.1" diff --git a/src/main.rs b/src/main.rs index 40924a0..b4b785a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,4 @@ #![cfg_attr(windows, feature(windows_by_handle))] -#![allow( - clippy::cast_precision_loss, - clippy::struct_excessive_bools, - clippy::wildcard_imports, - clippy::obfuscated_if_else -)] use log::Log; use std::process::ExitCode; @@ -43,13 +37,14 @@ fn run() -> error::Result<()> { .then_some(logging::LoggityLog::init()) .transpose()?; - let file_tree = if ctx.threads > 1 { - FileTree::init_parallel(&ctx)? - } else { - FileTree::init(&ctx)? - }; + let file_tree = FileTree::init(&ctx)?; - //println!("{}", file_tree[file_tree.root_id()].get().size().value()); + for node in file_tree.traverse() { + if let indextree::NodeEdge::Start(id) = node { + let node = file_tree[id].get(); + println!("{} -> {}", node.path().display(), node.size().value()); + } + } if let Some(logger) = logger { logger.flush(); diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 4b3ff80..1d3c2c1 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1,16 +1,10 @@ use crate::{error::prelude::*, file::File, user::Context}; use ahash::{HashMap, HashSet}; -use ignore::Walk; use indextree::{Arena, NodeId, Traverse}; -use std::{ - convert::TryFrom, - fs, - ops::Deref, - path::PathBuf, -}; +use std::{fs, ops::Deref, path::PathBuf}; -/// Utilities for parallel traversal and associated event publishing/consuming. -mod parallel; +/// Serial and parallel traversal routines. +mod traverse; /// Concerned with initializing [`Walk`] and [`WalkParallel`] from the user [`Context`]. mod walker; @@ -25,14 +19,8 @@ pub struct FileTree { /// Errors associated with [`FileTree`]. #[derive(Debug, thiserror::Error)] pub enum TreeError { - #[error("Failed to query the root directory")] - RootDirMissing, - - #[error("Expected ancestor node to exist in arena")] - ParentNode, - - #[error("Failed to compute directory size")] - MissingDirSize, + #[error("Failed to extrapolate the root directory")] + RootDir, } impl FileTree { @@ -44,108 +32,12 @@ impl FileTree { self.root_id } - /// Initializes a [`FileTree`] completely on one thread. - pub fn init(ctx: &Context) -> Result<Self> { - let mut walker = Walk::try_from(ctx)?; - - let root_entry = walker - .next() - .ok_or(TreeError::RootDirMissing) - .into_report(ErrorCategory::Internal)?; - - let root_node = root_entry - .into_report(ErrorCategory::Internal) - .and_then(|data| File::init(data, ctx).into_report(ErrorCategory::Internal))?; - - let mut arena = Arena::new(); - let root_node_id = arena.new_node(root_node); - let mut current_dir_id = root_node_id; - - let mut dirsize_map = HashMap::default(); - let mut dir_stack = vec![]; - - dirsize_map.insert(root_node_id, 0); - - // To prevent two or more files with the same underlying inode from - // counted more than once which would lead to inaccurate disk usage. - let mut inode_set = HashSet::default(); - - for dent in walker { - let node = match dent - .into_report(ErrorCategory::Warning) - .and_then(|data| File::init(data, ctx).into_report(ErrorCategory::Warning)) - { - Ok(data) => data, - Err(e) => { - log::error!("{e}"); - continue; - }, - }; - - let size = match node.inode() { - Ok(inode) => inode_set - .insert(inode) - .then_some(node.size().value()) - .unwrap_or(0), - Err(e) => { - log::error!("{e}"); - node.size().value() - }, - }; - - // Check if new node is directory before we transfer ownership to `arena`. - let is_dir = node.file_type().is_some_and(|ft| ft.is_dir()); - - let new_node_id = arena.new_node(node); - - current_dir_id.append(new_node_id, &mut arena); - - let parent_dir_id = new_node_id - .ancestors(&arena) - .nth(1) // skip self - .ok_or(TreeError::ParentNode) - .into_report(ErrorCategory::Internal) - .context(error_source!())?; - - if let Some(parent_size) = dirsize_map.get_mut(&parent_dir_id) { - *parent_size += size; - } else { - dirsize_map.insert(parent_dir_id, size); - } - - if is_dir { - dir_stack.push(new_node_id); - current_dir_id = new_node_id; - } - } - - while let Some(node_id) = dir_stack.pop() { - let node_size = dirsize_map - .remove(&node_id) - .ok_or(TreeError::MissingDirSize) - .into_report(ErrorCategory::Internal) - .context(error_source!())?; - - let parent_size = node_id - .ancestors(&arena) - .nth(1) // skip self - .and_then(|parent_dir_id| dirsize_map.get_mut(&parent_dir_id)) - .ok_or(TreeError::ParentNode) - .into_report(ErrorCategory::Internal) - .context(error_source!())?; - - *parent_size += node_size; - } - - Ok(Self::new(root_node_id, arena)) - } - /// Like [`FileTree::init`] but leverages parallelism for disk-reads and [`File`] initialization. - pub fn init_parallel(ctx: &Context) -> Result<Self> { + pub fn init(ctx: &Context) -> Result<Self> { let mut arena = Arena::new(); let mut branches = HashMap::<PathBuf, Vec<NodeId>>::default(); - parallel::run(ctx, |file| { + traverse::run(ctx, |file| { let node_id = arena.new_node(file); let file = arena[node_id].get(); let file_path = file.path(); @@ -172,19 +64,19 @@ impl FileTree { .parent() .and_then(|p| branches.get(p)) .and_then(|b| (b.len() == 1).then(|| b[0])) - .ok_or(TreeError::RootDirMissing) + .ok_or(TreeError::RootDir) .into_report(ErrorCategory::Internal)?; - let mut dfs_queue = vec![root_id]; + let mut dfs_stack = vec![root_id]; let mut inode_set = HashSet::default(); - 'outer: while let Some(node_id) = dfs_queue.last() { + 'outer: while let Some(node_id) = dfs_stack.last() { let current_id = *node_id; let current_node_path = arena[current_id].get().path(); let Some(children) = branches.get_mut(current_node_path) else { - dfs_queue.pop(); + dfs_stack.pop(); continue; }; @@ -204,31 +96,34 @@ impl FileTree { err ); continue; - } + }, }; (size, is_dir, inode) }; if child_is_dir { - dfs_queue.push(child_node_id); + dfs_stack.push(child_node_id); continue 'outer; } if inode_set.insert(child_inode) { *arena[current_id].get_mut().size_mut() += child_size; } + + *arena[current_id].get_mut().size_mut() += inode_set + .insert(child_inode) + .then_some(child_size) + .unwrap_or(0); } - dfs_queue.pop(); + dfs_stack.pop(); if let Some(parent_id) = current_id.ancestors(&arena).skip(1).nth(0) { - let current_dir_size = { - arena[current_id].get().size().value() - }; + let current_dir_size = { arena[current_id].get().size().value() }; *arena[parent_id].get_mut().size_mut() += current_dir_size; } } - + Ok(Self { root_id, arena }) } diff --git a/src/tree/parallel.rs b/src/tree/traverse.rs index 3a611f9..153ea6e 100644 --- a/src/tree/parallel.rs +++ b/src/tree/traverse.rs @@ -7,6 +7,16 @@ use std::{ thread, }; +/// Errors that may arise whe reading from Disk. +#[derive(Debug, thiserror::Error)] +pub enum TraverseError { + #[error("Failed to query the root directory")] + RootDirMissing, +} + +/// Parallel traversal algorithm. `op` takes in a single argument which is the [`File`] that is +/// retrieved from disk, returning a [`Result`]. If `op` returns an `Err` then traversal will +/// immediately conclude. pub fn run<F>(ctx: &Context, mut op: F) -> Result<()> where F: FnMut(File) -> Result<()> + Send, @@ -38,6 +48,12 @@ where Ok(()) } +pub enum TraversalState { + Error(Error), + Ongoing(File), + Done, +} + pub struct Visitor<'a> { tx: Sender<TraversalState>, ctx: &'a Context, @@ -48,12 +64,6 @@ pub struct VisitorBuilder<'a> { ctx: &'a Context, } -pub enum TraversalState { - Error(Error), - Ongoing(File), - Done, -} - impl<'a> VisitorBuilder<'a> { pub fn new(tx: Sender<TraversalState>, ctx: &'a Context) -> Self { Self { tx, ctx } |