From 6e27b5dc17876f11c0e4f602b912e99c078da2dd Mon Sep 17 00:00:00 2001 From: sharkdp Date: Sat, 21 Sep 2019 14:00:59 +0200 Subject: Simplify structure --- Cargo.toml | 4 ++ src/lib.rs | 1 + src/main.rs | 4 +- src/unique_id.rs | 32 +++++++++++++++ src/walk.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++ src/walk/mod.rs | 112 -------------------------------------------------- src/walk/unique_id.rs | 32 --------------- 7 files changed, 148 insertions(+), 147 deletions(-) create mode 100644 src/unique_id.rs create mode 100644 src/walk.rs delete mode 100644 src/walk/mod.rs delete mode 100644 src/walk/unique_id.rs diff --git a/Cargo.toml b/Cargo.toml index bf20b92..b45b1c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,10 @@ features = ["suggestions", "color", "wrap_help"] [dev-dependencies] tempdir = "0.3" +[[bin]] +name = "diskus" +path = "src/main.rs" + [profile.release] lto = true codegen-units = 1 diff --git a/src/lib.rs b/src/lib.rs index c57b22f..0867b2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ //! let (size_in_bytes, errors) = walk.run(); //! 
``` +mod unique_id; pub mod walk; pub use crate::walk::Walk; diff --git a/src/main.rs b/src/main.rs index c2d2163..7921df0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,3 @@ -mod walk; - use std::path::PathBuf; use clap::{crate_name, crate_version, App, AppSettings, Arg}; @@ -7,7 +5,7 @@ use humansize::file_size_opts::{self, FileSizeOpts}; use humansize::FileSize; use num_format::{Locale, ToFormattedString}; -use crate::walk::Walk; +use diskus::walk::{self, Walk}; fn print_result(size: u64, errors: &[walk::Err], size_format: &FileSizeOpts, verbose: bool) { if verbose { diff --git a/src/unique_id.rs b/src/unique_id.rs new file mode 100644 index 0000000..eb44e72 --- /dev/null +++ b/src/unique_id.rs @@ -0,0 +1,32 @@ +#[derive(Eq, PartialEq, Hash)] +pub struct UniqueID { + device: u64, + inode: u64, +} + +#[cfg(not(windows))] +pub fn generate_unique_id(metadata: &std::fs::Metadata) -> Option<UniqueID> { + use std::os::unix::fs::MetadataExt; + // If the entry has more than one hard link, generate + // a unique ID consisting of device and inode in order + // not to count this entry twice. + if metadata.is_file() && metadata.nlink() > 1 { + Some(UniqueID { + device: metadata.dev(), + inode: metadata.ino(), + }) + } else { + None + } +} + +#[cfg(windows)] +pub fn generate_unique_id(_metadata: &std::fs::Metadata) -> Option<UniqueID> { + // Windows-internal tools such as Powershell, Explorer or `dir` are not respecting hardlinks + // or junction points when determining the size of a directory. `diskus` does the same and + // counts such entries multiple times (on Unix systems, multiple hardlinks to a single file are + // counted just once). 
+ // + // See: https://github.com/sharkdp/diskus/issues/32 + None +} diff --git a/src/walk.rs b/src/walk.rs new file mode 100644 index 0000000..25a2f73 --- /dev/null +++ b/src/walk.rs @@ -0,0 +1,110 @@ +use std::collections::HashSet; +use std::fs; +use std::path::PathBuf; +use std::thread; + +use crossbeam_channel as channel; + +use rayon::{self, prelude::*}; + +use crate::unique_id::{generate_unique_id, UniqueID}; + +pub enum Err { + NoMetadataForPath(PathBuf), + CouldNotReadDir(PathBuf), +} + +enum Message { + SizeEntry(Option<UniqueID>, u64), + Error { err: Err }, +} + +fn walk(tx: channel::Sender<Message>, entries: &[PathBuf]) { + entries.into_par_iter().for_each_with(tx, |tx_ref, entry| { + if let Ok(metadata) = entry.symlink_metadata() { + let unique_id = generate_unique_id(&metadata); + + let size = metadata.len(); + + tx_ref.send(Message::SizeEntry(unique_id, size)).unwrap(); + + if metadata.is_dir() { + let mut children = vec![]; + match fs::read_dir(entry) { + Ok(child_entries) => { + for child_entry in child_entries { + if let Ok(child_entry) = child_entry { + children.push(child_entry.path()); + } + } + } + Err(_) => { + tx_ref + .send(Message::Error { + err: Err::CouldNotReadDir(entry.clone()), + }) + .unwrap(); + } + } + + walk(tx_ref.clone(), &children[..]); + }; + } else { + tx_ref + .send(Message::Error { + err: Err::NoMetadataForPath(entry.clone()), + }) + .unwrap(); + }; + }); +} + +pub struct Walk<'a> { + root_directories: &'a [PathBuf], + num_threads: usize, +} + +impl<'a> Walk<'a> { + pub fn new(root_directories: &'a [PathBuf], num_threads: usize) -> Walk { + Walk { + root_directories, + num_threads, + } + } + + pub fn run(&self) -> (u64, Vec<Err>) { + let (tx, rx) = channel::unbounded(); + + let receiver_thread = thread::spawn(move || { + let mut total = 0; + let mut ids = HashSet::new(); + let mut error_messages: Vec<Err> = Vec::new(); + for msg in rx { + match msg { + Message::SizeEntry(unique_id, size) => { + if let Some(unique_id) = unique_id { + // Only count this 
entry if the ID has not been seen + if ids.insert(unique_id) { + total += size; + } + } else { + total += size; + } + } + Message::Error { err } => { + error_messages.push(err); + } + } + } + (total, error_messages) + }); + + let pool = rayon::ThreadPoolBuilder::new() + .num_threads(self.num_threads) + .build() + .unwrap(); + pool.install(|| walk(tx, self.root_directories)); + + receiver_thread.join().unwrap() + } +} diff --git a/src/walk/mod.rs b/src/walk/mod.rs deleted file mode 100644 index d826096..0000000 --- a/src/walk/mod.rs +++ /dev/null @@ -1,112 +0,0 @@ -use std::collections::HashSet; -use std::fs; -use std::path::PathBuf; -use std::thread; - -use crossbeam_channel as channel; - -use rayon::{self, prelude::*}; - -mod unique_id; - -use unique_id::{generate_unique_id, UniqueID}; - -pub enum Err { - NoMetadataForPath(PathBuf), - CouldNotReadDir(PathBuf), -} - -enum Message { - SizeEntry(Option<UniqueID>, u64), - Error { err: Err }, -} - -fn walk(tx: channel::Sender<Message>, entries: &[PathBuf]) { - entries.into_par_iter().for_each_with(tx, |tx_ref, entry| { - if let Ok(metadata) = entry.symlink_metadata() { - let unique_id = generate_unique_id(&metadata); - - let size = metadata.len(); - - tx_ref.send(Message::SizeEntry(unique_id, size)).unwrap(); - - if metadata.is_dir() { - let mut children = vec![]; - match fs::read_dir(entry) { - Ok(child_entries) => { - for child_entry in child_entries { - if let Ok(child_entry) = child_entry { - children.push(child_entry.path()); - } - } - } - Err(_) => { - tx_ref - .send(Message::Error { - err: Err::CouldNotReadDir(entry.clone()), - }) - .unwrap(); - } - } - - walk(tx_ref.clone(), &children[..]); - }; - } else { - tx_ref - .send(Message::Error { - err: Err::NoMetadataForPath(entry.clone()), - }) - .unwrap(); - }; - }); -} - -pub struct Walk<'a> { - root_directories: &'a [PathBuf], - num_threads: usize, -} - -impl<'a> Walk<'a> { - pub fn new(root_directories: &'a [PathBuf], num_threads: usize) -> Walk { - Walk { - root_directories, - 
num_threads, - } - } - - pub fn run(&self) -> (u64, Vec<Err>) { - let (tx, rx) = channel::unbounded(); - - let receiver_thread = thread::spawn(move || { - let mut total = 0; - let mut ids = HashSet::new(); - let mut error_messages: Vec<Err> = Vec::new(); - for msg in rx { - match msg { - Message::SizeEntry(unique_id, size) => { - if let Some(unique_id) = unique_id { - // Only count this entry if the ID has not been seen - if ids.insert(unique_id) { - total += size; - } - } else { - total += size; - } - } - Message::Error { err } => { - error_messages.push(err); - } - } - } - (total, error_messages) - }); - - let pool = rayon::ThreadPoolBuilder::new() - .num_threads(self.num_threads) - .build() - .unwrap(); - pool.install(|| walk(tx, self.root_directories)); - - receiver_thread.join().unwrap() - } -} diff --git a/src/walk/unique_id.rs b/src/walk/unique_id.rs deleted file mode 100644 index eb44e72..0000000 --- a/src/walk/unique_id.rs +++ /dev/null @@ -1,32 +0,0 @@ -#[derive(Eq, PartialEq, Hash)] -pub struct UniqueID { - device: u64, - inode: u64, -} - -#[cfg(not(windows))] -pub fn generate_unique_id(metadata: &std::fs::Metadata) -> Option<UniqueID> { - use std::os::unix::fs::MetadataExt; - // If the entry has more than one hard link, generate - // a unique ID consisting of device and inode in order - // not to count this entry twice. - if metadata.is_file() && metadata.nlink() > 1 { - Some(UniqueID { - device: metadata.dev(), - inode: metadata.ino(), - }) - } else { - None - } -} - -#[cfg(windows)] -pub fn generate_unique_id(_metadata: &std::fs::Metadata) -> Option<UniqueID> { - // Windows-internal tools such as Powershell, Explorer or `dir` are not respecting hardlinks - // or junction points when determining the size of a directory. `diskus` does the same and - // counts such entries multiple times (on Unix systems, multiple hardlinks to a single file are - // counted just once). - // - // See: https://github.com/sharkdp/diskus/issues/32 - None -} -- cgit v1.2.3