diff options
author | Jon Moroney <darakian@gmail.com> | 2022-03-21 18:18:35 -0700 |
---|---|---|
committer | Jon Moroney <darakian@gmail.com> | 2022-03-21 18:20:09 -0700 |
commit | 166401766a69ddb2f9180677ac381073f451c6a0 (patch) | |
tree | 4544f31ae4fd04200da8ab9d0564b0a7721cbbe2 | |
parent | e4c574ffd9855384cb35ec7534d3eba6b03ba867 (diff) |
First pass at directory exclusion logic
-rw-r--r-- | src/lib.rs | 23 | ||||
-rw-r--r-- | src/main.rs | 11 |
2 files changed, 22 insertions, 12 deletions
@@ -24,19 +24,17 @@ enum ChannelPackage { /// let directories = vec!["/home/jon", "/home/doe"]; /// let (files, errors) = ddh::deduplicate_dirs(directories); /// ``` -pub fn deduplicate_dirs<P: AsRef<Path> + Sync>( - search_dirs: Vec<P>, -) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) { - deduplicate_dirs_with_min(search_dirs, 0) -} -pub fn deduplicate_dirs_with_min<P: AsRef<Path> + Sync>( - search_dirs: Vec<P>, min_size: u64) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) { +pub fn deduplicate_dirs<P: AsRef<Path> + Sync>( + search_dirs: Vec<P>, + ignore_dirs: Vec<P>, + min_size: u64) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) { let (sender, receiver) = channel(); + let ignore_paths = ignore_dirs.iter().map(|x| x.as_ref().canonicalize().unwrap()).collect(); search_dirs .par_iter() .for_each_with(sender, |s, search_dir| { - traverse_and_spawn(search_dir.as_ref(), s.clone(), min_size); + traverse_and_spawn(search_dir.as_ref(), &ignore_paths, s.clone(), min_size); }); let mut files_of_lengths: IntMap<u64, Vec<Fileinfo>> = IntMap::default(); let mut errors = Vec::new(); @@ -61,7 +59,10 @@ pub fn deduplicate_dirs_with_min<P: AsRef<Path> + Sync>( (complete_files, errors) } -fn traverse_and_spawn(current_path: impl AsRef<Path>, sender: Sender<ChannelPackage>, min_size: u64) { +fn traverse_and_spawn(current_path: impl AsRef<Path>, ignore_dirs: &Vec<PathBuf>, sender: Sender<ChannelPackage>, min_size: u64) { + if current_path.as_ref().canonicalize().is_ok() && ignore_dirs.iter().any(|x| current_path.as_ref().canonicalize().unwrap().starts_with(x)){ + return; + } let current_path_metadata = match fs::symlink_metadata(&current_path) { Err(e) => { sender @@ -104,10 +105,10 @@ fn traverse_and_spawn(current_path: impl AsRef<Path>, sender: Sender<ChannelPack .is_file() }); files.par_iter().for_each_with(sender.clone(), |sender, x| { - traverse_and_spawn(&x.path(), sender.clone(), min_size) + traverse_and_spawn(&x.path(), ignore_dirs, sender.clone(), min_size) }); 
dirs.into_par_iter().for_each_with(sender, |sender, x| { - traverse_and_spawn(x.path().as_path(), sender.clone(), min_size); + traverse_and_spawn(x.path().as_path(), ignore_dirs, sender.clone(), min_size); }) } Err(e) => { diff --git a/src/main.rs b/src/main.rs index ecb9ac1..6dab7f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,6 +34,14 @@ fn main() { .min_values(1) .required(true) .takes_value(true)) + .arg(Arg::new("ignore") + .short('i') + .long("ignore") + .value_name("Ignore") + .help("Directories to ignore") + .min_values(1) + .required(false) + .takes_value(true)) .arg(Arg::new("Blocksize") .short('b') .long("blocksize") @@ -74,10 +82,11 @@ fn main() { //let (sender, receiver) = channel(); let search_dirs: Vec<_> = arguments.values_of("directories").unwrap().collect(); + let ignore_dirs: Vec<_> = arguments.values_of("ignore").unwrap().collect(); let min_size: u64 = arguments.value_of("Minimum").unwrap().parse::<u64>().unwrap_or(0); let (complete_files, read_errors): (Vec<Fileinfo>, Vec<(_, _)>) = - ddh::deduplicate_dirs_with_min(search_dirs, min_size); + ddh::deduplicate_dirs(search_dirs, ignore_dirs, min_size); let (shared_files, unique_files): (Vec<&Fileinfo>, Vec<&Fileinfo>) = complete_files .par_iter() .partition(|&x| x.get_paths().len() > 1); |