summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Moroney <darakian@gmail.com> 2022-03-21 18:18:35 -0700
committer: Jon Moroney <darakian@gmail.com> 2022-03-21 18:20:09 -0700
commit: 166401766a69ddb2f9180677ac381073f451c6a0 (patch)
tree: 4544f31ae4fd04200da8ab9d0564b0a7721cbbe2
parent: e4c574ffd9855384cb35ec7534d3eba6b03ba867 (diff)
First pass at directory exclusion logic
-rw-r--r-- src/lib.rs 23
-rw-r--r-- src/main.rs 11
2 files changed, 22 insertions, 12 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 7cb6333..d5de6c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -24,19 +24,17 @@ enum ChannelPackage {
/// let directories = vec!["/home/jon", "/home/doe"];
/// let (files, errors) = ddh::deduplicate_dirs(directories);
/// ```
-pub fn deduplicate_dirs<P: AsRef<Path> + Sync>(
- search_dirs: Vec<P>,
-) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) {
- deduplicate_dirs_with_min(search_dirs, 0)
-}
-pub fn deduplicate_dirs_with_min<P: AsRef<Path> + Sync>(
- search_dirs: Vec<P>, min_size: u64) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) {
+pub fn deduplicate_dirs<P: AsRef<Path> + Sync>(
+ search_dirs: Vec<P>,
+ ignore_dirs: Vec<P>,
+ min_size: u64) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) {
let (sender, receiver) = channel();
+ let ignore_paths = ignore_dirs.iter().map(|x| x.as_ref().canonicalize().unwrap()).collect();
search_dirs
.par_iter()
.for_each_with(sender, |s, search_dir| {
- traverse_and_spawn(search_dir.as_ref(), s.clone(), min_size);
+ traverse_and_spawn(search_dir.as_ref(), &ignore_paths, s.clone(), min_size);
});
let mut files_of_lengths: IntMap<u64, Vec<Fileinfo>> = IntMap::default();
let mut errors = Vec::new();
@@ -61,7 +59,10 @@ pub fn deduplicate_dirs_with_min<P: AsRef<Path> + Sync>(
(complete_files, errors)
}
-fn traverse_and_spawn(current_path: impl AsRef<Path>, sender: Sender<ChannelPackage>, min_size: u64) {
+fn traverse_and_spawn(current_path: impl AsRef<Path>, ignore_dirs: &Vec<PathBuf>, sender: Sender<ChannelPackage>, min_size: u64) {
+ if current_path.as_ref().canonicalize().is_ok() && ignore_dirs.iter().any(|x| current_path.as_ref().canonicalize().unwrap().starts_with(x)){
+ return;
+ }
let current_path_metadata = match fs::symlink_metadata(&current_path) {
Err(e) => {
sender
@@ -104,10 +105,10 @@ fn traverse_and_spawn(current_path: impl AsRef<Path>, sender: Sender<ChannelPack
.is_file()
});
files.par_iter().for_each_with(sender.clone(), |sender, x| {
- traverse_and_spawn(&x.path(), sender.clone(), min_size)
+ traverse_and_spawn(&x.path(), ignore_dirs, sender.clone(), min_size)
});
dirs.into_par_iter().for_each_with(sender, |sender, x| {
- traverse_and_spawn(x.path().as_path(), sender.clone(), min_size);
+ traverse_and_spawn(x.path().as_path(), ignore_dirs, sender.clone(), min_size);
})
}
Err(e) => {
diff --git a/src/main.rs b/src/main.rs
index ecb9ac1..6dab7f8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -34,6 +34,14 @@ fn main() {
.min_values(1)
.required(true)
.takes_value(true))
+ .arg(Arg::new("ignore")
+ .short('i')
+ .long("ignore")
+ .value_name("Ignore")
+ .help("Directories to ignore")
+ .min_values(1)
+ .required(false)
+ .takes_value(true))
.arg(Arg::new("Blocksize")
.short('b')
.long("blocksize")
@@ -74,10 +82,11 @@ fn main() {
//let (sender, receiver) = channel();
let search_dirs: Vec<_> = arguments.values_of("directories").unwrap().collect();
+ let ignore_dirs: Vec<_> = arguments.values_of("ignore").unwrap().collect();
let min_size: u64 = arguments.value_of("Minimum").unwrap().parse::<u64>().unwrap_or(0);
let (complete_files, read_errors): (Vec<Fileinfo>, Vec<(_, _)>) =
- ddh::deduplicate_dirs_with_min(search_dirs, min_size);
+ ddh::deduplicate_dirs(search_dirs, ignore_dirs, min_size);
let (shared_files, unique_files): (Vec<&Fileinfo>, Vec<&Fileinfo>) = complete_files
.par_iter()
.partition(|&x| x.get_paths().len() > 1);