author    Yvan Sraka <yvan@sraka.xyz>    2021-10-22 21:32:55 +0200
committer Yvan Sraka <yvan@sraka.xyz>    2021-10-22 21:32:55 +0200
commit    1c7550d6974943e19dd8dd763dcfc9b6da326f8b (patch)
tree      a66b3127b3b7992697d72fd560c733d389ce60fc
parent    4961458f27f503bc38d4088fc175c84e5265616f (diff)
% cargo fmt
-rw-r--r--    src/fileinfo.rs    101
-rw-r--r--    src/lib.rs         207
-rw-r--r--    src/main.rs        294
3 files changed, 363 insertions, 239 deletions
diff --git a/src/fileinfo.rs b/src/fileinfo.rs
index 3ca67bb..7a62780 100644
--- a/src/fileinfo.rs
+++ b/src/fileinfo.rs
@@ -1,29 +1,29 @@
-use serde::ser::{Serialize, Serializer, SerializeStruct};
+use serde::ser::{Serialize, SerializeStruct, Serializer};
use siphasher::sip128::Hasher128;
-use std::hash::Hasher;
-use std::path::PathBuf;
use std::cmp::Ordering;
-use std::io::Read;
use std::fs::{self, Metadata};
+use std::hash::Hasher;
+use std::io::Read;
+use std::path::PathBuf;
const BLOCK_SIZE: usize = 4096;
#[derive(PartialEq)]
-pub enum HashMode{
+pub enum HashMode {
Full,
- Partial
+ Partial,
}
/// Serializable struct containing entries for a specific file. These structs will identify individual files as a collection of paths and associated hash and length data.
#[derive(Debug)]
-pub struct Fileinfo{
+pub struct Fileinfo {
full_hash: Option<u128>,
partial_hash: Option<u128>,
metadata: Metadata,
pub(crate) file_paths: Vec<PathBuf>,
}
-impl Fileinfo{
+impl Fileinfo {
/// Creates a new Fileinfo collection struct.
///
/// # Examples
@@ -42,8 +42,13 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn new(full: Option<u128>, partial: Option<u128>, meta: Metadata, path: PathBuf) -> Self{
- Fileinfo{full_hash: full, partial_hash: partial, metadata: meta, file_paths: vec![path]}
+ pub fn new(full: Option<u128>, partial: Option<u128>, meta: Metadata, path: PathBuf) -> Self {
+ Fileinfo {
+ full_hash: full,
+ partial_hash: partial,
+ metadata: meta,
+ file_paths: vec![path],
+ }
}
/// Gets the length of the files in the current collection.
///
@@ -60,7 +65,7 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn get_length(&self) -> u64{
+ pub fn get_length(&self) -> u64 {
self.metadata.len()
}
/// Gets the hash of the full file if available.
@@ -78,7 +83,7 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn get_full_hash(&self) -> Option<u128>{
+ pub fn get_full_hash(&self) -> Option<u128> {
self.full_hash
}
pub(crate) fn set_full_hash(&mut self, hash: Option<u128>) {
@@ -99,7 +104,7 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn get_partial_hash(&self) -> Option<u128>{
+ pub fn get_partial_hash(&self) -> Option<u128> {
self.partial_hash
}
pub(crate) fn set_partial_hash(&mut self, hash: Option<u128>) {
@@ -120,15 +125,15 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn get_candidate_name(&self) -> &str{
+ pub fn get_candidate_name(&self) -> &str {
self.file_paths
- .get(0)
- .unwrap()
- .to_str()
- .unwrap()
- .rsplit('/')
- .next()
- .unwrap()
+ .get(0)
+ .unwrap()
+ .to_str()
+ .unwrap()
+ .rsplit('/')
+ .next()
+ .unwrap()
}
/// Gets all paths in the current collection. This can be used to get the names of each file with the string `rsplit("/")` method.
///
@@ -146,46 +151,42 @@ impl Fileinfo{
/// Ok(())
/// }
/// ```
- pub fn get_paths(&self) -> &Vec<PathBuf>{
+ pub fn get_paths(&self) -> &Vec<PathBuf> {
&self.file_paths
}
- pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128>{
+ pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128> {
let mut hasher = siphasher::sip128::SipHasher::new();
match fs::File::open(
self.file_paths
- .get(0)
- .expect("Cannot read file path from struct")
- ) {
+ .get(0)
+ .expect("Cannot read file path from struct"),
+ ) {
Ok(mut f) => {
/* We want a read call to be "large" for two reasons
1) Force filesystem read ahead behavior
2) Fewer system calls for a given file.
Currently 16KB */
- let mut hash_buffer = [0;BLOCK_SIZE * 4];
+ let mut hash_buffer = [0; BLOCK_SIZE * 4];
loop {
match f.read(&mut hash_buffer) {
- Ok(n) if n>0 => hasher.write(&hash_buffer),
- Ok(n) if n==0 => break,
- Err(_e) => {
- return None
- },
+ Ok(n) if n > 0 => hasher.write(&hash_buffer),
+ Ok(n) if n == 0 => break,
+ Err(_e) => return None,
_ => panic!("Negative length read in hashing"),
- }
- if mode == HashMode::Partial{
+ }
+ if mode == HashMode::Partial {
return Some(hasher.finish128().into());
}
}
Some(hasher.finish128().into())
}
- Err(_e) => {
- None
- }
+ Err(_e) => None,
}
}
}
-impl Serialize for Fileinfo{
+impl Serialize for Fileinfo {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
@@ -199,20 +200,20 @@ impl Serialize for Fileinfo{
}
}
-impl PartialEq for Fileinfo{
+impl PartialEq for Fileinfo {
fn eq(&self, other: &Fileinfo) -> bool {
- (self.get_length()==other.get_length())&&
- (self.partial_hash==other.partial_hash)&&
- (self.full_hash==other.full_hash)
+ (self.get_length() == other.get_length())
+ && (self.partial_hash == other.partial_hash)
+ && (self.full_hash == other.full_hash)
}
}
-impl Eq for Fileinfo{}
+impl Eq for Fileinfo {}
-impl PartialOrd for Fileinfo{
- fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering>{
- if self.full_hash.is_some() && other.full_hash.is_some(){
+impl PartialOrd for Fileinfo {
+ fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering> {
+ if self.full_hash.is_some() && other.full_hash.is_some() {
Some(self.full_hash.cmp(&other.full_hash))
- } else if self.partial_hash.is_some() && other.partial_hash.is_some(){
+ } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
Some(self.partial_hash.cmp(&other.partial_hash))
} else {
Some(self.get_length().cmp(&other.get_length()))
@@ -220,11 +221,11 @@ impl PartialOrd for Fileinfo{
}
}
-impl Ord for Fileinfo{
+impl Ord for Fileinfo {
fn cmp(&self, other: &Fileinfo) -> Ordering {
- if self.full_hash.is_some() && other.full_hash.is_some(){
+ if self.full_hash.is_some() && other.full_hash.is_some() {
self.full_hash.cmp(&other.full_hash)
- } else if self.partial_hash.is_some() && other.partial_hash.is_some(){
+ } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
self.partial_hash.cmp(&other.partial_hash)
} else {
self.get_length().cmp(&other.get_length())
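
The block comment in the hunk above documents the crate's hashing strategy: reads are kept large (BLOCK_SIZE * 4 = 16 KB) to force filesystem read-ahead and to cut the number of system calls per file. Below is a minimal single-file sketch of that pattern, assuming the same siphasher crate; unlike generate_hash() above, it feeds only the n bytes actually read into the hasher, so a short final read cannot mix stale buffer contents into the digest.

use siphasher::sip128::{Hasher128, SipHasher};
use std::hash::Hasher;
use std::io::Read;
use std::path::Path;

const BLOCK_SIZE: usize = 4096;

// Hash a file in 16 KB chunks; large reads trigger filesystem
// read-ahead and reduce the syscall count, per the comment above.
fn hash_file(path: &Path) -> std::io::Result<u128> {
    let mut f = std::fs::File::open(path)?;
    let mut hasher = SipHasher::new();
    let mut buf = [0u8; BLOCK_SIZE * 4];
    loop {
        let n = f.read(&mut buf)?;
        if n == 0 {
            break; // EOF
        }
        hasher.write(&buf[..n]); // hash only the bytes actually read
    }
    Ok(hasher.finish128().into())
}
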
diff --git a/src/lib.rs b/src/lib.rs
index 2466aee..f2cc3d4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,14 +5,14 @@
pub mod fileinfo;
use fileinfo::{Fileinfo, HashMode};
-use std::fs::{self, DirEntry};
-use std::path::{PathBuf, Path};
-use rayon::prelude::*;
-use std::sync::mpsc::{Sender, channel};
-use std::collections::hash_map::{HashMap, Entry};
use nohash_hasher::IntMap;
+use rayon::prelude::*;
+use std::collections::hash_map::{Entry, HashMap};
+use std::fs::{self, DirEntry};
+use std::path::{Path, PathBuf};
+use std::sync::mpsc::{channel, Sender};
-enum ChannelPackage{
+enum ChannelPackage {
Success(Fileinfo),
Fail(PathBuf, std::io::Error),
}
@@ -24,28 +24,32 @@ enum ChannelPackage{
/// let directories = vec!["/home/jon", "/home/doe"];
/// let (files, errors) = ddh::deduplicate_dirs(directories);
/// ```
-pub fn deduplicate_dirs<P: AsRef<Path> + Sync>(search_dirs: Vec<P>) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>){
+pub fn deduplicate_dirs<P: AsRef<Path> + Sync>(
+ search_dirs: Vec<P>,
+) -> (Vec<Fileinfo>, Vec<(PathBuf, std::io::Error)>) {
let (sender, receiver) = channel();
- search_dirs.par_iter().for_each_with(sender, |s, search_dir| {
+ search_dirs
+ .par_iter()
+ .for_each_with(sender, |s, search_dir| {
traverse_and_spawn(search_dir.as_ref(), s.clone());
- });
+ });
let mut files_of_lengths: IntMap<u64, Vec<Fileinfo>> = IntMap::default();
let mut errors = Vec::new();
- receiver.iter().for_each(|pkg| {
- match pkg{
- ChannelPackage::Success(entry) => {
- match files_of_lengths.entry(entry.get_length()) {
- Entry::Vacant(e) => { e.insert(vec![entry]); },
- Entry::Occupied(mut e) => { e.get_mut().push(entry); }
- }
- },
- ChannelPackage::Fail(entry, error) => {
- errors.push((entry, error));
- },
+ receiver.iter().for_each(|pkg| match pkg {
+ ChannelPackage::Success(entry) => match files_of_lengths.entry(entry.get_length()) {
+ Entry::Vacant(e) => {
+ e.insert(vec![entry]);
+ }
+ Entry::Occupied(mut e) => {
+ e.get_mut().push(entry);
+ }
+ },
+ ChannelPackage::Fail(entry, error) => {
+ errors.push((entry, error));
}
-
});
- let complete_files: Vec<Fileinfo> = files_of_lengths.into_par_iter()
+ let complete_files: Vec<Fileinfo> = files_of_lengths
+ .into_par_iter()
.map(|x| differentiate_and_consolidate(x.0, x.1))
.flatten()
.collect();
@@ -54,118 +58,121 @@ pub fn deduplicate_dirs<P: AsRef<Path> + Sync>(search_dirs: Vec<P>) -> (Vec<File
fn traverse_and_spawn(current_path: impl AsRef<Path>, sender: Sender<ChannelPackage>) {
let current_path_metadata = match fs::symlink_metadata(&current_path) {
- Err(e) =>{
- sender.send(
- ChannelPackage::Fail(current_path.as_ref().to_path_buf(), e)
- ).expect("Error sending new ChannelPackage::Fail");
- return
- },
+ Err(e) => {
+ sender
+ .send(ChannelPackage::Fail(current_path.as_ref().to_path_buf(), e))
+ .expect("Error sending new ChannelPackage::Fail");
+ return;
+ }
Ok(meta) => meta,
};
let current_path = match fs::canonicalize(&current_path) {
Err(e) => {
- sender.send(
- ChannelPackage::Fail(current_path.as_ref().to_path_buf(), e)
- ).expect("Error sending new ChannelPackage::Fail");
- return
- },
+ sender
+ .send(ChannelPackage::Fail(current_path.as_ref().to_path_buf(), e))
+ .expect("Error sending new ChannelPackage::Fail");
+ return;
+ }
Ok(canonical_path) => canonical_path,
};
- match current_path_metadata{
+ match current_path_metadata {
meta if meta.is_file() => {
- sender.send(ChannelPackage::Success(
- Fileinfo::new(
- None,
- None,
- meta,
- current_path
- ))
- ).expect("Error sending new ChannelPackage::Success");
- },
- meta if meta.is_dir() => {
- match fs::read_dir(&current_path) {
- Ok(read_dir_results) => {
- let good_entries: Vec<_> = read_dir_results
- .filter(|x| x.is_ok())
- .map(|x| x.unwrap())
- .collect();
- let (files, dirs): (Vec<&DirEntry>, Vec<&DirEntry>) = good_entries.par_iter().partition(|&x|
- x.file_type()
+ sender
+ .send(ChannelPackage::Success(Fileinfo::new(
+ None,
+ None,
+ meta,
+ current_path,
+ )))
+ .expect("Error sending new ChannelPackage::Success");
+ }
+ meta if meta.is_dir() => match fs::read_dir(&current_path) {
+ Ok(read_dir_results) => {
+ let good_entries: Vec<_> = read_dir_results
+ .filter(|x| x.is_ok())
+ .map(|x| x.unwrap())
+ .collect();
+ let (files, dirs): (Vec<&DirEntry>, Vec<&DirEntry>) =
+ good_entries.par_iter().partition(|&x| {
+ x.file_type()
.expect("Error reading DirEntry file type")
.is_file()
- );
- files.par_iter().for_each_with(sender.clone(), |sender, x|
- traverse_and_spawn(&x.path(), sender.clone()));
- dirs.into_par_iter()
- .for_each_with(sender, |sender, x| {
- traverse_and_spawn(x.path().as_path(), sender.clone());
- })
- },
- Err(e) => {
- sender.send(
- ChannelPackage::Fail(current_path, e)
- ).expect("Error sending new ChannelPackage::Fail");
- },
- }
- },
- _ => {/*Symlinks not yet handled*/},
+ });
+ files.par_iter().for_each_with(sender.clone(), |sender, x| {
+ traverse_and_spawn(&x.path(), sender.clone())
+ });
+ dirs.into_par_iter().for_each_with(sender, |sender, x| {
+ traverse_and_spawn(x.path().as_path(), sender.clone());
+ })
+ }
+ Err(e) => {
+ sender
+ .send(ChannelPackage::Fail(current_path, e))
+ .expect("Error sending new ChannelPackage::Fail");
+ }
+ },
+ _ => { /*Symlinks not yet handled*/ }
}
}
-fn differentiate_and_consolidate(file_length: u64, mut files: Vec<Fileinfo>) -> Vec<Fileinfo>{
- if file_length==0 || files.is_empty(){
- return files
+fn differentiate_and_consolidate(file_length: u64, mut files: Vec<Fileinfo>) -> Vec<Fileinfo> {
+ if file_length == 0 || files.is_empty() {
+ return files;
}
- match files.len(){
+ match files.len() {
1 => return files,
- n if n>1 => {
+ n if n > 1 => {
files.par_iter_mut().for_each(|file_ref| {
let hash = file_ref.generate_hash(HashMode::Partial);
file_ref.set_partial_hash(hash);
});
- if file_length<=4096{
- files.par_iter_mut().for_each(|x|{
- x.set_full_hash(x.get_partial_hash()) ;
+ if file_length <= 4096 {
+ files.par_iter_mut().for_each(|x| {
+ x.set_full_hash(x.get_partial_hash());
});
- return dedupe(files)
+ return dedupe(files);
}
let mut partial_hashes: HashMap<Option<u128>, u64> = HashMap::new();
- files.iter().for_each(|f| {
- match partial_hashes.entry(f.get_partial_hash()){
- Entry::Vacant(e) => { e.insert(0); },
- Entry::Occupied(mut e) => {*e.get_mut()+=1;}
- }
- });
+ files
+ .iter()
+ .for_each(|f| match partial_hashes.entry(f.get_partial_hash()) {
+ Entry::Vacant(e) => {
+ e.insert(0);
+ }
+ Entry::Occupied(mut e) => {
+ *e.get_mut() += 1;
+ }
+ });
let dedupe_hashes: Vec<_> = partial_hashes
.into_iter()
- .filter(|x| x.1>0)
+ .filter(|x| x.1 > 0)
.map(|y| y.0)
.collect();
- files.par_iter_mut().for_each(|x|
- if dedupe_hashes.contains(&x.get_partial_hash()){
+ files.par_iter_mut().for_each(|x| {
+ if dedupe_hashes.contains(&x.get_partial_hash()) {
let hash = x.generate_hash(HashMode::Full);
x.set_full_hash(hash);
}
- );
- },
- _ => {panic!("Somehow a vector of negative length was created. Please report this as a bug");}
+ });
+ }
+ _ => {
+ panic!("Somehow a vector of negative length was created. Please report this as a bug");
+ }
}
dedupe(files)
}
-fn dedupe(mut files: Vec<Fileinfo>) -> Vec<Fileinfo>{
+fn dedupe(mut files: Vec<Fileinfo>) -> Vec<Fileinfo> {
let mut cache: HashMap<(Option<u128>, Option<u128>), &mut Fileinfo> = HashMap::new();
files.iter_mut().for_each(|file| {
- match cache.entry((file.get_partial_hash(), file.get_full_hash())){
- Entry::Vacant(e) => {
- e.insert(file);
- },
- Entry::Occupied(mut e) => {
- e.get_mut()
- .file_paths
- .append(&mut file.file_paths);
- }
- }
+ match cache.entry((file.get_partial_hash(), file.get_full_hash())) {
+ Entry::Vacant(e) => {
+ e.insert(file);
+ }
+ Entry::Occupied(mut e) => {
+ e.get_mut().file_paths.append(&mut file.file_paths);
+ }
+ }
});
files.retain(|x| !x.get_paths().is_empty());
files
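
The hunks above restructure, without changing, lib.rs's two-stage comparison: files are bucketed by length, every candidate in a bucket gets a cheap partial hash of its first block, a full-file hash is computed only where partial hashes collide, files of at most 4096 bytes reuse the partial hash as the full hash, and entries with identical (partial, full) pairs are finally merged. A simplified single-threaded sketch follows, with an illustrative Candidate struct and a caller-supplied hash closure standing in for Fileinfo and generate_hash():

use std::collections::hash_map::{Entry, HashMap};
use std::path::PathBuf;

// Illustrative stand-in for ddh's Fileinfo; field names are assumptions.
struct Candidate {
    len: u64,
    partial: Option<u128>,
    full: Option<u128>,
    paths: Vec<PathBuf>,
}

// Two-stage hashing over one same-length bucket. `hash(path, full)` is a
// caller-supplied closure (e.g. the chunked sketch after the fileinfo.rs
// diff); `full == false` means "first block only".
fn consolidate<F>(group: &mut Vec<Candidate>, hash: F)
where
    F: Fn(&PathBuf, bool) -> Option<u128>,
{
    // Stage 1: cheap partial hash for every candidate.
    for c in group.iter_mut() {
        c.partial = hash(&c.paths[0], false);
    }
    // One block already covers the whole file: reuse partial as full.
    if group.iter().all(|c| c.len <= 4096) {
        for c in group.iter_mut() {
            c.full = c.partial;
        }
        return;
    }
    // Stage 2: pay for a full-file hash only where a partial hash repeats.
    let mut counts: HashMap<Option<u128>, u32> = HashMap::new();
    for c in group.iter() {
        *counts.entry(c.partial).or_insert(0) += 1;
    }
    for c in group.iter_mut() {
        if counts[&c.partial] > 1 {
            c.full = hash(&c.paths[0], true);
        }
    }
}

// Merge candidates whose (partial, full) pair matches, as dedupe() does.
fn merge(group: Vec<Candidate>) -> Vec<Candidate> {
    let mut cache: HashMap<(Option<u128>, Option<u128>), Candidate> = HashMap::new();
    for c in group {
        match cache.entry((c.partial, c.full)) {
            Entry::Vacant(e) => {
                e.insert(c);
            }
            Entry::Occupied(mut e) => e.get_mut().paths.extend(c.paths),
        }
    }
    cache.into_values().collect()
}
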
diff --git a/src/main.rs b/src/main.rs
index 4f58962..23f3ab5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,19 +1,19 @@
-use std::io::{stdin};
+use clap::{App, Arg};
+use ddh::fileinfo::Fileinfo;
+use rayon::prelude::*;
use std::fs::{self};
use std::io::prelude::*;
-use clap::{Arg, App};
-use rayon::prelude::*;
-use ddh::fileinfo::{Fileinfo};
-use std::path::{PathBuf};
+use std::io::stdin;
+use std::path::PathBuf;
#[derive(Debug, Copy, Clone)]
-pub enum PrintFmt{
+pub enum PrintFmt {
Standard,
Json,
Off,
}
-pub enum Verbosity{
+pub enum Verbosity {
Quiet,
Duplicates,
All,
@@ -64,136 +64,252 @@ fn main() {
.get_matches();
//let (sender, receiver) = channel();
- let search_dirs: Vec<_> = arguments.values_of("directories").unwrap()
- .collect();
+ let search_dirs: Vec<_> = arguments.values_of("directories").unwrap().collect();
- let (complete_files, read_errors): (Vec<Fileinfo>, Vec<(_, _)>) = ddh::deduplicate_dirs(search_dirs);
- let (shared_files, unique_files): (Vec<&Fileinfo>, Vec<&Fileinfo>) = complete_files.par_iter().partition(|&x| x.get_paths().len()>1);
- process_full_output(&shared_files, &unique_files, &complete_files, &read_errors, &arguments);
+ let (complete_files, read_errors): (Vec<Fileinfo>, Vec<(_, _)>) =
+ ddh::deduplicate_dirs(search_dirs);
+ let (shared_files, unique_files): (Vec<&Fileinfo>, Vec<&Fileinfo>) = complete_files
+ .par_iter()
+ .partition(|&x| x.get_paths().len() > 1);
+ process_full_output(
+ &shared_files,
+ &unique_files,
+ &complete_files,
+ &read_errors,
+ &arguments,
+ );
}
-fn process_full_output(shared_files: &[&Fileinfo], unique_files: &[&Fileinfo], complete_files: &[Fileinfo], error_paths: &[(PathBuf, std::io::Error)], arguments: &clap::ArgMatches) {
- let blocksize = match arguments.value_of("Blocksize").unwrap_or(""){"B" => "Bytes", "K" => "Kilobytes", "M" => "Megabytes", "G" => "Gigabytes", _ => "Megabytes"};
- let display_power = match blocksize{"Bytes" => 0, "Kilobytes" => 1, "Megabytes" => 2, "Gigabytes" => 3, _ => 2};
- let display_divisor = 1024u64.pow(display_power);
- let fmt = match arguments.value_of("Format").unwrap_or(""){
+fn process_full_output(
+ shared_files: &[&Fileinfo],
+ unique_files: &[&Fileinfo],
+ complete_files: &[Fileinfo],
+ error_paths: &[(PathBuf, std::io::Error)],
+ arguments: &clap::ArgMatches,
+) {
+ let blocksize = match arguments.value_of("Blocksize").unwrap_or("") {
+ "B" => "Bytes",
+ "K" => "Kilobytes",
+ "M" => "Megabytes",
+ "G" => "Gigabytes",
+ _ => "Megabytes",
+ };
+ let display_power = match blocksize {
+ "Bytes" => 0,
+ "Kilobytes" => 1,
+ "Megabytes" => 2,
+ "Gigabytes" => 3,
+ _ => 2,
+ };
+ let display_divisor = 1024u64.pow(display_power);
+ let fmt = match arguments.value_of("Format").unwrap_or("") {
"standard" => PrintFmt::Standard,
"json" => PrintFmt::Json,
- _ => PrintFmt::Standard};
- let verbosity = match arguments.value_of("Verbosity").unwrap_or(""){
+ _ => PrintFmt::Standard,
+ };
+ let verbosity = match arguments.value_of("Verbosity").unwrap_or("") {
"quiet" => Verbosity::Quiet,
"duplicates" => Verbosity::Duplicates,
"all" => Verbosity::All,
- _ => Verbosity::Quiet};
+ _ => Verbosity::Quiet,
+ };
- println!("{} Total files (with duplicates): {} {}", complete_files.par_iter()
- .map(|x| x.get_paths().len() as u64)
- .sum::<u64>(),
- complete_files.par_iter()
- .map(|x| (x.get_paths().len() as u64)*x.get_length())
- .sum::<u64>()/(display_divisor),
- blocksize);
- println!("{} Total files (without duplicates): {} {}", complete_files.len(), complete_files.par_iter()
- .map(|x| x.get_length())
- .sum::<u64>()/(display_divisor),
- blocksize);
- println!("{} Single instance files: {} {}",unique_files.len(), unique_files.par_iter()
- .map(|x| x.get_length())
- .sum::<u64>()/(display_divisor),
- blocksize);
- println!("{} Shared instance files: {} {} ({} instances)", shared_files.len(), shared_files.par_iter()
- .map(|x| x.get_length())
- .sum::<u64>()/(display_divisor),
- blocksize, shared_files.par_iter()
- .map(|x| x.get_paths().len() as u64)
- .sum::<u64>());
+ println!(
+ "{} Total files (with duplicates): {} {}",
+ complete_files
+ .par_iter()
+ .map(|x| x.get_paths().len() as u64)
+ .sum::<u64>(),
+ complete_files
+ .par_iter()
+ .map(|x| (x.get_paths().len() as u64) * x.get_length())
+ .sum::<u64>()
+ / (display_divisor),
+ blocksize
+ );
+ println!(
+ "{} Total files (without duplicates): {} {}",
+ complete_files.len(),
+ complete_files
+ .par_iter()
+ .map(|x| x.get_length())
+ .sum::<u64>()
+ / (display_divisor),
+ blocksize
+ );
+ println!(
+ "{} Single instance files: {} {}",
+ unique_files.len(),
+ unique_files.par_iter().map(|x| x.get_length()).sum::<u64>() / (display_divisor),
+ blocksize
+ );
+ println!(
+ "{} Shared instance files: {} {} ({} instances)",
+ shared_files.len(),
+ shared_files.par_iter().map(|x| x.get_length()).sum::<u64>() / (display_divisor),
+ blocksize,
+ shared_files
+ .par_iter()
+ .map(|x| x.get_paths().len() as u64)
+ .sum::<u64>()
+ );
match (fmt, verbosity) {
- (_, Verbosity::Quiet) => {},
+ (_, Verbosity::Quiet) => {}
(PrintFmt::Standard, Verbosity::Duplicates) => {
- println!("Shared instance files and instance locations"); shared_files.iter().for_each(|x| {
- println!("instances of {} with file length {}:", x.get_candidate_name(), x.get_length());
- x.get_paths().par_iter().for_each(|y| println!("\t{}", y.canonicalize().unwrap().to_str().unwrap()));})
- },
+ println!("Shared instance files and instance locations");
+ shared_files.iter().for_each(|x| {
+ println!(
+ "instances of {} with file length {}:",
+ x.get_candidate_name(),
+ x.get_length()
+ );
+ x.get_paths()
+ .par_iter()
+ .for_each(|y| println!("\t{}", y.canonicalize().unwrap().to_str().unwrap()));
+ })
+ }
(PrintFmt::Standard, Verbosity::All) => {
- println!("Single instance files"); unique_files.par_iter()
- .for_each(|x| println!("{}", x.get_paths().iter().next().unwrap().canonicalize().unwrap().to_str().unwrap()));
- println!("Shared instance files and instance locations"); shared_files.iter().for_each(|x| {
- println!("instances of {} with file length {}:", x.get_candidate_name(), x.get_length());
- x.get_paths().par_iter().for_each(|y| println!("\t{}", y.canonicalize().unwrap().to_str().unwrap()));});
- error_paths.iter().for_each(|x|{
- println!("Could not process {:#?} due to error {:#?}", x.0, x.1.kind());
+ println!("Single instance files");
+ unique_files.par_iter().for_each(|x| {
+ println!(
+ "{}",
+ x.get_paths()
+ .iter()
+ .next()
+ .unwrap()
+ .canonicalize()
+ .unwrap()
+ .to_str()
+ .unwrap()
+ )
+ });
+ println!("Shared instance files and instance locations");
+ shared_files.iter().for_each(|x| {
+ println!(
+ "instances of {} with file length {}:",
+ x.get_candidate_name(),
+ x.get_length()
+ );
+ x.get_paths()
+ .par_iter()
+ .for_each(|y| println!("\t{}", y.canonicalize().unwrap().to_str().unwrap()));
+ });
+ error_paths.iter().for_each(|x| {
+ println!(
+ "Could not process {:#?} due to error {:#?}",
+ x.0,
+ x.1.kind()
+ );
})
- },
+ }
(PrintFmt::Json, Verbosity::Duplicates) => {
- println!("{}", serde_json::to_string(shared_files).unwrap_or_else(|_| "".to_string()));
- },
+ println!(
+ "{}",
+ serde_json::to_string(shared_files).unwrap_or_else(|_| "".to_string())
+ );
+ }
(PrintFmt::Json, Verbosity::All) => {
- println!("{}", serde_json::to_string(complete_files).unwrap_or_else(|_| "".to_string()));
- },
- _ => {},
+ println!(
+ "{}",
+ serde_json::to_string(complete_files).unwrap_or_else(|_| "".to_string())
+ );
+ }
+ _ => {}
}
- match arguments.value_of("Output").unwrap_or("Results.txt"){
- "no" => {},
+ match arguments.value_of("Output").unwrap_or("Results.txt") {
+ "no" => {}
destination_string => {
match fs::File::open(destination_string) {
- Ok(_f) => {
+ Ok(_f) => {
println!("---");
println!("File {} already exists.", destination_string);
println!("Overwrite? Y/N");
let mut input = String::new();
match stdin().read_line(&mut input) {
- Ok(_n) => {
- match input.chars().next().unwrap_or(' ') {
- 'n' | 'N' => {println!("Exiting."); return;}
- 'y' | 'Y' => {println!("Overwriting {}", destination_string);}
- _ => {println!("Exiting."); return;}
+ Ok(_n) => match input.chars().next().unwrap_or(' ') {
+ 'n' | 'N' => {
+ println!("Exiting.");
+ return;
}
- }
- Err(_e) => {println!("Error encountered reading user input. Err: {}", _e);},
- }
- },
- Err(_e) => {
- match fs::File::create(destination_string) {
- Ok(_f) => {},
+ 'y' | 'Y' => {
+ println!("Overwriting {}", destination_string);
+ }
+ _ => {
+ println!("Exiting.");
+ return;
+ }
+ },
Err(_e) => {
- println!("Error encountered opening file {}. Err: {}", destination_string, _e);
- println!("Exiting."); return;
+ println!("Error encountered reading user input. Err: {}", _e);
}
}
+ }
+ Err(_e) => match fs::File::create(destination_string) {
+ Ok(_f) => {}
+ Err(_e) => {
+ println!(
+ "Error encountered opening file {}. Err: {}",
+ destination_string, _e
+ );
+ println!("Exiting.");
+ return;
+ }
},
}
- write_results_to_file(fmt, &shared_files, &unique_files, &complete_files, destination_string);
- },
+ write_results_to_file(
+ fmt,
+ &shared_files,
+ &unique_files,
+ &complete_files,
+ destination_string,
+ );
+ }
}
}
-fn write_results_to_file(fmt: PrintFmt, shared_files: &[&Fileinfo], unique_files: &[&Fileinfo], complete_files: &[Fileinfo], file: &str) {
+fn write_results_to_file(
+ fmt: PrintFmt,
+ shared_files: &[&Fileinfo],
+ unique_files: &[&Fileinfo],
+ complete_files: &[Fileinfo],
+ file: &str,
+) {
let mut output = fs::File::create(file).expect("Error opening output file for writing");
match fmt {
PrintFmt::Standard => {
output.write_fmt(format_args!("Duplicates:\n")).unwrap();
- for file in shared_files.iter(){
+ for file in shared_files.iter() {
let title = file.get_candidate_name();
output.write_fmt(format_args!("{}\n", title)).unwrap();
- for entry in file.get_paths().iter(){
- output.write_fmt(format_args!("\t{}\n", entry.as_path().to_str().unwrap())).unwrap();
+ for entry in file.get_paths().iter() {
+ output
+ .write_fmt(format_args!("\t{}\n", entry.as_path().to_str().unwrap()))
+ .unwrap();
}
}
output.write_fmt(format_args!("Singletons:\n")).unwrap();
- for file in unique_files.iter(){
+ for file in unique_files.iter() {
let title = file.get_candidate_name();
output.write_fmt(format_args!("{}\n", title)).unwrap();
- for entry in file.get_paths().iter(){
- output.write_fmt(format_args!("\t{}\n", entry.as_path().to_str().unwrap())).unwrap();
+ for entry in file.get_paths().iter() {
+ output
+ .write_fmt(format_args!("\t{}\n", entry.as_path().to_str().unwrap()))
+ .unwrap();
}
}
- },
+ }
PrintFmt::Json => {
- output.write_fmt(format_args!("{}", serde_json::to_string(complete_files).unwrap_or_else(|_| "Error serializing".to_string()))).unwrap();
- },
- PrintFmt::Off =>{return},
+ output
+ .write_fmt(format_args!(
+ "{}",
+ serde_json::to_string(complete_files)
+ .unwrap_or_else(|_| "Error serializing".to_string())
+ ))
+ .unwrap();
+ }
+ PrintFmt::Off => return,
}
println!("{:#?} results written to {}", fmt, file);
}
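
The Blocksize handling reformatted in process_full_output() maps a one-letter flag to a unit name and a power of 1024 that serves as the display divisor. An illustrative helper (not the crate's code) showing the same arithmetic:

// Map the one-letter Blocksize flag to a display unit and divisor;
// the fallthrough default is Megabytes, as in process_full_output().
fn display_units(code: &str) -> (&'static str, u64) {
    let (name, power) = match code {
        "B" => ("Bytes", 0),
        "K" => ("Kilobytes", 1),
        "M" => ("Megabytes", 2),
        "G" => ("Gigabytes", 3),
        _ => ("Megabytes", 2),
    };
    (name, 1024u64.pow(power))
}

fn main() {
    let (name, divisor) = display_units("M");
    // 5_242_880 bytes / 1024^2 = 5 -> prints "5 Megabytes"
    println!("{} {}", 5_242_880u64 / divisor, name);
}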