Merge pull request #35 from darakian/factor-out-fileinfo

Factor out fileinfo to its own file
author: Jon Moroney <darakian@gmail.com> 2020-08-02 15:41:03 -0700
committer: GitHub <noreply@github.com> 2020-08-02 15:41:03 -0700
commit: 89b2e8d439854f1379d97a8f159ae7c3c8a18933 (patch)
tree: 14974e15f898aadd7f624b95c269316e93023657
parent: 7035276acddb21e9b13bc4350fd8d35c0e26ddc6 (diff)
parent: cb44786864148be2664e9fc6489ebf0029080cdb (diff)
3 files changed, 214 insertions, 211 deletions
diff --git a/src/fileinfo.rs b/src/fileinfo.rs
new file mode 100644
index 0000000..a0088f4
--- /dev/null
+++ b/src/fileinfo.rs
@@ -0,0 +1,211 @@
+use serde::ser::{Serialize, Serializer, SerializeStruct};
+use siphasher::sip128::Hasher128;
+use std::hash::Hasher;
+use std::path::PathBuf;
+use std::cmp::Ordering;
+use std::io::Read;
+use std::fs;
+
+const BLOCK_SIZE: usize = 4096; // generate_hash sizes its read buffer as BLOCK_SIZE * 4
+
+#[derive(PartialEq)]
+pub enum HashMode{ // selects how much of a file Fileinfo::generate_hash reads
+    Full, // keep reading and hashing until end of file
+    Partial // stop after the first read (at most BLOCK_SIZE * 4 bytes)
+}
+
+/// Serializable struct containing entries for a specific file. These structs will identify individual files as a collection of paths and associated hash and length data.
+#[derive(Debug)]
+pub struct Fileinfo{
+    full_hash: Option<u128>, // hash of the full file, if available
+    partial_hash: Option<u128>, // hash of the partially read file, if available
+    file_length: u64, // length of the files in this collection
+    pub(crate) file_paths: Vec<PathBuf>, // paths in this collection; the first one is what gets hashed and named
+}
+
+impl Fileinfo{
+    /// Creates a new Fileinfo collection struct.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// Fileinfo::new(
+    ///         None,
+    ///         None,
+    ///         3,
+    ///         Path::new("./foo/bar.txt").to_path_buf()
+    ///         );
+    /// ```
+    pub fn new(full_hash: Option<u128>, partial_hash: Option<u128>, length: u64, path: PathBuf) -> Self{
+        Fileinfo{full_hash, partial_hash, file_length: length, file_paths: vec![path]}
+    }
+    /// Gets the length of the files in the current collection.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
+    /// let len = fi.get_length();
+    /// assert_eq!(3, len);
+    /// ```
+    pub fn get_length(&self) -> u64{
+        self.file_length
+    }
+    /// Gets the hash of the full file if available.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// let fi = Fileinfo::new(Some(123), None, 3, Path::new("./foo/bar.txt").to_path_buf());
+    /// let f_hash = fi.get_full_hash();
+    /// assert_eq!(Some(123), f_hash);
+    /// ```
+    pub fn get_full_hash(&self) -> Option<u128>{
+        self.full_hash
+    }
+    /// Replaces the stored full-file hash; passing `None` clears it.
+    pub(crate) fn set_full_hash(&mut self, hash: Option<u128>){
+        self.full_hash = hash
+    }
+    /// Gets the hash of the partially read file if available.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// let fi = Fileinfo::new(None, Some(123), 3, Path::new("./foo/bar.txt").to_path_buf());
+    /// let p_hash = fi.get_partial_hash();
+    /// assert_eq!(Some(123), p_hash);
+    /// ```
+    pub fn get_partial_hash(&self) -> Option<u128>{
+        self.partial_hash
+    }
+    /// Replaces the stored partial hash; passing `None` clears it.
+    pub(crate) fn set_partial_hash(&mut self, hash: Option<u128>){
+        self.partial_hash = hash
+    }
+    /// Gets a candidate name. This will be the name of the first file inserted into the collection and so can vary.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
+    /// let some_name = fi.get_candidate_name();
+    /// assert_eq!("bar.txt", some_name)
+    /// ```
+    ///
+    /// # Panics
+    /// Panics if the collection holds no paths or the first path is not valid UTF-8.
+    pub fn get_candidate_name(&self) -> &str{
+        self.file_paths
+        .first()
+        .unwrap()
+        .to_str()
+        .unwrap()
+        .rsplit('/')
+        .next()
+        .unwrap()
+    }
+    /// Gets all paths in the current collection. This can be used to get the names of each file with the string `rsplit("/")` method.
+    ///
+    /// # Examples
+    /// ```
+    /// use std::path::Path;
+    /// use ddh::fileinfo::Fileinfo;
+    ///
+    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
+    /// let all_files = fi.get_paths();
+    /// assert_eq!(&vec![Path::new("./foo/bar.txt").to_path_buf()],
+    ///            all_files);
+    /// ```
+    pub fn get_paths(&self) -> &Vec<PathBuf>{
+        &self.file_paths
+    }
+    /// Hashes the contents of the first path in the collection with 128-bit SipHash.
+    ///
+    /// `HashMode::Partial` hashes only the first successful read (at most
+    /// `BLOCK_SIZE * 4` bytes); `HashMode::Full` reads to the end of the file.
+    /// Returns `None` if the file cannot be opened or read; `self` is not updated.
+    ///
+    /// # Panics
+    /// Panics if the collection holds no paths.
+    pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128>{
+        let path = self.file_paths.first().expect("Cannot read file path from struct");
+        let mut file = fs::File::open(path).ok()?;
+        let mut hasher = siphasher::sip128::SipHasher::new();
+        // Read in large (currently 16KB) chunks to encourage filesystem
+        // read-ahead and to keep the number of system calls per file low.
+        let mut hash_buffer = [0u8; BLOCK_SIZE * 4];
+        loop {
+            match file.read(&mut hash_buffer) {
+                Ok(0) => break,
+                // Hash only the bytes this read produced; the tail of the buffer
+                // still holds zeros or data left over from an earlier read.
+                Ok(n) => hasher.write(&hash_buffer[..n]),
+                // An interrupted read transferred nothing and is safe to retry.
+                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+                Err(_) => return None,
+            }
+            if mode == HashMode::Partial {
+                break;
+            }
+        }
+        Some(hasher.finish128().into())
+    }
+}
+
+/// Serializes as a four-field struct named "Fileinfo", emitting the partial
+/// hash, full hash, length and paths in that order.
+impl Serialize for Fileinfo{
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        // The count passed here must match the number of fields written below.
+        let mut record = serializer.serialize_struct("Fileinfo", 4)?;
+        record.serialize_field("partial_hash", &self.partial_hash)?;
+        record.serialize_field("full_hash", &self.full_hash)?;
+        record.serialize_field("file_length", &self.file_length)?;
+        record.serialize_field("file_paths", &self.file_paths)?;
+        record.end()
+    }
+}
+
+/// Entries are equal when length and both hashes match; paths are not compared.
+impl PartialEq for Fileinfo{
+    fn eq(&self, other: &Fileinfo) -> bool {
+        let key = |fi: &Fileinfo| (fi.file_length, fi.partial_hash, fi.full_hash);
+        key(self) == key(other)
+    }
+}
+impl Eq for Fileinfo{} // marker impl; adds no methods beyond PartialEq::eq
+
+/// Partial ordering for `Fileinfo`, defined in terms of its `Ord`
+/// implementation so that the two can never disagree.
+/// Always returns `Some`.
+impl PartialOrd for Fileinfo{
+    fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering>{
+        // Delegate to `Ord::cmp` instead of repeating its logic here:
+        // full hashes are compared when both sides have one, otherwise partial
+        // hashes when both sides have one, otherwise file lengths.
+        Some(self.cmp(other))
+    }
+}
+
+/// Compares full hashes when both sides have one, otherwise partial hashes
+/// when both sides have one, otherwise file lengths.
+impl Ord for Fileinfo{
+    fn cmp(&self, other: &Fileinfo) -> Ordering {
+        match (self.full_hash, other.full_hash, self.partial_hash, other.partial_hash) {
+            (Some(mine), Some(theirs), _, _) => mine.cmp(&theirs),
+            (_, _, Some(mine), Some(theirs)) => mine.cmp(&theirs),
+            _ => self.file_length.cmp(&other.file_length),
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 567b0c1..7878f44 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,230 +2,22 @@
 //!
 //! `ddh` is a collection of functions and structs to aid in analysing filesystem directories.
 
-pub mod utils;
+pub mod fileinfo;
+use fileinfo::{Fileinfo, HashMode};
 
-use std::hash::{Hasher};
 use std::fs::{self, DirEntry};
-use std::io::{Read};
 use std::path::{PathBuf, Path};
-use std::cmp::Ordering;
-use serde::ser::{Serialize, Serializer, SerializeStruct};
-use siphasher::sip128::Hasher128;
 use rayon::prelude::*;
 use std::sync::mpsc::{Sender, channel};
 use std::collections::hash_map::{HashMap, Entry};
 use std::io::{Error, ErrorKind};
 use nohash_hasher::IntMap;
 
-const BLOCK_SIZE: usize = 4096;
-
-#[derive(PartialEq)]
-enum HashMode{
-    Full,
-    Partial
-}
-
 enum ChannelPackage{
     Success(Fileinfo),
     Fail(PathBuf, std::io::Error),
 }
 
-/// Serializable struct containing entries for a specific file. These structs will identify individual files as a collection of paths and associated hash and length data.
-#[derive(Debug)]
-pub struct Fileinfo{
-    full_hash: Option<u128>,
-    partial_hash: Option<u128>,
-    file_length: u64,
-    file_paths: Vec<PathBuf>,
-}
-
-impl Fileinfo{
-    /// Creates a new Fileinfo collection struct.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// Fileinfo::new(
-    ///         None,
-    ///         None,
-    ///         3,
-    ///         Path::new("./foo/bar.txt").to_path_buf()
-    ///         );
-    /// ```
-    pub fn new(full_hash: Option<u128>, partial_hash: Option<u128>, length: u64, path: PathBuf) -> Self{
-        Fileinfo{full_hash: full_hash, partial_hash: partial_hash, file_length: length, file_paths: vec![path]}
-    }
-    /// Gets the length of the files in the current collection.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
-    /// let len = fi.get_length();
-    /// assert_eq!(3, len);
-    /// ```
-    pub fn get_length(&self) -> u64{
-        self.file_length
-    }
-    /// Gets the hash of the full file if available.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// let fi = Fileinfo::new(Some(123), None, 3, Path::new("./foo/bar.txt").to_path_buf());
-    /// let f_hash = fi.get_full_hash();
-    /// assert_eq!(Some(123), f_hash);
-    /// ```
-    pub fn get_full_hash(&self) -> Option<u128>{
-        self.full_hash
-    }
-    fn set_full_hash(&mut self, hash: Option<u128>) -> (){
-        self.full_hash = hash
-    }
-    /// Gets the hash of the partially read file if available.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// let fi = Fileinfo::new(None, Some(123), 3, Path::new("./foo/bar.txt").to_path_buf());
-    /// let p_hash = fi.get_partial_hash();
-    /// assert_eq!(Some(123), p_hash);
-    /// ```
-    pub fn get_partial_hash(&self) -> Option<u128>{
-        self.partial_hash
-    }
-    fn set_partial_hash(&mut self, hash: Option<u128>) -> (){
-        self.partial_hash = hash
-    }
-    /// Gets a candidate name. This will be the name of the first file inserted into the collection and so can vary.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
-    /// let some_name = fi.get_candidate_name();
-    /// assert_eq!("bar.txt", some_name)
-    /// ```
-    pub fn get_candidate_name(&self) -> &str{
-        self.file_paths
-        .iter()
-        .next()
-        .unwrap()
-        .to_str()
-        .unwrap()
-        .rsplit("/")
-        .next()
-        .unwrap()
-    }
-    /// Gets all paths in the current collection. This can be used to get the names of each file with the string `rsplit("/")` method.
-    ///
-    /// # Examples
-    /// ```
-    /// use std::path::Path;
-    /// use ddh::Fileinfo;
-    ///
-    /// let fi = Fileinfo::new(None, None, 3, Path::new("./foo/bar.txt").to_path_buf());
-    /// let all_files = fi.get_paths();
-    /// assert_eq!(&vec![Path::new("./foo/bar.txt").to_path_buf()],
-    ///            all_files);
-    /// ```
-    pub fn get_paths(&self) -> &Vec<PathBuf>{
-        return &self.file_paths
-    }
-
-    fn generate_hash(&mut self, mode: HashMode) -> Option<u128>{
-        let mut hasher = siphasher::sip128::SipHasher::new();
-        match fs::File::open(
-            self.file_paths
-            .iter()
-            .next()
-            .expect("Cannot read file path from struct")
-            ) {
-            Ok(mut f) => {
-                /* We want a read call to be "large" for two reasons
-                1) Force filesystem read ahead behavior
-                2) Fewer system calls for a given file.
-                Currently 16KB  */
-                let mut hash_buffer = [0;BLOCK_SIZE * 4];
-                loop {
-                    match f.read(&mut hash_buffer) {
-                        Ok(n) if n>0 => hasher.write(&hash_buffer),
-                        Ok(n) if n==0 => break,
-                        Err(_e) => {
-                            return None
-                        },
-                        _ => panic!("Negative length read in hashing"),
-                        }
-                    if mode == HashMode::Partial{
-                        return Some(hasher.finish128().into());
-                    }
-                }
-                return Some(hasher.finish128().into());
-            }
-            Err(_e) => {
-                return None
-            }
-        }
-    }
-}
-
-impl Serialize for Fileinfo{
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        let mut state = serializer.serialize_struct("Fileinfo", 4)?;
-        state.serialize_field("partial_hash", &self.partial_hash)?;
-        state.serialize_field("full_hash", &self.full_hash)?;
-        state.serialize_field("file_length", &self.file_length)?;
-        state.serialize_field("file_paths", &self.file_paths)?;
-        state.end()
-    }
-}
-
-impl PartialEq for Fileinfo{
-    fn eq(&self, other: &Fileinfo) -> bool {
-        (self.file_length==other.file_length)&&
-        (self.partial_hash==other.partial_hash)&&
-        (self.full_hash==other.full_hash)
-    }
-}
-impl Eq for Fileinfo{}
-
-impl PartialOrd for Fileinfo{
-    fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering>{
-         if self.full_hash.is_some() && other.full_hash.is_some(){
-            Some(self.full_hash.cmp(&other.full_hash))
-        } else if self.partial_hash.is_some() && other.partial_hash.is_some(){
-            Some(self.partial_hash.cmp(&other.partial_hash))
-        } else {
-            Some(self.file_length.cmp(&other.file_length))
-        }
-    }
-}
-
-impl Ord for Fileinfo{
-    fn cmp(&self, other: &Fileinfo) -> Ordering {
-        if self.full_hash.is_some() && other.full_hash.is_some(){
-            self.full_hash.cmp(&other.full_hash)
-        } else if self.partial_hash.is_some() && other.partial_hash.is_some(){
-            self.partial_hash.cmp(&other.partial_hash)
-        } else {
-            self.file_length.cmp(&other.file_length)
-        }
-    }
-}
-
 /// Constructs a list of unique files from a list of directories.
 ///
 /// # Examples
diff --git a/src/main.rs b/src/main.rs
index adb56b7..3fcbfc6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,7 +3,7 @@ use std::fs::{self};
 use std::io::prelude::*;
 use clap::{Arg, App};
 use rayon::prelude::*;
-use ddh::{Fileinfo};
+use ddh::fileinfo::{Fileinfo};
 use std::path::{PathBuf};
 
 #[derive(Debug, Copy, Clone)]
author	Jon Moroney <darakian@gmail.com>	2020-08-02 15:41:03 -0700
committer	GitHub <noreply@github.com>	2020-08-02 15:41:03 -0700
commit	89b2e8d439854f1379d97a8f159ae7c3c8a18933 (patch)
tree	14974e15f898aadd7f624b95c269316e93023657
parent	7035276acddb21e9b13bc4350fd8d35c0e26ddc6 (diff)
parent	cb44786864148be2664e9fc6489ebf0029080cdb (diff)