summaryrefslogtreecommitdiffstats
path: root/src/fileinfo.rs
blob: 3b24823d96399e175652ff8ba7c41a14fdf85a46 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
use serde::ser::{Serialize, Serializer, SerializeStruct};
use siphasher::sip128::Hasher128;
use std::hash::Hasher;
use std::path::PathBuf;
use std::cmp::Ordering;
use std::io::Read;
use std::fs::{self, Metadata};

/// Base block size in bytes. `Fileinfo::generate_hash` reads files through a buffer of
/// `BLOCK_SIZE * 4` bytes.
const BLOCK_SIZE: usize = 4096;

/// Selects how much of a file `Fileinfo::generate_hash` reads before producing a hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashMode{
    /// Hash the entire contents of the file.
    Full,
    /// Hash only the first chunk read from the file.
    Partial
}

/// Serializable struct containing entries for a specific file. These structs will identify
/// individual files as a collection of paths and associated hash and length data.
#[derive(Debug)]
pub struct Fileinfo{
    /// Hash of the full file, if available.
    full_hash: Option<u128>,
    /// Hash of the partially read file, if available.
    partial_hash: Option<u128>,
    /// Filesystem metadata supplied at construction; its `len()` is reported as the file length.
    metadata: Metadata,
    /// Paths in this collection. The first entry is the one used for the candidate name
    /// and is the file opened when generating a hash.
    pub(crate) file_paths: Vec<PathBuf>,
}

impl Fileinfo{
    /// Creates a new Fileinfo collection struct holding a single path.
    ///
    /// `full_hash` and `partial_hash` may be `None` when they have not been computed yet.
    /// `meta` is stored as the collection's metadata and `path` becomes its first entry.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// Fileinfo::new(
    ///         None,
    ///         None,
    ///         fs::metadata("./foo/bar.txt")?,
    ///         Path::new("./foo/bar.txt").to_path_buf()
    ///         );
    /// Ok(())
    /// }
    /// ```
    pub fn new(full_hash: Option<u128>, partial_hash: Option<u128>, meta: Metadata, path: PathBuf) -> Self{
        Fileinfo{full_hash, partial_hash, metadata: meta, file_paths: vec![path]}
    }
    /// Gets the length, in bytes, of the files in the current collection, as recorded in the
    /// metadata supplied at construction.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
    /// let len = fi.get_length();
    /// assert_eq!(3, len);
    /// Ok(())
    /// }
    /// ```
    pub fn get_length(&self) -> u64{
        self.metadata.len()
    }
    /// Gets the hash of the full file if available.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// let fi = Fileinfo::new(Some(123), None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
    /// let f_hash = fi.get_full_hash();
    /// assert_eq!(Some(123), f_hash);
    /// Ok(())
    /// }
    /// ```
    pub fn get_full_hash(&self) -> Option<u128>{
        self.full_hash
    }
    /// Replaces the stored full-file hash.
    pub(crate) fn set_full_hash(&mut self, hash: Option<u128>){
        self.full_hash = hash
    }
    /// Gets the hash of the partially read file if available.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// let fi = Fileinfo::new(None, Some(123), fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
    /// let p_hash = fi.get_partial_hash();
    /// assert_eq!(Some(123), p_hash);
    /// Ok(())
    /// }
    /// ```
    pub fn get_partial_hash(&self) -> Option<u128>{
        self.partial_hash
    }
    /// Replaces the stored partial hash.
    pub(crate) fn set_partial_hash(&mut self, hash: Option<u128>){
        self.partial_hash = hash
    }
    /// Gets a candidate name. This will be the name of the first file inserted into the collection and so can vary.
    ///
    /// The name is the final component of the first path, resolved with the platform's path
    /// rules. An empty string is returned when that path has no final component or when the
    /// component is not valid UTF-8.
    ///
    /// # Panics
    /// Panics if the collection contains no paths.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
    /// let some_name = fi.get_candidate_name();
    /// assert_eq!("bar.txt", some_name);
    /// Ok(())
    /// }
    /// ```
    pub fn get_candidate_name(&self) -> &str{
        self.file_paths
            .first()
            .expect("Fileinfo must contain at least one path")
            .file_name()
            // Non-UTF-8 names cannot be represented as &str; fall back instead of panicking.
            .and_then(|name| name.to_str())
            .unwrap_or("")
    }
    /// Gets all paths in the current collection. The name of each file can be obtained with
    /// `Path::file_name`.
    ///
    /// # Examples
    /// ```no_run
    /// use std::path::Path;
    /// use ddh::fileinfo::Fileinfo;
    /// use std::fs;
    ///
    /// fn main() -> std::io::Result<()> {
    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
    /// let all_files = fi.get_paths();
    /// assert_eq!(&vec![Path::new("./foo/bar.txt").to_path_buf()],
    ///            all_files);
    /// Ok(())
    /// }
    /// ```
    pub fn get_paths(&self) -> &Vec<PathBuf>{
        &self.file_paths
    }

    /// Computes a 128-bit SipHash of the first file in the collection.
    ///
    /// With `HashMode::Full` the whole file is hashed. With `HashMode::Partial` only the
    /// bytes returned by the first non-empty read (at most `BLOCK_SIZE * 4` bytes) are hashed.
    /// Only the bytes actually read are fed to the hasher.
    ///
    /// The hash is returned, not stored; this method does not modify the stored hashes.
    ///
    /// Returns `None` if the file cannot be opened or a read fails.
    ///
    /// # Panics
    /// Panics if the collection contains no paths.
    pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128>{
        let path = self.file_paths
            .first()
            .expect("Cannot read file path from struct");
        let mut file = fs::File::open(path).ok()?;
        let mut hasher = siphasher::sip128::SipHasher::new();
        /* We want a read call to be "large" for two reasons
        1) Force filesystem read ahead behavior
        2) Fewer system calls for a given file.
        Currently 16KB  */
        let mut hash_buffer = [0;BLOCK_SIZE * 4];
        loop {
            let bytes_read = match file.read(&mut hash_buffer) {
                Ok(n) => n,
                // An interrupted read transferred nothing and is safe to retry.
                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(_e) => return None,
            };
            if bytes_read == 0 {
                // End of file.
                break;
            }
            // Hash only what this read produced; the rest of the buffer is zero padding
            // or stale data from a previous iteration.
            hasher.write(&hash_buffer[..bytes_read]);
            if mode == HashMode::Partial {
                break;
            }
        }
        Some(hasher.finish128().into())
    }
}

impl Serialize for Fileinfo{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut state = serializer.serialize_struct("Fileinfo", 4)?;
        state.serialize_field("partial_hash", &self.partial_hash)?;
        state.serialize_field("full_hash", &self.full_hash)?;
        state.serialize_field("file_length", &self.get_length())?;
        state.serialize_field("file_paths", &self.file_paths)?;
        state.end()
    }
}

impl PartialEq for Fileinfo{
    /// Two collections are equal when their length, partial hash and full hash all match.
    /// The paths they hold are not compared.
    fn eq(&self, other: &Fileinfo) -> bool {
        let lhs = (self.get_length(), self.partial_hash, self.full_hash);
        let rhs = (other.get_length(), other.partial_hash, other.full_hash);
        lhs == rhs
    }
}
impl Eq for Fileinfo{}

impl PartialOrd for Fileinfo{
    /// Always returns `Some`: the ordering is total and is defined by the `Ord`
    /// implementation, to which this delegates so the two can never disagree.
    fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering>{
        Some(Ord::cmp(self, other))
    }
}

impl Ord for Fileinfo{
    /// Orders by the most specific data both sides have: full hashes when both are present,
    /// otherwise partial hashes when both are present, otherwise file length.
    fn cmp(&self, other: &Fileinfo) -> Ordering {
        match (self.full_hash, other.full_hash) {
            (Some(mine), Some(theirs)) => mine.cmp(&theirs),
            _ => match (self.partial_hash, other.partial_hash) {
                (Some(mine), Some(theirs)) => mine.cmp(&theirs),
                _ => self.get_length().cmp(&other.get_length()),
            },
        }
    }
}