ddh/
fileinfo.rs

1use serde::ser::{Serialize, SerializeStruct, Serializer};
2use siphasher::sip128::Hasher128;
3use std::cmp::Ordering;
4use std::fs::{self, Metadata};
5use std::hash::Hasher;
6use std::io::Read;
7use std::path::PathBuf;
8
/// Base unit, in bytes, used to size the read buffer when hashing files.
/// `Fileinfo::generate_hash` allocates a buffer of `BLOCK_SIZE * 4` bytes.
const BLOCK_SIZE: usize = 4096;
10
/// Selects how much of a file `Fileinfo::generate_hash` feeds into the hasher.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashMode {
    /// Hash the file's contents until end of file is reached.
    Full,
    /// Hash only the data returned by the first read of the file.
    Partial,
}
16
/// Serializable struct containing entries for a specific file. These structs identify
/// individual files as a collection of paths together with the associated hash and
/// length data.
#[derive(Debug)]
pub struct Fileinfo {
    /// Hash of the full file, or `None` when it has not been supplied.
    full_hash: Option<u128>,
    /// Hash of the partially read file, or `None` when it has not been supplied.
    partial_hash: Option<u128>,
    /// Filesystem metadata handed to `new`; its `len()` is what `get_length` reports.
    metadata: Metadata,
    /// Paths belonging to this collection. `new` seeds it with a single path, and the
    /// first entry is the one used by `get_candidate_name` and `generate_hash`.
    pub(crate) file_paths: Vec<PathBuf>,
}
25
26impl Fileinfo {
27    /// Creates a new Fileinfo collection struct.
28    ///
29    /// # Examples
30    /// ```no_run
31    /// use std::path::Path;
32    /// use ddh::fileinfo::Fileinfo;
33    /// use std::fs;
34    ///
35    /// fn main() -> std::io::Result<()> {
36    /// Fileinfo::new(
37    ///         None,
38    ///         None,
39    ///         fs::metadata("./foo/bar.txt")?,
40    ///         Path::new("./foo/bar.txt").to_path_buf()
41    ///         );
42    /// Ok(())
43    /// }
44    /// ```
45    pub fn new(full: Option<u128>, partial: Option<u128>, meta: Metadata, path: PathBuf) -> Self {
46        Fileinfo {
47            full_hash: full,
48            partial_hash: partial,
49            metadata: meta,
50            file_paths: vec![path],
51        }
52    }
53    /// Gets the length of the files in the current collection.
54    ///
55    /// # Examples
56    /// ```no_run
57    /// use std::path::Path;
58    /// use ddh::fileinfo::Fileinfo;
59    /// use std::fs;
60    ///
61    /// fn main() -> std::io::Result<()> {
62    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
63    /// let len = fi.get_length();
64    /// assert_eq!(3, len);
65    /// Ok(())
66    /// }
67    /// ```
68    pub fn get_length(&self) -> u64 {
69        self.metadata.len()
70    }
71    /// Gets the hash of the full file if available.
72    ///
73    /// # Examples
74    /// ```no_run
75    /// use std::path::Path;
76    /// use ddh::fileinfo::Fileinfo;
77    /// use std::fs;
78    ///
79    /// fn main() -> std::io::Result<()> {
80    /// let fi = Fileinfo::new(Some(123), None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
81    /// let f_hash = fi.get_full_hash();
82    /// assert_eq!(Some(123), f_hash);
83    /// Ok(())
84    /// }
85    /// ```
86    pub fn get_full_hash(&self) -> Option<u128> {
87        self.full_hash
88    }
89    pub(crate) fn set_full_hash(&mut self, hash: Option<u128>) {
90        self.full_hash = hash
91    }
92    /// Gets the hash of the partially read file if available.
93    ///
94    /// # Examples
95    /// ```no_run
96    /// use std::path::Path;
97    /// use ddh::fileinfo::Fileinfo;
98    /// use std::fs;
99    ///
100    /// fn main() -> std::io::Result<()> {
101    /// let fi = Fileinfo::new(None, Some(123), fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
102    /// let p_hash = fi.get_partial_hash();
103    /// assert_eq!(Some(123), p_hash);
104    /// Ok(())
105    /// }
106    /// ```
107    pub fn get_partial_hash(&self) -> Option<u128> {
108        self.partial_hash
109    }
110    pub(crate) fn set_partial_hash(&mut self, hash: Option<u128>) {
111        self.partial_hash = hash
112    }
113    /// Gets a candidate name. This will be the name of the first file inserted into the collection and so can vary.
114    ///
115    /// # Examples
116    /// ```no_run
117    /// use std::path::Path;
118    /// use ddh::fileinfo::Fileinfo;
119    /// use std::fs;
120    ///
121    /// fn main() -> std::io::Result<()> {
122    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
123    /// let some_name = fi.get_candidate_name();
124    /// assert_eq!("bar.txt", some_name);
125    /// Ok(())
126    /// }
127    /// ```
128    pub fn get_candidate_name(&self) -> &str {
129        self.file_paths
130            .get(0)
131            .unwrap()
132            .to_str()
133            .unwrap()
134            .rsplit('/')
135            .next()
136            .unwrap()
137    }
138    /// Gets all paths in the current collection. This can be used to get the names of each file with the string `rsplit("/")` method.
139    ///
140    /// # Examples
141    /// ```no_run
142    /// use std::path::Path;
143    /// use ddh::fileinfo::Fileinfo;
144    /// use std::fs;
145    ///
146    /// fn main() -> std::io::Result<()> {
147    /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
148    /// let all_files = fi.get_paths();
149    /// assert_eq!(&vec![Path::new("./foo/bar.txt").to_path_buf()],
150    ///            all_files);
151    /// Ok(())
152    /// }
153    /// ```
154    pub fn get_paths(&self) -> &Vec<PathBuf> {
155        &self.file_paths
156    }
157
158    pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128> {
159        let mut hasher = siphasher::sip128::SipHasher::new();
160        match fs::File::open(
161            self.file_paths
162                .get(0)
163                .expect("Cannot read file path from struct"),
164        ) {
165            Ok(mut f) => {
166                /* We want a read call to be "large" for two reasons
167                1) Force filesystem read ahead behavior
168                2) Fewer system calls for a given file.
169                Currently 16KB  */
170                let mut hash_buffer = [0; BLOCK_SIZE * 4];
171                loop {
172                    match f.read(&mut hash_buffer) {
173                        Ok(n) if n > 0 => hasher.write(&hash_buffer),
174                        Ok(n) if n == 0 => break,
175                        Err(_e) => return None,
176                        _ => panic!("Negative length read in hashing"),
177                    }
178                    if mode == HashMode::Partial {
179                        return Some(hasher.finish128().into());
180                    }
181                }
182                Some(hasher.finish128().into())
183            }
184            Err(_e) => None,
185        }
186    }
187}
188
189impl Serialize for Fileinfo {
190    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
191    where
192        S: Serializer,
193    {
194        let mut state = serializer.serialize_struct("Fileinfo", 4)?;
195        state.serialize_field("partial_hash", &self.partial_hash)?;
196        state.serialize_field("full_hash", &self.full_hash)?;
197        state.serialize_field("file_length", &self.get_length())?;
198        state.serialize_field("file_paths", &self.file_paths)?;
199        state.end()
200    }
201}
202
203impl PartialEq for Fileinfo {
204    fn eq(&self, other: &Fileinfo) -> bool {
205        (self.get_length() == other.get_length())
206            && (self.partial_hash == other.partial_hash)
207            && (self.full_hash == other.full_hash)
208    }
209}
210impl Eq for Fileinfo {}
211
212impl PartialOrd for Fileinfo {
213    fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering> {
214        if self.full_hash.is_some() && other.full_hash.is_some() {
215            Some(self.full_hash.cmp(&other.full_hash))
216        } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
217            Some(self.partial_hash.cmp(&other.partial_hash))
218        } else {
219            Some(self.get_length().cmp(&other.get_length()))
220        }
221    }
222}
223
224impl Ord for Fileinfo {
225    fn cmp(&self, other: &Fileinfo) -> Ordering {
226        if self.full_hash.is_some() && other.full_hash.is_some() {
227            self.full_hash.cmp(&other.full_hash)
228        } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
229            self.partial_hash.cmp(&other.partial_hash)
230        } else {
231            self.get_length().cmp(&other.get_length())
232        }
233    }
234}