ddh/fileinfo.rs
1use serde::ser::{Serialize, SerializeStruct, Serializer};
2use siphasher::sip128::Hasher128;
3use std::cmp::Ordering;
4use std::fs::{self, Metadata};
5use std::hash::Hasher;
6use std::io::Read;
7use std::path::PathBuf;
8
/// Base block size in bytes. `generate_hash` reads files through a buffer of
/// `BLOCK_SIZE * 4` bytes (16 KiB).
const BLOCK_SIZE: usize = 4096;
10
/// Selects how much of a file `Fileinfo::generate_hash` reads before producing a digest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashMode {
    /// Hash the file's contents all the way to end-of-file.
    Full,
    /// Hash only the leading portion of the file (a single read buffer's worth).
    Partial,
}
16
/// Serializable record describing one distinct file.
///
/// A `Fileinfo` groups every path that refers to the same content, together
/// with the hash and length data used to identify that content.
#[derive(Debug)]
pub struct Fileinfo {
    /// Hash of the complete file contents, once it has been computed.
    full_hash: Option<u128>,
    /// Hash of the leading portion of the file, once it has been computed.
    partial_hash: Option<u128>,
    /// Filesystem metadata; the file length is read from here.
    metadata: Metadata,
    /// Every path in this collection. The first entry is the one that gets hashed.
    pub(crate) file_paths: Vec<PathBuf>,
}
25
26impl Fileinfo {
27 /// Creates a new Fileinfo collection struct.
28 ///
29 /// # Examples
30 /// ```no_run
31 /// use std::path::Path;
32 /// use ddh::fileinfo::Fileinfo;
33 /// use std::fs;
34 ///
35 /// fn main() -> std::io::Result<()> {
36 /// Fileinfo::new(
37 /// None,
38 /// None,
39 /// fs::metadata("./foo/bar.txt")?,
40 /// Path::new("./foo/bar.txt").to_path_buf()
41 /// );
42 /// Ok(())
43 /// }
44 /// ```
45 pub fn new(full: Option<u128>, partial: Option<u128>, meta: Metadata, path: PathBuf) -> Self {
46 Fileinfo {
47 full_hash: full,
48 partial_hash: partial,
49 metadata: meta,
50 file_paths: vec![path],
51 }
52 }
53 /// Gets the length of the files in the current collection.
54 ///
55 /// # Examples
56 /// ```no_run
57 /// use std::path::Path;
58 /// use ddh::fileinfo::Fileinfo;
59 /// use std::fs;
60 ///
61 /// fn main() -> std::io::Result<()> {
62 /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
63 /// let len = fi.get_length();
64 /// assert_eq!(3, len);
65 /// Ok(())
66 /// }
67 /// ```
68 pub fn get_length(&self) -> u64 {
69 self.metadata.len()
70 }
71 /// Gets the hash of the full file if available.
72 ///
73 /// # Examples
74 /// ```no_run
75 /// use std::path::Path;
76 /// use ddh::fileinfo::Fileinfo;
77 /// use std::fs;
78 ///
79 /// fn main() -> std::io::Result<()> {
80 /// let fi = Fileinfo::new(Some(123), None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
81 /// let f_hash = fi.get_full_hash();
82 /// assert_eq!(Some(123), f_hash);
83 /// Ok(())
84 /// }
85 /// ```
86 pub fn get_full_hash(&self) -> Option<u128> {
87 self.full_hash
88 }
89 pub(crate) fn set_full_hash(&mut self, hash: Option<u128>) {
90 self.full_hash = hash
91 }
92 /// Gets the hash of the partially read file if available.
93 ///
94 /// # Examples
95 /// ```no_run
96 /// use std::path::Path;
97 /// use ddh::fileinfo::Fileinfo;
98 /// use std::fs;
99 ///
100 /// fn main() -> std::io::Result<()> {
101 /// let fi = Fileinfo::new(None, Some(123), fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
102 /// let p_hash = fi.get_partial_hash();
103 /// assert_eq!(Some(123), p_hash);
104 /// Ok(())
105 /// }
106 /// ```
107 pub fn get_partial_hash(&self) -> Option<u128> {
108 self.partial_hash
109 }
110 pub(crate) fn set_partial_hash(&mut self, hash: Option<u128>) {
111 self.partial_hash = hash
112 }
113 /// Gets a candidate name. This will be the name of the first file inserted into the collection and so can vary.
114 ///
115 /// # Examples
116 /// ```no_run
117 /// use std::path::Path;
118 /// use ddh::fileinfo::Fileinfo;
119 /// use std::fs;
120 ///
121 /// fn main() -> std::io::Result<()> {
122 /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
123 /// let some_name = fi.get_candidate_name();
124 /// assert_eq!("bar.txt", some_name);
125 /// Ok(())
126 /// }
127 /// ```
128 pub fn get_candidate_name(&self) -> &str {
129 self.file_paths
130 .get(0)
131 .unwrap()
132 .to_str()
133 .unwrap()
134 .rsplit('/')
135 .next()
136 .unwrap()
137 }
138 /// Gets all paths in the current collection. This can be used to get the names of each file with the string `rsplit("/")` method.
139 ///
140 /// # Examples
141 /// ```no_run
142 /// use std::path::Path;
143 /// use ddh::fileinfo::Fileinfo;
144 /// use std::fs;
145 ///
146 /// fn main() -> std::io::Result<()> {
147 /// let fi = Fileinfo::new(None, None, fs::metadata("./foo/bar.txt")?, Path::new("./foo/bar.txt").to_path_buf());
148 /// let all_files = fi.get_paths();
149 /// assert_eq!(&vec![Path::new("./foo/bar.txt").to_path_buf()],
150 /// all_files);
151 /// Ok(())
152 /// }
153 /// ```
154 pub fn get_paths(&self) -> &Vec<PathBuf> {
155 &self.file_paths
156 }
157
158 pub fn generate_hash(&mut self, mode: HashMode) -> Option<u128> {
159 let mut hasher = siphasher::sip128::SipHasher::new();
160 match fs::File::open(
161 self.file_paths
162 .get(0)
163 .expect("Cannot read file path from struct"),
164 ) {
165 Ok(mut f) => {
166 /* We want a read call to be "large" for two reasons
167 1) Force filesystem read ahead behavior
168 2) Fewer system calls for a given file.
169 Currently 16KB */
170 let mut hash_buffer = [0; BLOCK_SIZE * 4];
171 loop {
172 match f.read(&mut hash_buffer) {
173 Ok(n) if n > 0 => hasher.write(&hash_buffer),
174 Ok(n) if n == 0 => break,
175 Err(_e) => return None,
176 _ => panic!("Negative length read in hashing"),
177 }
178 if mode == HashMode::Partial {
179 return Some(hasher.finish128().into());
180 }
181 }
182 Some(hasher.finish128().into())
183 }
184 Err(_e) => None,
185 }
186 }
187}
188
189impl Serialize for Fileinfo {
190 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
191 where
192 S: Serializer,
193 {
194 let mut state = serializer.serialize_struct("Fileinfo", 4)?;
195 state.serialize_field("partial_hash", &self.partial_hash)?;
196 state.serialize_field("full_hash", &self.full_hash)?;
197 state.serialize_field("file_length", &self.get_length())?;
198 state.serialize_field("file_paths", &self.file_paths)?;
199 state.end()
200 }
201}
202
203impl PartialEq for Fileinfo {
204 fn eq(&self, other: &Fileinfo) -> bool {
205 (self.get_length() == other.get_length())
206 && (self.partial_hash == other.partial_hash)
207 && (self.full_hash == other.full_hash)
208 }
209}
210impl Eq for Fileinfo {}
211
212impl PartialOrd for Fileinfo {
213 fn partial_cmp(&self, other: &Fileinfo) -> Option<Ordering> {
214 if self.full_hash.is_some() && other.full_hash.is_some() {
215 Some(self.full_hash.cmp(&other.full_hash))
216 } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
217 Some(self.partial_hash.cmp(&other.partial_hash))
218 } else {
219 Some(self.get_length().cmp(&other.get_length()))
220 }
221 }
222}
223
224impl Ord for Fileinfo {
225 fn cmp(&self, other: &Fileinfo) -> Ordering {
226 if self.full_hash.is_some() && other.full_hash.is_some() {
227 self.full_hash.cmp(&other.full_hash)
228 } else if self.partial_hash.is_some() && other.partial_hash.is_some() {
229 self.partial_hash.cmp(&other.partial_hash)
230 } else {
231 self.get_length().cmp(&other.get_length())
232 }
233 }
234}