backup_deduplicator/data/hash.rs
1use std::fmt;
2use std::fmt::Display;
3use std::path::Path;
4use std::str::FromStr;
5use serde::{Deserialize, Serialize, Serializer};
6use serde::de::Error;
7use const_format::concatcp;
8use crate::stages::build::intermediary_build_data::BuildFile;
9use crate::path::FilePath;
10#[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))]
11use crate::utils;
12
13
14/// `GeneralHashType` is an enum that represents the different types of hash functions that can be used.
15///
16/// The following hash functions are supported: SHA512, SHA256, SHA1, XXH64, XXH32, and NULL.
17///
18/// The `hasher` method returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type.
19/// The `hasher` can then be used to compute a hash of that kind.
20///
21/// # Traits
22/// * `FromStr` - to allow parsing a string into a `GeneralHashType`.
23/// * `Display` - to allow formatting a `GeneralHashType` into a string.
24///
25/// # Examples
26/// ```
27/// use std::str::FromStr;
28/// use backup_deduplicator::hash::GeneralHashType;
29///
30/// #[cfg(feature = "hash-sha2")]
31/// {
32/// let hash_type = GeneralHashType::from_str("SHA256").unwrap();
33/// let mut hasher = hash_type.hasher();
34/// hasher.update(b"Hello, world!".as_slice());
35///
36/// assert_eq!(hash_type.to_string(), "SHA256");
37///
38/// let hash = hasher.finalize();
39/// assert_eq!(hash.to_string(), "SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3");
40/// assert_eq!(hash_type, GeneralHashType::SHA256);
41/// }
42///
43/// ```
44///
45/// # See also
46/// * [GeneralHash] - representation of a hash value.
47/// * [GeneralHasher] - trait for computing hash values.
48///
49/// # Features
50/// * `hash-sha2` - enables the SHA512 and SHA256 hash functions.
51/// * `hash-sha1` - enables the SHA1 hash function.
52/// * `hash-xxh` - enables the XXH64 and XXH32 hash functions.
53#[derive(Debug, Hash, PartialEq, Clone, Copy, Serialize, Deserialize)]
54pub enum GeneralHashType {
55 #[cfg(feature = "hash-sha2")]
56 SHA512,
57 #[cfg(feature = "hash-sha2")]
58 SHA256,
59 #[cfg(feature = "hash-sha1")]
60 SHA1,
61 #[cfg(feature = "hash-xxh")]
62 XXH64,
63 #[cfg(feature = "hash-xxh")]
64 XXH32,
65 NULL,
66}
67
68impl GeneralHashType {
69 /// Returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type.
70 /// The `hasher` can then be used to compute a hash of that kind.
71 ///
72 /// # Returns
73 /// A new instance of a `GeneralHasher` trait object.
74 ///
75 /// # Examples
76 /// See the example in the `GeneralHashType` documentation.
77 ///
78 /// # Features
79 /// * `hash-sha2` - enables the SHA512 and SHA256 hash functions.
80 /// * `hash-sha1` - enables the SHA1 hash function.
81 /// * `hash-xxh` - enables the XXH64 and XXH32 hash functions.
82 pub fn hasher(&self) -> Box<dyn GeneralHasher> {
83 match self {
84 #[cfg(feature = "hash-sha2")]
85 GeneralHashType::SHA512 => Box::new(sha2::Sha512Hasher::new()),
86 #[cfg(feature = "hash-sha2")]
87 GeneralHashType::SHA256 => Box::new(sha2::Sha256Hasher::new()),
88 #[cfg(feature = "hash-sha1")]
89 GeneralHashType::SHA1 => Box::new(sha1::Sha1Hasher::new()),
90 #[cfg(feature = "hash-xxh")]
91 GeneralHashType::XXH64 => Box::new(xxh::Xxh64Hasher::new()),
92 #[cfg(feature = "hash-xxh")]
93 GeneralHashType::XXH32 => Box::new(xxh::Xxh32Hasher::new()),
94 GeneralHashType::NULL => Box::new(null::NullHasher::new()),
95 }
96 }
97}
98
99impl GeneralHashType {
100 /// Returns the available hash types as a string.
101 ///
102 /// # Returns
103 /// The available hash types as a string.
104 ///
105 /// # Examples
106 /// ```
107 /// use backup_deduplicator::hash::GeneralHashType;
108 ///
109 /// let supported = GeneralHashType::supported_algorithms();
110 /// println!("Supported algorithms: {}", supported);
111 /// ```
112 pub const fn supported_algorithms() -> &'static str {
113 const SHA2: &'static str = if cfg!(feature = "hash-sha2") { "SHA512, SHA256, " } else { "" };
114 const SHA1: &'static str = if cfg!(feature = "hash-sha1") { "SHA1, " } else { "" };
115 const XXH: &'static str = if cfg!(feature = "hash-xxh") { "XXH64, XXH32, " } else { "" };
116 const NULL: &'static str = "NULL";
117
118 concatcp!(SHA2, SHA1, XXH, NULL)
119 }
120}
121
122impl FromStr for GeneralHashType {
123 /// Error type for parsing a `GeneralHashType` from a string.
124 type Err = &'static str;
125
126 /// Parses a string into a `GeneralHashType`.
127 ///
128 /// # Arguments
129 /// * `s` - The string to parse.
130 ///
131 /// # Returns
132 /// The `GeneralHashType` that corresponds to the string or an error.
133 ///
134 /// # Errors
135 /// Returns an error if the string does not correspond to a `GeneralHashType`.
136 /// Returns the available hash types in the error message.
137 fn from_str(s: &str) -> Result<Self, Self::Err> {
138 match s.to_uppercase().as_str() {
139 #[cfg(feature = "hash-sha2")]
140 "SHA512" => Ok(GeneralHashType::SHA512),
141 #[cfg(feature = "hash-sha2")]
142 "SHA256" => Ok(GeneralHashType::SHA256),
143 #[cfg(feature = "hash-sha1")]
144 "SHA1" => Ok(GeneralHashType::SHA1),
145 #[cfg(feature = "hash-xxh")]
146 "XXH64" => Ok(GeneralHashType::XXH64),
147 #[cfg(feature = "hash-xxh")]
148 "XXH32" => Ok(GeneralHashType::XXH32),
149 "NULL" => Ok(GeneralHashType::NULL),
150 _ => Err(GeneralHashType::supported_algorithms()),
151 }
152 }
153}
154
155impl Display for GeneralHashType {
156 /// Converts a `GeneralHashType` into a string.
157 ///
158 /// # Arguments
159 /// * `f` - The formatter to write to.
160 ///
161 /// # Returns
162 /// A result indicating whether the operation was successful.
163 ///
164 /// # Errors
165 /// Never
166 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
167 match self {
168 #[cfg(feature = "hash-sha2")]
169 GeneralHashType::SHA512 => write!(f, "SHA512"),
170 #[cfg(feature = "hash-sha2")]
171 GeneralHashType::SHA256 => write!(f, "SHA256"),
172 #[cfg(feature = "hash-sha1")]
173 GeneralHashType::SHA1 => write!(f, "SHA1"),
174 #[cfg(feature = "hash-xxh")]
175 GeneralHashType::XXH64 => write!(f, "XXH64"),
176 #[cfg(feature = "hash-xxh")]
177 GeneralHashType::XXH32 => write!(f, "XXH32"),
178 GeneralHashType::NULL => write!(f, "NULL"),
179 }
180 }
181}
182
/// `GeneralHash` holds a hash value together with the algorithm that produced it.
///
/// Each variant stores its digest in a fixed-size byte array whose length depends on the
/// algorithm: 64 bytes for SHA512, 32 for SHA256, 20 for SHA1, 8 for XXH64 and 4 for XXH32.
/// The `NULL` variant carries no data.
///
/// The `hash_type` method reports which algorithm a value belongs to, and the `hasher`
/// method returns a new boxed [GeneralHasher] for that algorithm.
///
/// # Traits
/// * `Display` - formats the value as `hash_type:hash_data (hex)`.
/// * `FromStr` - parses a value from that textual form.
/// * `Serialize` - serializes the value as that string.
/// * `Deserialize` - deserializes the value from that string.
///
/// # Examples
/// ```
/// use std::str::FromStr;
/// use backup_deduplicator::hash::{GeneralHash, GeneralHashType};
///
/// #[cfg(feature = "hash-sha2")]
/// {
/// let hash = GeneralHash::from_str("SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3").unwrap();
///
/// let mut hasher = GeneralHashType::SHA256.hasher();
/// hasher.update(b"Hello, world!".as_slice());
/// let new_hash = hasher.finalize();
///
/// assert_eq!(hash, new_hash);
/// assert_eq!(hash.to_string(), new_hash.to_string());
/// }
/// ```
///
/// # See also
/// * [GeneralHashType] - representation of the different types of hash functions.
/// * [GeneralHasher] - trait for computing hash values.
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd)]
pub enum GeneralHash {
    #[cfg(feature = "hash-sha2")]
    SHA512([u8; 64]),
    #[cfg(feature = "hash-sha2")]
    SHA256([u8; 32]),
    #[cfg(feature = "hash-sha1")]
    SHA1([u8; 20]),
    #[cfg(feature = "hash-xxh")]
    XXH64([u8; 8]),
    #[cfg(feature = "hash-xxh")]
    XXH32([u8; 4]),
    NULL,
}
236
237impl Display for GeneralHash {
238 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239 let capacity = match self {
240 #[cfg(feature = "hash-sha2")]
241 GeneralHash::SHA512(_) => 128,
242 #[cfg(feature = "hash-sha2")]
243 GeneralHash::SHA256(_) => 64,
244 #[cfg(feature = "hash-sha1")]
245 GeneralHash::SHA1(_) => 40,
246 #[cfg(feature = "hash-xxh")]
247 GeneralHash::XXH64(_) => 16,
248 #[cfg(feature = "hash-xxh")]
249 GeneralHash::XXH32(_) => 8,
250 GeneralHash::NULL => 0,
251 };
252
253 let mut hex = String::with_capacity(capacity + 1 + 6);
254
255 hex.push_str((self.hash_type().to_string() + ":").as_str());
256
257 match self {
258 #[cfg(feature = "hash-sha2")]
259 GeneralHash::SHA512(data) => for byte in data {
260 hex.push_str(&format!("{:02x}", byte));
261 },
262 #[cfg(feature = "hash-sha2")]
263 GeneralHash::SHA256(data) => for byte in data {
264 hex.push_str(&format!("{:02x}", byte));
265 },
266 #[cfg(feature = "hash-sha1")]
267 GeneralHash::SHA1(data) => for byte in data {
268 hex.push_str(&format!("{:02x}", byte));
269 },
270 #[cfg(feature = "hash-xxh")]
271 GeneralHash::XXH64(data) => for byte in data {
272 hex.push_str(&format!("{:02x}", byte));
273 },
274 #[cfg(feature = "hash-xxh")]
275 GeneralHash::XXH32(data) => for byte in data {
276 hex.push_str(&format!("{:02x}", byte));
277 },
278 GeneralHash::NULL => {
279 hex.push_str("00");
280 }
281 }
282
283 write!(f, "{}", hex)
284 }
285}
286
287impl Serialize for GeneralHash {
288 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
289 serializer.serialize_str(self.to_string().as_str())
290 }
291}
292
293impl FromStr for GeneralHash {
294 // Error type for parsing a `GeneralHash` from a string.
295 type Err = &'static str;
296
297 /// Parses a string into a `GeneralHash`.
298 ///
299 /// # Arguments
300 /// * `hex` - The string to parse, in the format `hash_type:hash_data (hex)`.
301 ///
302 /// # Returns
303 /// The `GeneralHash` that corresponds to the string or an error.
304 ///
305 /// # Errors
306 /// Returns an error if the string does not correspond to a `GeneralHash`.
307 /// * If the hash type is not recognized.
308 /// * If the hash data is not valid (wrong length or non-hex string).
309 fn from_str(hex: &str) -> Result<Self, Self::Err> {
310 let mut iter = hex.split(':');
311 let hash_type = GeneralHashType::from_str(iter.next().ok_or_else(|| "No hash type")?).map_err(|_| "Failed to parse hash type")?;
312
313 #[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))]
314 let data = match hash_type {
315 GeneralHashType::NULL => Vec::new(),
316 _ => {
317 let data = iter.next().ok_or_else(|| "No hash data")?;
318 utils::decode_hex(data).map_err(|_| "Failed to decode hash data")?
319 }
320 };
321
322 let mut hash = GeneralHash::from_type(hash_type);
323 match &mut hash {
324 #[cfg(feature = "hash-sha2")]
325 GeneralHash::SHA512(target_data) => {
326 if data.len() != 64 {
327 return Err("Invalid data length");
328 }
329 target_data.copy_from_slice(&data);
330 },
331 #[cfg(feature = "hash-sha2")]
332 GeneralHash::SHA256(target_data) => {
333 if data.len() != 32 {
334 return Err("Invalid data length");
335 }
336 target_data.copy_from_slice(&data);
337 },
338 #[cfg(feature = "hash-sha1")]
339 GeneralHash::SHA1(target_data) => {
340 if data.len() != 20 {
341 return Err("Invalid data length");
342 }
343 target_data.copy_from_slice(&data);
344 },
345 #[cfg(feature = "hash-xxh")]
346 GeneralHash::XXH64(target_data) => {
347 if data.len() != 8 {
348 return Err("Invalid data length");
349 }
350 target_data.copy_from_slice(&data);
351 },
352 #[cfg(feature = "hash-xxh")]
353 GeneralHash::XXH32(target_data) => {
354 if data.len() != 4 {
355 return Err("Invalid data length");
356 }
357 target_data.copy_from_slice(&data);
358 },
359 GeneralHash::NULL => {}
360 }
361 Ok(hash)
362 }
363}
364
365impl<'de> Deserialize<'de> for GeneralHash {
366 /// Deserializes a `GeneralHash` from a string.
367 ///
368 /// # Arguments
369 /// * `deserializer` - The deserializer to use.
370 ///
371 /// # Returns
372 /// The deserialized `GeneralHash`.
373 ///
374 /// # Errors
375 /// If the string could not be deserialized.
376 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
377 where
378 D: serde::Deserializer<'de> {
379 let hex = String::deserialize(deserializer)?;
380 GeneralHash::from_str(hex.as_str()).map_err(D::Error::custom)
381 }
382}
383
384impl GeneralHash {
385 /// Returns the hash value as a byte array.
386 ///
387 /// # Returns
388 /// A reference to the byte array that represents the hash value.
389 pub fn as_bytes(&self) -> &[u8] {
390 match self {
391 #[cfg(feature = "hash-sha2")]
392 GeneralHash::SHA512(data) => data,
393 #[cfg(feature = "hash-sha2")]
394 GeneralHash::SHA256(data) => data,
395 #[cfg(feature = "hash-sha1")]
396 GeneralHash::SHA1(data) => data,
397 #[cfg(feature = "hash-xxh")]
398 GeneralHash::XXH64(data) => data,
399 #[cfg(feature = "hash-xxh")]
400 GeneralHash::XXH32(data) => data,
401 GeneralHash::NULL => &[0; 0],
402 }
403 }
404
405 #[cfg(feature = "hash-sha2")]
406 /// Returns a new instance of a SHA512 hash value.
407 pub fn new_sha512() -> Self { Self::from_type(GeneralHashType::SHA512) }
408
409 #[cfg(feature = "hash-sha2")]
410 /// Returns a new instance of a SHA256 hash value.
411 pub fn new_sha256() -> Self { Self::from_type(GeneralHashType::SHA256) }
412
413 #[cfg(feature = "hash-sha1")]
414 /// Returns a new instance of a SHA1 hash value.
415 pub fn new_sha1() -> Self { Self::from_type(GeneralHashType::SHA1) }
416
417 #[cfg(feature = "hash-xxh")]
418 /// Returns a new instance of a XXH64 hash value.
419 pub fn new_xxh64() -> Self { Self::from_type(GeneralHashType::XXH64) }
420
421 #[cfg(feature = "hash-xxh")]
422 /// Returns a new instance of a XXH32 hash value.
423 pub fn new_xxh32() -> Self { Self::from_type(GeneralHashType::XXH32) }
424
425 /// Returns the type of the hash function used.
426 ///
427 /// # Returns
428 /// The type of the hash function used.
429 ///
430 /// # Examples
431 /// ```
432 /// use backup_deduplicator::hash::{GeneralHash, GeneralHashType};
433 ///
434 /// #[cfg(feature = "hash-sha2")]
435 /// {
436 /// let hash = GeneralHash::new_sha256();
437 //
438 // let m = match hash.hash_type() {
439 // GeneralHashType::SHA256 => true,
440 // _ => false,
441 // };
442 //
443 // assert!(m);
444 /// }
445 /// ```
446 pub fn hash_type(&self) -> GeneralHashType {
447 match self {
448 #[cfg(feature = "hash-sha2")]
449 GeneralHash::SHA512(_) => GeneralHashType::SHA512,
450 #[cfg(feature = "hash-sha2")]
451 GeneralHash::SHA256(_) => GeneralHashType::SHA256,
452 #[cfg(feature = "hash-sha1")]
453 GeneralHash::SHA1(_) => GeneralHashType::SHA1,
454 #[cfg(feature = "hash-xxh")]
455 GeneralHash::XXH64(_) => GeneralHashType::XXH64,
456 #[cfg(feature = "hash-xxh")]
457 GeneralHash::XXH32(_) => GeneralHashType::XXH32,
458 GeneralHash::NULL => GeneralHashType::NULL,
459 }
460 }
461
462 /// Returns a new instance of a `GeneralHash` with the specified hash type.
463 ///
464 /// # Arguments
465 /// * `hash_type` - The type of the hash function to use.
466 ///
467 /// # Returns
468 /// A new instance of a `GeneralHash` with the specified hash type.
469 pub fn from_type(hash_type: GeneralHashType) -> Self {
470 match hash_type {
471 #[cfg(feature = "hash-sha2")]
472 GeneralHashType::SHA512 => GeneralHash::SHA512([0; 64]),
473 #[cfg(feature = "hash-sha2")]
474 GeneralHashType::SHA256 => GeneralHash::SHA256([0; 32]),
475 #[cfg(feature = "hash-sha1")]
476 GeneralHashType::SHA1 => GeneralHash::SHA1([0; 20]),
477 #[cfg(feature = "hash-xxh")]
478 GeneralHashType::XXH64 => GeneralHash::XXH64([0; 8]),
479 #[cfg(feature = "hash-xxh")]
480 GeneralHashType::XXH32 => GeneralHash::XXH32([0; 4]),
481 GeneralHashType::NULL => GeneralHash::NULL,
482 }
483 }
484
485 /// Returns a new instance of a `GeneralHash` with the specified hash type.
486 ///
487 /// # Arguments
488 /// * `hash_type` - The type of the hash function to use.
489 ///
490 /// # Returns
491 /// A new instance of a `GeneralHash` with the specified hash type.
492 ///
493 /// # See also
494 /// * [GeneralHashType] - representation of the different types of hash functions.
495 pub fn hasher(&self) -> Box<dyn GeneralHasher> {
496 self.hash_type().hasher()
497 }
498
499 /// Computes the hash value of the specified data.
500 ///
501 /// # Arguments
502 /// * `reader` - The data to hash (supplied as `std::io::Read`).
503 ///
504 /// # Returns
505 /// The size of the data that was hashed.
506 ///
507 /// # Errors
508 /// Returns an error if the data could not be read.
509 pub fn hash_file<T>(&mut self, mut reader: T) -> anyhow::Result<u64>
510 where T: std::io::Read {
511
512 let mut hasher = self.hasher();
513 let mut buffer = [0; 4096];
514 let mut content_size = 0;
515
516 loop {
517 let bytes_read = reader.read(&mut buffer)?;
518 content_size += bytes_read as u64;
519 if bytes_read == 0 {
520 break;
521 }
522 hasher.update(&buffer[..bytes_read]);
523 }
524
525 *self = hasher.finalize();
526
527 Ok(content_size)
528 }
529
530 /// Computes the hash value of file iterator/directory.
531 ///
532 /// # Arguments
533 /// * `children` - The iterator of files to hash.
534 ///
535 /// # Returns
536 /// The count of files that were hashed.
537 ///
538 /// # Errors
539 /// Does not return an error. Might return an error in the future.
540 pub fn hash_directory<'a>(&mut self, children: impl Iterator<Item = &'a BuildFile>) -> anyhow::Result<u64> {
541 let mut hasher = self.hasher();
542
543 let mut content_size = 0;
544
545 for child in children {
546 content_size += 1;
547 hasher.update(child.get_content_hash().as_bytes());
548 }
549
550 *self = hasher.finalize();
551
552 Ok(content_size)
553 }
554
555 /// Computes the hash value of the specified path.
556 ///
557 /// # Arguments
558 /// * `path` - The path to hash.
559 ///
560 /// # Returns
561 /// Does not return a value.
562 ///
563 /// # Errors
564 /// Does not return an error. Might return an error in the future.
565 pub fn hash_path(&mut self, path: &Path) -> anyhow::Result<()> {
566 let mut hasher = self.hasher();
567
568 hasher.update(path.as_os_str().as_encoded_bytes());
569
570 *self = hasher.finalize();
571
572 Ok(())
573 }
574
575 /// Computes the hash value of the specified file path.
576 ///
577 /// # Arguments
578 /// * `path` - The file path to hash.
579 ///
580 /// # Returns
581 /// Does not return a value.
582 ///
583 /// # Errors
584 /// Does not return an error. Might return an error in the future.
585 pub fn hash_filepath(&mut self, path: &FilePath) -> anyhow::Result<()> {
586 let mut hasher = self.hasher();
587
588 for component in &path.path {
589 hasher.update(component.path.as_os_str().as_encoded_bytes());
590 }
591
592 *self = hasher.finalize();
593
594 Ok(())
595 }
596}
597
598/// `GeneralHasher` is a trait for computing hash values.
599///
600/// # Methods
601/// * `new` - creates a new instance of a `GeneralHasher`.
602/// * `update` - updates the hash value with the specified data.
603/// * `finalize` - finalizes the hash value and returns the result.
604///
605/// # Examples
606/// See the example in the `GeneralHash` documentation.
607///
608/// # See also
609/// * [GeneralHash] - representation of a hash value.
610/// * [GeneralHashType] - representation of the different types of hash functions.
611pub trait GeneralHasher {
612 /// Creates a new instance of a `GeneralHasher`.
613 ///
614 /// # Returns
615 /// A new instance of a `GeneralHasher`.
616 fn new() -> Self where Self: Sized;
617
618 /// Updates the hash value with the specified data.
619 ///
620 /// # Arguments
621 /// * `data` - The data to hash.
622 fn update(&mut self, data: &[u8]);
623
624 /// Finalizes the hash value and returns the result.
625 /// Consumes the `GeneralHasher` instance.
626 ///
627 /// # Returns
628 /// The hash value.
629 fn finalize(self: Box<Self>) -> GeneralHash;
630}
631
632#[cfg(feature = "hash-sha1")]
633/// `GeneralHasher` implementation for the SHA1 crate
634mod sha1;
635#[cfg(feature = "hash-sha2")]
636/// `GeneralHasher` implementation for the SHA2 crate
637mod sha2;
638#[cfg(feature = "hash-xxh")]
639/// `GeneralHasher` implementation for the XXH crate
640mod xxh;
641/// `GeneralHasher` implementation for the NULL hash function
642mod null;