backup_deduplicator/data/
hash.rs

1use std::fmt;
2use std::fmt::Display;
3use std::path::Path;
4use std::str::FromStr;
5use serde::{Deserialize, Serialize, Serializer};
6use serde::de::Error;
7use const_format::concatcp;
8use crate::stages::build::intermediary_build_data::BuildFile;
9use crate::path::FilePath;
10#[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))]
11use crate::utils;
12
13
14/// `GeneralHashType` is an enum that represents the different types of hash functions that can be used.
15///
16/// The following hash functions are supported: SHA512, SHA256, SHA1, XXH64, XXH32, and NULL.
17///
18/// The `hasher` method returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type.
19/// The `hasher` can then be used to compute a hash of that kind.
20///
21/// # Traits
22/// * `FromStr` - to allow parsing a string into a `GeneralHashType`.
23/// * `Display` - to allow formatting a `GeneralHashType` into a string.
24///
25/// # Examples
26/// ```
27/// use std::str::FromStr;
28/// use backup_deduplicator::hash::GeneralHashType;
29///
30/// #[cfg(feature = "hash-sha2")]
31/// {
32/// let hash_type = GeneralHashType::from_str("SHA256").unwrap();
33/// let mut hasher = hash_type.hasher();
34/// hasher.update(b"Hello, world!".as_slice());
35///
36/// assert_eq!(hash_type.to_string(), "SHA256");
37///
38/// let hash = hasher.finalize();
39/// assert_eq!(hash.to_string(), "SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3");
40/// assert_eq!(hash_type, GeneralHashType::SHA256);
41/// }
42///
43/// ```
44///
45/// # See also
46/// * [GeneralHash] - representation of a hash value.
47/// * [GeneralHasher] - trait for computing hash values.
48///
49/// # Features
50/// * `hash-sha2` - enables the SHA512 and SHA256 hash functions.
51/// * `hash-sha1` - enables the SHA1 hash function.
52/// * `hash-xxh` - enables the XXH64 and XXH32 hash functions.
53#[derive(Debug, Hash, PartialEq, Clone, Copy, Serialize, Deserialize)]
54pub enum GeneralHashType {
55    #[cfg(feature = "hash-sha2")]
56    SHA512,
57    #[cfg(feature = "hash-sha2")]
58    SHA256,
59    #[cfg(feature = "hash-sha1")]
60    SHA1,
61    #[cfg(feature = "hash-xxh")]
62    XXH64,
63    #[cfg(feature = "hash-xxh")]
64    XXH32,
65    NULL,
66}
67
68impl GeneralHashType {
69    /// Returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type.
70    /// The `hasher` can then be used to compute a hash of that kind.
71    ///
72    /// # Returns
73    /// A new instance of a `GeneralHasher` trait object.
74    ///
75    /// # Examples
76    /// See the example in the `GeneralHashType` documentation.
77    ///
78    /// # Features
79    /// * `hash-sha2` - enables the SHA512 and SHA256 hash functions.
80    /// * `hash-sha1` - enables the SHA1 hash function.
81    /// * `hash-xxh` - enables the XXH64 and XXH32 hash functions.
82    pub fn hasher(&self) -> Box<dyn GeneralHasher> {
83        match self {
84            #[cfg(feature = "hash-sha2")]
85            GeneralHashType::SHA512 => Box::new(sha2::Sha512Hasher::new()),
86            #[cfg(feature = "hash-sha2")]
87            GeneralHashType::SHA256 => Box::new(sha2::Sha256Hasher::new()),
88            #[cfg(feature = "hash-sha1")]
89            GeneralHashType::SHA1 => Box::new(sha1::Sha1Hasher::new()),
90            #[cfg(feature = "hash-xxh")]
91            GeneralHashType::XXH64 => Box::new(xxh::Xxh64Hasher::new()),
92            #[cfg(feature = "hash-xxh")]
93            GeneralHashType::XXH32 => Box::new(xxh::Xxh32Hasher::new()),
94            GeneralHashType::NULL => Box::new(null::NullHasher::new()),
95        }
96    }
97}
98
99impl GeneralHashType {
100    /// Returns the available hash types as a string.
101    /// 
102    /// # Returns
103    /// The available hash types as a string.
104    /// 
105    /// # Examples
106    /// ```
107    /// use backup_deduplicator::hash::GeneralHashType;
108    /// 
109    /// let supported = GeneralHashType::supported_algorithms();
110    /// println!("Supported algorithms: {}", supported);
111    /// ```
112    pub const fn supported_algorithms() -> &'static str {
113        const SHA2: &'static str = if cfg!(feature = "hash-sha2") { "SHA512, SHA256, " } else { "" };
114        const SHA1: &'static str = if cfg!(feature = "hash-sha1") { "SHA1, " } else { "" };
115        const XXH: &'static str = if cfg!(feature = "hash-xxh") { "XXH64, XXH32, " } else { "" };
116        const NULL: &'static str = "NULL";
117        
118        concatcp!(SHA2, SHA1, XXH, NULL)
119    }
120}
121
122impl FromStr for GeneralHashType {
123    /// Error type for parsing a `GeneralHashType` from a string.
124    type Err = &'static str;
125
126    /// Parses a string into a `GeneralHashType`.
127    /// 
128    /// # Arguments
129    /// * `s` - The string to parse.
130    /// 
131    /// # Returns
132    /// The `GeneralHashType` that corresponds to the string or an error.
133    /// 
134    /// # Errors
135    /// Returns an error if the string does not correspond to a `GeneralHashType`.
136    /// Returns the available hash types in the error message.
137    fn from_str(s: &str) -> Result<Self, Self::Err> {
138        match s.to_uppercase().as_str() {
139            #[cfg(feature = "hash-sha2")]
140            "SHA512" => Ok(GeneralHashType::SHA512),
141            #[cfg(feature = "hash-sha2")]
142            "SHA256" => Ok(GeneralHashType::SHA256),
143            #[cfg(feature = "hash-sha1")]
144            "SHA1" => Ok(GeneralHashType::SHA1),
145            #[cfg(feature = "hash-xxh")]
146            "XXH64" => Ok(GeneralHashType::XXH64),
147            #[cfg(feature = "hash-xxh")]
148            "XXH32" => Ok(GeneralHashType::XXH32),
149            "NULL" => Ok(GeneralHashType::NULL),
150            _ => Err(GeneralHashType::supported_algorithms()),
151        }
152    }
153}
154
155impl Display for GeneralHashType {
156    /// Converts a `GeneralHashType` into a string.
157    /// 
158    /// # Arguments
159    /// * `f` - The formatter to write to.
160    /// 
161    /// # Returns
162    /// A result indicating whether the operation was successful.
163    /// 
164    /// # Errors
165    /// Never
166    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
167        match self {
168            #[cfg(feature = "hash-sha2")]
169            GeneralHashType::SHA512 => write!(f, "SHA512"),
170            #[cfg(feature = "hash-sha2")]
171            GeneralHashType::SHA256 => write!(f, "SHA256"),
172            #[cfg(feature = "hash-sha1")]
173            GeneralHashType::SHA1 => write!(f, "SHA1"),
174            #[cfg(feature = "hash-xxh")]
175            GeneralHashType::XXH64 => write!(f, "XXH64"),
176            #[cfg(feature = "hash-xxh")]
177            GeneralHashType::XXH32 => write!(f, "XXH32"),
178            GeneralHashType::NULL => write!(f, "NULL"),
179        }
180    }
181}
182
/// `GeneralHash` holds a hash value together with the algorithm that produced it.
///
/// Each variant stores the digest as a fixed-size byte array whose length is
/// determined by the hash function; the `NULL` variant carries no data.
///
/// The following hash functions are supported: SHA512, SHA256, SHA1, XXH64, XXH32, and NULL.
///
/// The `hash_type` method reports which hash function a value belongs to.
/// The `hasher` method creates a new `GeneralHasher` trait object for that hash type,
/// which can then be used to compute a hash of the same kind.
///
/// # Traits
/// * `Display` - formats a `GeneralHash` as `hash_type:hash_data (hex)`.
/// * `FromStr` - parses a string of that format back into a `GeneralHash`.
/// * `Serialize` - serializes a `GeneralHash` as that string.
/// * `Deserialize` - deserializes a `GeneralHash` from that string.
///
/// # Examples
/// ```
/// use std::str::FromStr;
/// use backup_deduplicator::hash::{GeneralHash, GeneralHashType};
///
/// #[cfg(feature = "hash-sha2")]
/// {
/// let hash = GeneralHash::from_str("SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3").unwrap();
///
/// let mut hasher = GeneralHashType::SHA256.hasher();
/// hasher.update(b"Hello, world!".as_slice());
/// let new_hash = hasher.finalize();
///
/// assert_eq!(hash, new_hash);
/// assert_eq!(hash.to_string(), new_hash.to_string());
/// }
/// ```
///
/// # See also
/// * [GeneralHashType] - representation of the different types of hash functions.
/// * [GeneralHasher] - trait for computing hash values.
///
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd)]
pub enum GeneralHash {
    /// A 64-byte SHA-512 digest (requires feature `hash-sha2`).
    #[cfg(feature = "hash-sha2")]
    SHA512([u8; 64]),
    /// A 32-byte SHA-256 digest (requires feature `hash-sha2`).
    #[cfg(feature = "hash-sha2")]
    SHA256([u8; 32]),
    /// A 20-byte SHA-1 digest (requires feature `hash-sha1`).
    #[cfg(feature = "hash-sha1")]
    SHA1([u8; 20]),
    /// An 8-byte XXH64 value (requires feature `hash-xxh`).
    #[cfg(feature = "hash-xxh")]
    XXH64([u8; 8]),
    /// A 4-byte XXH32 value (requires feature `hash-xxh`).
    #[cfg(feature = "hash-xxh")]
    XXH32([u8; 4]),
    /// The NULL hash; stores no bytes.
    NULL,
}
236
237impl Display for GeneralHash {
238    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239        let capacity = match self {
240            #[cfg(feature = "hash-sha2")]
241            GeneralHash::SHA512(_) => 128,
242            #[cfg(feature = "hash-sha2")]
243            GeneralHash::SHA256(_) => 64,
244            #[cfg(feature = "hash-sha1")]
245            GeneralHash::SHA1(_) => 40,
246            #[cfg(feature = "hash-xxh")]
247            GeneralHash::XXH64(_) => 16,
248            #[cfg(feature = "hash-xxh")]
249            GeneralHash::XXH32(_) => 8,
250            GeneralHash::NULL => 0,
251        };
252
253        let mut hex = String::with_capacity(capacity + 1 + 6);
254
255        hex.push_str((self.hash_type().to_string() + ":").as_str());
256
257        match self {
258            #[cfg(feature = "hash-sha2")]
259            GeneralHash::SHA512(data) => for byte in data {
260                hex.push_str(&format!("{:02x}", byte));
261            },
262            #[cfg(feature = "hash-sha2")]
263            GeneralHash::SHA256(data) => for byte in data {
264                hex.push_str(&format!("{:02x}", byte));
265            },
266            #[cfg(feature = "hash-sha1")]
267            GeneralHash::SHA1(data) => for byte in data {
268                hex.push_str(&format!("{:02x}", byte));
269            },
270            #[cfg(feature = "hash-xxh")]
271            GeneralHash::XXH64(data) => for byte in data {
272                hex.push_str(&format!("{:02x}", byte));
273            },
274            #[cfg(feature = "hash-xxh")]
275            GeneralHash::XXH32(data) => for byte in data {
276                hex.push_str(&format!("{:02x}", byte));
277            },
278            GeneralHash::NULL => {
279                hex.push_str("00");
280            }
281        }
282
283        write!(f, "{}", hex)
284    }
285}
286
287impl Serialize for GeneralHash {
288    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer {
289        serializer.serialize_str(self.to_string().as_str())
290    }
291}
292
293impl FromStr for GeneralHash {
294    // Error type for parsing a `GeneralHash` from a string.
295    type Err = &'static str;
296
297    /// Parses a string into a `GeneralHash`.
298    /// 
299    /// # Arguments
300    /// * `hex` - The string to parse, in the format `hash_type:hash_data (hex)`.
301    /// 
302    /// # Returns
303    /// The `GeneralHash` that corresponds to the string or an error.
304    /// 
305    /// # Errors
306    /// Returns an error if the string does not correspond to a `GeneralHash`.
307    /// * If the hash type is not recognized.
308    /// * If the hash data is not valid (wrong length or non-hex string).
309    fn from_str(hex: &str) -> Result<Self, Self::Err> {
310        let mut iter = hex.split(':');
311        let hash_type = GeneralHashType::from_str(iter.next().ok_or_else(|| "No hash type")?).map_err(|_| "Failed to parse hash type")?;
312        
313        #[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))]
314        let data = match hash_type { 
315            GeneralHashType::NULL => Vec::new(),
316            _ => {
317                let data = iter.next().ok_or_else(|| "No hash data")?;
318                utils::decode_hex(data).map_err(|_| "Failed to decode hash data")?
319            }
320        };
321        
322        let mut hash = GeneralHash::from_type(hash_type);
323        match &mut hash {
324            #[cfg(feature = "hash-sha2")]
325            GeneralHash::SHA512(target_data) => {
326                if data.len() != 64 {
327                    return Err("Invalid data length");
328                }
329                target_data.copy_from_slice(&data);
330            },
331            #[cfg(feature = "hash-sha2")]
332            GeneralHash::SHA256(target_data) => {
333                if data.len() != 32 {
334                    return Err("Invalid data length");
335                }
336                target_data.copy_from_slice(&data);
337            },
338            #[cfg(feature = "hash-sha1")]
339            GeneralHash::SHA1(target_data) => {
340                if data.len() != 20 {
341                    return Err("Invalid data length");
342                }
343                target_data.copy_from_slice(&data);
344            },
345            #[cfg(feature = "hash-xxh")]
346            GeneralHash::XXH64(target_data) => {
347                if data.len() != 8 {
348                    return Err("Invalid data length");
349                }
350                target_data.copy_from_slice(&data);
351            },
352            #[cfg(feature = "hash-xxh")]
353            GeneralHash::XXH32(target_data) => {
354                if data.len() != 4 {
355                    return Err("Invalid data length");
356                }
357                target_data.copy_from_slice(&data);
358            },
359            GeneralHash::NULL => {}
360        }
361        Ok(hash)
362    }
363}
364
365impl<'de> Deserialize<'de> for GeneralHash {
366    /// Deserializes a `GeneralHash` from a string.
367    /// 
368    /// # Arguments
369    /// * `deserializer` - The deserializer to use.
370    /// 
371    /// # Returns
372    /// The deserialized `GeneralHash`.
373    /// 
374    /// # Errors
375    /// If the string could not be deserialized.
376    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
377        where
378            D: serde::Deserializer<'de> {
379        let hex = String::deserialize(deserializer)?;
380        GeneralHash::from_str(hex.as_str()).map_err(D::Error::custom)
381    }
382}
383
384impl GeneralHash {
385    /// Returns the hash value as a byte array.
386    ///
387    /// # Returns
388    /// A reference to the byte array that represents the hash value.
389    pub fn as_bytes(&self) -> &[u8] {
390        match self {
391            #[cfg(feature = "hash-sha2")]
392            GeneralHash::SHA512(data) => data,
393            #[cfg(feature = "hash-sha2")]
394            GeneralHash::SHA256(data) => data,
395            #[cfg(feature = "hash-sha1")]
396            GeneralHash::SHA1(data) => data,
397            #[cfg(feature = "hash-xxh")]
398            GeneralHash::XXH64(data) => data,
399            #[cfg(feature = "hash-xxh")]
400            GeneralHash::XXH32(data) => data,
401            GeneralHash::NULL => &[0; 0],
402        }
403    }
404
405    #[cfg(feature = "hash-sha2")]
406    /// Returns a new instance of a SHA512 hash value.
407    pub fn new_sha512() -> Self { Self::from_type(GeneralHashType::SHA512) }
408    
409    #[cfg(feature = "hash-sha2")]
410    /// Returns a new instance of a SHA256 hash value.
411    pub fn new_sha256() -> Self { Self::from_type(GeneralHashType::SHA256) }
412    
413    #[cfg(feature = "hash-sha1")]
414    /// Returns a new instance of a SHA1 hash value.
415    pub fn new_sha1() -> Self { Self::from_type(GeneralHashType::SHA1) }
416    
417    #[cfg(feature = "hash-xxh")]
418    /// Returns a new instance of a XXH64 hash value.
419    pub fn new_xxh64() -> Self { Self::from_type(GeneralHashType::XXH64) }
420    
421    #[cfg(feature = "hash-xxh")]
422    /// Returns a new instance of a XXH32 hash value.
423    pub fn new_xxh32() -> Self { Self::from_type(GeneralHashType::XXH32) }
424
425    /// Returns the type of the hash function used.
426    ///
427    /// # Returns
428    /// The type of the hash function used.
429    ///
430    /// # Examples
431    /// ```
432    /// use backup_deduplicator::hash::{GeneralHash, GeneralHashType};
433    ///
434    /// #[cfg(feature = "hash-sha2")]
435    /// {
436    ///    let hash = GeneralHash::new_sha256();
437    // 
438    //     let m = match hash.hash_type() {
439    //         GeneralHashType::SHA256 => true,
440    //         _ => false,
441    //     };
442    // 
443    //     assert!(m);
444    /// }
445    /// ```
446    pub fn hash_type(&self) -> GeneralHashType {
447        match self {
448            #[cfg(feature = "hash-sha2")]
449            GeneralHash::SHA512(_) => GeneralHashType::SHA512,
450            #[cfg(feature = "hash-sha2")]
451            GeneralHash::SHA256(_) => GeneralHashType::SHA256,
452            #[cfg(feature = "hash-sha1")]
453            GeneralHash::SHA1(_) => GeneralHashType::SHA1,
454            #[cfg(feature = "hash-xxh")]
455            GeneralHash::XXH64(_) => GeneralHashType::XXH64,
456            #[cfg(feature = "hash-xxh")]
457            GeneralHash::XXH32(_) => GeneralHashType::XXH32,
458            GeneralHash::NULL => GeneralHashType::NULL,
459        }
460    }
461
462    /// Returns a new instance of a `GeneralHash` with the specified hash type.
463    ///
464    /// # Arguments
465    /// * `hash_type` - The type of the hash function to use.
466    ///
467    /// # Returns
468    /// A new instance of a `GeneralHash` with the specified hash type.
469    pub fn from_type(hash_type: GeneralHashType) -> Self {
470        match hash_type {
471            #[cfg(feature = "hash-sha2")]
472            GeneralHashType::SHA512 => GeneralHash::SHA512([0; 64]),
473            #[cfg(feature = "hash-sha2")]
474            GeneralHashType::SHA256 => GeneralHash::SHA256([0; 32]),
475            #[cfg(feature = "hash-sha1")]
476            GeneralHashType::SHA1 => GeneralHash::SHA1([0; 20]),
477            #[cfg(feature = "hash-xxh")]
478            GeneralHashType::XXH64 => GeneralHash::XXH64([0; 8]),
479            #[cfg(feature = "hash-xxh")]
480            GeneralHashType::XXH32 => GeneralHash::XXH32([0; 4]),
481            GeneralHashType::NULL => GeneralHash::NULL,
482        }
483    }
484
485    /// Returns a new instance of a `GeneralHash` with the specified hash type.
486    ///
487    /// # Arguments
488    /// * `hash_type` - The type of the hash function to use.
489    ///
490    /// # Returns
491    /// A new instance of a `GeneralHash` with the specified hash type.
492    ///
493    /// # See also
494    /// * [GeneralHashType] - representation of the different types of hash functions.
495    pub fn hasher(&self) -> Box<dyn GeneralHasher> {
496        self.hash_type().hasher()
497    }
498
499    /// Computes the hash value of the specified data.
500    ///
501    /// # Arguments
502    /// * `reader` - The data to hash (supplied as `std::io::Read`).
503    ///
504    /// # Returns
505    /// The size of the data that was hashed.
506    ///
507    /// # Errors
508    /// Returns an error if the data could not be read.
509    pub fn hash_file<T>(&mut self, mut reader: T) -> anyhow::Result<u64>
510        where T: std::io::Read {
511
512        let mut hasher = self.hasher();
513        let mut buffer = [0; 4096];
514        let mut content_size = 0;
515
516        loop {
517            let bytes_read = reader.read(&mut buffer)?;
518            content_size += bytes_read as u64;
519            if bytes_read == 0 {
520                break;
521            }
522            hasher.update(&buffer[..bytes_read]);
523        }
524
525        *self = hasher.finalize();
526
527        Ok(content_size)
528    }
529
530    /// Computes the hash value of file iterator/directory.
531    ///
532    /// # Arguments
533    /// * `children` - The iterator of files to hash.
534    ///
535    /// # Returns
536    /// The count of files that were hashed.
537    ///
538    /// # Errors
539    /// Does not return an error. Might return an error in the future.
540    pub fn hash_directory<'a>(&mut self, children: impl Iterator<Item = &'a BuildFile>) -> anyhow::Result<u64> {
541        let mut hasher = self.hasher();
542
543        let mut content_size = 0;
544
545        for child in children {
546            content_size += 1;
547            hasher.update(child.get_content_hash().as_bytes());
548        }
549
550        *self = hasher.finalize();
551
552        Ok(content_size)
553    }
554
555    /// Computes the hash value of the specified path.
556    ///
557    /// # Arguments
558    /// * `path` - The path to hash.
559    ///
560    /// # Returns
561    /// Does not return a value.
562    ///
563    /// # Errors
564    /// Does not return an error. Might return an error in the future.
565    pub fn hash_path(&mut self, path: &Path) -> anyhow::Result<()> {
566        let mut hasher = self.hasher();
567
568        hasher.update(path.as_os_str().as_encoded_bytes());
569
570        *self = hasher.finalize();
571
572        Ok(())
573    }
574
575    /// Computes the hash value of the specified file path.
576    ///
577    /// # Arguments
578    /// * `path` - The file path to hash.
579    ///
580    /// # Returns
581    /// Does not return a value.
582    ///
583    /// # Errors
584    /// Does not return an error. Might return an error in the future.
585    pub fn hash_filepath(&mut self, path: &FilePath) -> anyhow::Result<()> {
586        let mut hasher = self.hasher();
587
588        for component in &path.path {
589            hasher.update(component.path.as_os_str().as_encoded_bytes());
590        }
591
592        *self = hasher.finalize();
593
594        Ok(())
595    }
596}
597
598/// `GeneralHasher` is a trait for computing hash values.
599///
600/// # Methods
601/// * `new` - creates a new instance of a `GeneralHasher`.
602/// * `update` - updates the hash value with the specified data.
603/// * `finalize` - finalizes the hash value and returns the result.
604///
605/// # Examples
606/// See the example in the `GeneralHash` documentation.
607///
608/// # See also
609/// * [GeneralHash] - representation of a hash value.
610/// * [GeneralHashType] - representation of the different types of hash functions.
611pub trait GeneralHasher {
612    /// Creates a new instance of a `GeneralHasher`.
613    ///
614    /// # Returns
615    /// A new instance of a `GeneralHasher`.
616    fn new() -> Self where Self: Sized;
617
618    /// Updates the hash value with the specified data.
619    ///
620    /// # Arguments
621    /// * `data` - The data to hash.
622    fn update(&mut self, data: &[u8]);
623
624    /// Finalizes the hash value and returns the result.
625    /// Consumes the `GeneralHasher` instance.
626    ///
627    /// # Returns
628    /// The hash value.
629    fn finalize(self: Box<Self>) -> GeneralHash;
630}
631
632#[cfg(feature = "hash-sha1")]
633/// `GeneralHasher` implementation for the SHA1 crate
634mod sha1;
635#[cfg(feature = "hash-sha2")]
636/// `GeneralHasher` implementation for the SHA2 crate
637mod sha2;
638#[cfg(feature = "hash-xxh")]
639/// `GeneralHasher` implementation for the XXH crate
640mod xxh;
641/// `GeneralHasher` implementation for the NULL hash function
642mod null;