Skip to main content

libhasher/
lib.rs

1//! # libhasher
2//! A simple library for hashing files and text with a variety of algorithms, including non-cryptographic ones.
3//! It also supports progress bars for large files and the ability to use Blake3's `mmap` feature for even faster hashing of large files.
4
5use anyhow::{anyhow, Result};
6use digest::{Digest, DynDigest};
7use indicatif::{ProgressBar, ProgressStyle};
8use noncrypto_digests::{Fnv, Xxh32, Xxh3_128, Xxh3_64, Xxh64};
9use std::{fs, io::Read, mem, path::Path};
10
/// The result of hashing a file
///
/// This is a plain data carrier, so it derives the common comparison and copy
/// traits to let callers clone, compare and deduplicate results.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HashResult {
    /// The file that was hashed
    pub filename: String,
    /// The resulting hash
    pub hash: String,
}
19
20fn get_progress_bar(progress: bool, len: u64, path: &Path, min_len: Option<u64>) -> ProgressBar {
21    // Set a minimum size of 256MB
22    let min_len = min_len.unwrap_or(256 * 1024 * 1024_u64);
23    if progress && len >= min_len {
24        let pb = ProgressBar::new(len);
25        pb.set_message(path.display().to_string());
26        pb.set_style(ProgressStyle::with_template("{spinner:.blue} {msg} [{wide_bar:.cyan/blue}] {bytes}/{total_bytes} ({bytes_per_sec}, {eta})")
27                .unwrap()
28                .progress_chars("█▉▊▋▌▍▎▏ "));
29        pb
30    } else {
31        ProgressBar::hidden()
32    }
33}
34
35// This code should never be reached by normal means, so no coverage is needed
36#[cfg(not(tarpaulin_include))]
37/// Trait to allow for dynamic dispatch of different hashers.
38/// This is necessary because the `Digest` trait does not support dynamic dispatch,
39/// and we want to be able to use different hash algorithms with the same interface.
40pub trait DynHasher: Send {
41    /// Update the hasher with the given data
42    fn update(&mut self, data: &[u8]);
43    /// Finalize the hasher and return the resulting hash as a byte vector
44    fn finalize(&mut self) -> Vec<u8>;
45
46    /// Only supported for blake3 with the `mmap` and `rayon` features enabled.
47    /// All other hashers return an error by default.
48    fn update_mmap_rayon(&mut self, _path: &std::path::Path) -> Result<(), anyhow::Error> {
49        Err(anyhow::anyhow!(
50            "update_mmap_rayon is only supported for blake3 \
51             with the 'mmap' and 'rayon' features enabled"
52        ))
53    }
54}
55
56struct DigestHasher(Box<dyn DynDigest + Send>);
57
58#[cfg(not(tarpaulin_include))]
59impl DynHasher for DigestHasher {
60    fn update(&mut self, data: &[u8]) {
61        self.0.update(data);
62    }
63    fn finalize(&mut self) -> Vec<u8> {
64        self.0.finalize_reset().into()
65    }
66}
67
68struct Blake3Hasher(blake3::Hasher);
69
70#[cfg(not(tarpaulin_include))]
71impl DynHasher for Blake3Hasher {
72    fn update(&mut self, data: &[u8]) {
73        self.0.update(data);
74    }
75    fn finalize(&mut self) -> Vec<u8> {
76        let digest = self.0.finalize();
77        self.0.reset();
78        digest.as_bytes().to_vec()
79    }
80
81    fn update_mmap_rayon(&mut self, path: &std::path::Path) -> Result<(), anyhow::Error> {
82        self.0.update_mmap_rayon(path)?;
83        Ok(())
84    }
85}
86
87struct NonCryptoHasher<H: Digest + Default + Send>(H);
88
89#[cfg(not(tarpaulin_include))]
90impl<H: Digest + Default + Send> DynHasher for NonCryptoHasher<H> {
91    fn update(&mut self, data: &[u8]) {
92        Digest::update(&mut self.0, data);
93    }
94    fn finalize(&mut self) -> Vec<u8> {
95        mem::take(&mut self.0).finalize().to_vec()
96    }
97}
98
99/// A dynamic Hasher struct to handle all supported algorithms
100pub struct Hasher {
101    /// The hasher object, only used internaly
102    hasher: Box<dyn DynHasher>,
103}
104
105impl Hasher {
106    /// Create a hasher for the given algorithm if it's supported
107    ///
108    /// Will raise an error on an unsupported algorithm
109    ///
110    /// # Examples
111    ///
112    /// ```
113    /// use libhasher::Hasher;
114    ///
115    /// let mut hasher = Hasher::new("blake3").unwrap();
116    /// ```
117    pub fn new(algo: &str) -> Result<Self> {
118        let hasher: Box<dyn DynHasher> = match algo {
119            "md5" => Box::new(DigestHasher(Box::new(md5::Md5::new()))),
120            "sha1" => Box::new(DigestHasher(Box::new(sha1::Sha1::new()))),
121            "sha256" => Box::new(DigestHasher(Box::new(sha2::Sha256::new()))),
122            "sha512" => Box::new(DigestHasher(Box::new(sha2::Sha512::new()))),
123            "sha3_256" => Box::new(DigestHasher(Box::new(sha3::Sha3_256::new()))),
124            "sha3_512" => Box::new(DigestHasher(Box::new(sha3::Sha3_512::new()))),
125            "blake2" => Box::new(DigestHasher(Box::new(blake2::Blake2b512::new()))),
126            "blake3" => Box::new(Blake3Hasher(blake3::Hasher::new())),
127            "fnv" => Box::new(NonCryptoHasher(Fnv::default())),
128            "xxh32" => Box::new(NonCryptoHasher(Xxh32::default())),
129            "xxh64" => Box::new(NonCryptoHasher(Xxh64::default())),
130            "xxh3_64" => Box::new(NonCryptoHasher(Xxh3_64::default())),
131            "xxh3_128" => Box::new(NonCryptoHasher(Xxh3_128::default())),
132            _ => return Err(anyhow!("Unsupported hash algorithm: {}", algo)),
133        };
134
135        Ok(Hasher { hasher })
136    }
137
138    /// A low-level way to directly update the internal hasher.
139    /// Only use if you know what you're doing!
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use libhasher::Hasher;
145    ///
146    /// let mut hasher = Hasher::new("blake3").unwrap();
147    /// hasher.update("Hello, World".as_bytes());
148    /// ```
149    pub fn update(&mut self, data: &[u8]) {
150        self.hasher.update(data);
151    }
152
153    /// A low-level way to directly finalize the internal hasher.
154    /// Only use if you know what you're doing!
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// use libhasher::Hasher;
160    /// use hex;
161    ///
162    /// let mut hasher = Hasher::new("blake3").unwrap();
163    /// hasher.update("Hello, World".as_bytes());
164    /// let hash = hasher.finalize();
165    /// println!("{}", hex::encode(hash));
166    /// ```
167    pub fn finalize(&mut self) -> Vec<u8> {
168        self.hasher.finalize()
169    }
170
171    /// High-level way to hash text
172    ///
173    /// # Examples
174    ///
175    /// ```
176    /// use libhasher::Hasher;
177    ///
178    /// let mut hasher = Hasher::new("blake3").unwrap();
179    /// // This can also be an `&String`
180    /// let result = hasher.hash_text("Hello, World").unwrap();
181    ///
182    /// println!("{}", result);
183    /// ```
184    pub fn hash_text(&mut self, text: &str) -> Result<String> {
185        self.update(text.as_bytes());
186        Ok(hex::encode(self.finalize()))
187    }
188
189    /// An internal way to hash a file with Blake3's mmap and rayon features
190    ///
191    /// Not publicly exposed, but accessible if `mmap` is set to `true` on
192    /// `hash_file` or `hash_file_progressbar`
193    fn hash_file_mmap(&mut self, path: &Path) -> Result<HashResult> {
194        self.hasher.update_mmap_rayon(path)?;
195        let hash = self.finalize();
196        Ok(HashResult {
197            filename: path.display().to_string(),
198            hash: hex::encode(hash),
199        })
200    }
201
202    /// Internal hasher. Separating the hashing from the functions provides better maintainability
203    fn hash_reader(&mut self, reader: &mut impl Read, pb: &ProgressBar) -> Result<Vec<u8>> {
204        let mut buf = [0u8; 65536];
205        loop {
206            let n = reader.read(&mut buf)?;
207            if n == 0 {
208                break;
209            }
210            pb.inc(n as u64);
211            self.update(&buf[..n])
212        }
213        pb.finish_and_clear();
214        Ok(self.finalize())
215    }
216
217    /// Hash a file with an exposed progress bar. Useful for large files
218    ///
219    /// Hashes a `path`, optionally showing `progress`. Allows you to use
220    /// the Blake3 `mmap` feature as well.
221    ///
222    /// If the Hasher's algorithm does not support `mmap` (only blake3 supports `mmap`),
223    /// it will quietly fall back to not using it
224    ///
225    /// If `min_len` is specified, a progress bar will not display unless the file
226    /// is larger than `min_len` bytes, default 256MB
227    ///
228    /// # Examples
229    ///
230    /// ```
231    /// use libhasher::Hasher;
232    /// use std::path::PathBuf;
233    ///
234    /// // We'll use SHA256 this time
235    /// let mut hasher = Hasher::new("sha256").unwrap();
236    /// let result = hasher.hash_file_progressbar(&PathBuf::from("Cargo.toml"), true, true, None).unwrap();
237    ///
238    /// println!("{}", result.hash);
239    /// ```
240    pub fn hash_file_progressbar(
241        &mut self,
242        path: &Path,
243        progress: bool,
244        mmap: bool,
245        min_len: Option<u64>,
246    ) -> Result<HashResult> {
247        if mmap {
248            if let Ok(result) = self.hash_file_mmap(path) {
249                return Ok(result);
250            }
251        }
252
253        let mut file = fs::File::open(path)?;
254        let pb = get_progress_bar(progress, file.metadata()?.len(), path, min_len);
255        let hash = self.hash_reader(&mut file, &pb)?;
256
257        Ok(HashResult {
258            filename: path.display().to_string(),
259            hash: hex::encode(hash),
260        })
261    }
262
263    /// Hash a file with an exposed progress bar. Useful for large files
264    ///
265    /// Hashes a `path`. Allows you to use the Blake3 `mmap` feature as well.
266    ///
267    /// If the Hasher's algorithm does not support `mmap` (only blake3 supports `mmap`),
268    /// it will quietly fall back to not using it
269    ///
270    /// # Examples
271    ///
272    /// ```
273    /// use libhasher::Hasher;
274    /// use std::path::PathBuf;
275    ///
276    /// // We'll use SHA256 this time
277    /// let mut hasher = Hasher::new("sha256").unwrap();
278    /// let result = hasher.hash_file(&PathBuf::from("Cargo.toml"), true).unwrap();
279    ///
280    /// println!("{}", result.hash);
281    /// ```
282    pub fn hash_file(&mut self, path: &Path, mmap: bool) -> Result<HashResult> {
283        if mmap {
284            if let Ok(result) = self.hash_file_mmap(path) {
285                return Ok(result);
286            }
287        }
288
289        let mut file = fs::File::open(path)?;
290        let hash = self.hash_reader(&mut file, &ProgressBar::hidden())?;
291
292        Ok(HashResult {
293            filename: path.display().to_string(),
294            hash: hex::encode(hash),
295        })
296    }
297}
298
#[cfg(test)]
mod tests {
    use super::*;
    use std::{env, path::PathBuf};

    // We are only checking algorithms located in this file.
    // Each entry is (algorithm name, expected hex digest of `tests/test.txt`).
    static TEST_CASES: &[(&str, &str)] = &[
        ("blake3", "68569ddf344009b938e1db0ec39b151b1626cfe46a87c3910dc18936a233f92b"),
        ("md5", "0cbc6611f5540bd0809a388dc95a615b"),
        ("sha1", "640ab2bae07bedc4c163f679a746f7ab7fb5d1fa"),
        ("sha256", "532eaabd9574880dbf76b9b8cc00832c20a6ec113d682299550d7a6e0f345e25"),
        ("sha512", "c6ee9e33cf5c6715a1d148fd73f7318884b41adcb916021e2bc0e800a5c5dd97f5142178f6ae88c8fdd98e1afb0ce4c8d2c54b5f37b30b7da1997bb33b0b8a31"),
        ("sha3_256", "c0a5cca43b8aa79eb50e3464bc839dd6fd414fae0ddf928ca23dcebf8a8b8dd0"),
        ("sha3_512", "301bb421c971fbb7ed01dcc3a9976ce53df034022ba982b97d0f27d48c4f03883aabf7c6bc778aa7c383062f6823045a6d41b8a720afbb8a9607690f89fbe1a7"),
        ("blake2", "3d896914f86ae22c48b06140adb4492fa3f8e2686a83cec0c8b1dcd6903168751370078bbd6bbfe02a6ab1df12a19b5991b58e65e243ec279f6a5770b2dd0e31"),
        ("xxh3_128", "391c8305c491690bc2da658a2d6348d5"),
        ("xxh3_64", "b3f5bb77a55fad5e"),
        ("xxh64", "da83efc38a8922b4"),
        ("xxh32", "eac53571"),
        ("fnv","2474e7fb1aec9f05"),
    ];

    /// Resolve a fixture under the crate's `tests` directory
    fn get_test_file(name: &str) -> PathBuf {
        let base = env::var("CARGO_MANIFEST_DIR")
            .expect("CARGO_MANIFEST_DIR must be set; run the tests through cargo");
        PathBuf::from(base).join("tests").join(name)
    }

    /// Look up the expected digest by algorithm name, so tests do not depend on
    /// the ordering of `TEST_CASES`
    fn expected_hash(algorithm: &str) -> &'static str {
        TEST_CASES
            .iter()
            .find(|(name, _)| *name == algorithm)
            .map(|(_, hash)| *hash)
            .expect("algorithm must be listed in TEST_CASES")
    }

    #[test]
    fn test_hash_file() {
        let file = get_test_file("test.txt");
        for (algorithm, expected) in TEST_CASES {
            let mut hasher = Hasher::new(algorithm).unwrap();
            let result = hasher.hash_file(&file, false).unwrap();
            assert_eq!(
                result.hash, *expected,
                "Hash mismatch for algorithm: {algorithm}"
            );
        }
    }

    #[test]
    fn test_hash_file_mmap() {
        let file = get_test_file("test.txt");
        let algorithm = "blake3";
        let expected = expected_hash(algorithm);
        let mut hasher = Hasher::new(algorithm).unwrap();
        let result = hasher.hash_file(&file, true).unwrap();
        assert_eq!(result.hash, expected, "Hashing with mmap failed");

        // Reusing the same hasher also checks that finalizing reset its state
        let result = hasher
            .hash_file_progressbar(&file, true, true, Some(1))
            .unwrap();
        assert_eq!(
            result.hash, expected,
            "Hashing with mmap and progress bar failed"
        );
    }

    #[test]
    fn test_hash_file_progressbar() {
        let file = get_test_file("test.txt");
        let algorithm = "blake3";
        let expected = expected_hash(algorithm);
        let mut hasher = Hasher::new(algorithm).unwrap();
        let result = hasher
            .hash_file_progressbar(&file, true, false, Some(1))
            .unwrap();
        assert_eq!(result.hash, expected, "Hashing with progress bar failed");
        let result = hasher
            .hash_file_progressbar(&file, false, false, Some(1))
            .unwrap();
        assert_eq!(
            result.hash, expected,
            "Hashing without progress bar failed"
        );
    }

    #[test]
    fn test_hash_text() {
        // Compared against the digest of `tests/test.txt`, so this presumably relies
        // on that fixture containing exactly "Test"
        let algorithm = "blake3";
        let expected = expected_hash(algorithm);
        let mut hasher = Hasher::new(algorithm).unwrap();
        let result = hasher.hash_text("Test").unwrap();
        assert_eq!(result, expected, "Hashing text failed");
    }

    #[test]
    fn test_unsupported_algorithm() {
        let result = Hasher::new("md1");
        assert!(result.is_err());
    }

    #[test]
    fn test_mmap_unsupported_algorithm() {
        let file = get_test_file("test.txt");
        let mut hasher = Hasher::new("md5").unwrap();
        let result = hasher.hash_file(&file, true);
        assert!(
            result.is_ok(),
            "Unsupported algorithm should fall back to non-mmap hashing"
        );
        // The fallback must produce the same digest as plain streaming
        assert_eq!(
            result.unwrap().hash,
            expected_hash("md5"),
            "Fallback hashing produced a different digest"
        );
    }

    #[test]
    fn test_large_file() {
        let file = get_test_file("test.large");
        let algorithm = "sha3_512";
        let mut hasher = Hasher::new(algorithm).unwrap();
        let result = hasher.hash_file(&file, false);
        assert!(result.is_ok(), "Hashing large file failed");
    }
}