Skip to main content

krait/index/
hasher.rs

1use std::path::{Path, PathBuf};
2
3use anyhow::Context;
4
5/// Hash a file's contents with BLAKE3, returning hex-encoded digest.
6///
7/// # Errors
8/// Returns an error if the file can't be read.
9pub fn hash_file(path: &Path) -> anyhow::Result<String> {
10    let data =
11        std::fs::read(path).with_context(|| format!("failed to read: {}", path.display()))?;
12    Ok(blake3::hash(&data).to_hex().to_string())
13}
14
15/// Hash multiple files in parallel using rayon.
16///
17/// Returns a vec of (path, hash) pairs. Files that can't be read are skipped.
18#[must_use]
19pub fn hash_files_parallel(paths: &[PathBuf]) -> Vec<(PathBuf, String)> {
20    use rayon::prelude::*;
21    paths
22        .par_iter()
23        .filter_map(|p| hash_file(p).ok().map(|h| (p.clone(), h)))
24        .collect()
25}
26
#[cfg(test)]
mod tests {
    use super::*;

    /// Reference digest computed straight from in-memory bytes, so the tests
    /// can check that `hash_file` really hashes the file's contents.
    fn expected_digest(bytes: &[u8]) -> String {
        blake3::hash(bytes).to_hex().to_string()
    }

    #[test]
    fn hash_deterministic() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.txt");
        std::fs::write(&file, "hello world").unwrap();

        let h1 = hash_file(&file).unwrap();
        let h2 = hash_file(&file).unwrap();
        assert_eq!(h1, h2);
        // A non-empty check alone would accept any garbage; pin the real digest.
        assert_eq!(h1, expected_digest(b"hello world"));
    }

    #[test]
    fn hash_changes_on_modify() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.txt");

        std::fs::write(&file, "version 1").unwrap();
        let h1 = hash_file(&file).unwrap();

        std::fs::write(&file, "version 2").unwrap();
        let h2 = hash_file(&file).unwrap();

        assert_ne!(h1, h2);
    }

    #[test]
    fn hash_parallel_matches_sequential() {
        let dir = tempfile::tempdir().unwrap();
        let files: Vec<PathBuf> = (0..5)
            .map(|i| {
                let p = dir.path().join(format!("file{i}.txt"));
                std::fs::write(&p, format!("content {i}")).unwrap();
                p
            })
            .collect();

        let parallel = hash_files_parallel(&files);
        let sequential: Vec<(PathBuf, String)> = files
            .iter()
            .map(|p| (p.clone(), hash_file(p).unwrap()))
            .collect();

        // Whole-vec equality covers length, order, paths and digests at once.
        assert_eq!(parallel, sequential);
    }

    #[test]
    fn hash_parallel_skips_unreadable_files() {
        let dir = tempfile::tempdir().unwrap();
        let first = dir.path().join("first.txt");
        let missing = dir.path().join("missing.txt");
        let last = dir.path().join("last.txt");
        std::fs::write(&first, "first").unwrap();
        std::fs::write(&last, "last").unwrap();

        // The unreadable entry sits between two good ones so that both the
        // skip and the ordering of the survivors are exercised.
        let hashed = hash_files_parallel(&[first.clone(), missing, last.clone()]);

        let expected = vec![
            (first, expected_digest(b"first")),
            (last, expected_digest(b"last")),
        ];
        assert_eq!(hashed, expected);
    }

    #[test]
    fn hash_parallel_empty_input() {
        assert!(hash_files_parallel(&[]).is_empty());
    }

    #[test]
    fn hash_missing_file_returns_error() {
        let err = hash_file(Path::new("/nonexistent/file.txt")).unwrap_err();
        // The outermost context message should name the operation that failed.
        assert!(err.to_string().contains("failed to read"));
    }

    #[test]
    fn hash_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("empty.txt");
        std::fs::write(&file, "").unwrap();

        let hash = hash_file(&file).unwrap();
        assert_eq!(hash, expected_digest(b""));
    }
}