Skip to main content

vanta_store/
hash.rs

//! Canonical content hashing for the store (`docs/09-store.md`).
//!
//! Store keys are `blake3-<hex>` over the *canonicalized* materialized tree:
//! entries are visited in sorted order, file permissions are reduced to a
//! single executable/non-executable flag, timestamps are excluded, and
//! symlinks are hashed by their target rather than followed. Empty directories
//! do not contribute to the key. This makes the same content hash identically
//! across machines and filesystems.
7
8use blake3::Hasher;
9use std::fs;
10use std::path::{Path, PathBuf};
11use vanta_core::{Area, VtaError, VtaResult};
12
13/// Hash a byte slice, returning a `blake3-<hex>` key.
14pub fn hash_bytes(bytes: &[u8]) -> String {
15    format!("blake3-{}", blake3::hash(bytes).to_hex())
16}
17
18/// Hash a directory tree canonically, returning a `blake3-<hex>` store key.
19pub fn hash_tree(root: &Path) -> VtaResult<String> {
20    let mut files = Vec::new();
21    collect(root, root, &mut files)?;
22    files.sort();
23
24    let mut hasher = Hasher::new();
25    for rel in &files {
26        hasher.update(rel.to_string_lossy().as_bytes());
27        hasher.update(&[0]);
28        let full = root.join(rel);
29        let meta = fs::symlink_metadata(&full).map_err(|e| io_err(&full, e))?;
30        if meta.file_type().is_symlink() {
31            let target = fs::read_link(&full).map_err(|e| io_err(&full, e))?;
32            hasher.update(b"L");
33            hasher.update(target.to_string_lossy().as_bytes());
34        } else {
35            hasher.update(if is_executable(&meta) { b"X" } else { b"F" });
36            let contents = fs::read(&full).map_err(|e| io_err(&full, e))?;
37            hasher.update(&(contents.len() as u64).to_le_bytes());
38            hasher.update(&contents);
39        }
40        hasher.update(&[0]);
41    }
42    Ok(format!("blake3-{}", hasher.finalize().to_hex()))
43}
44
45/// Recursively collect file paths (relative to `root`) under `dir`.
46fn collect(root: &Path, dir: &Path, out: &mut Vec<PathBuf>) -> VtaResult<()> {
47    for entry in fs::read_dir(dir).map_err(|e| io_err(dir, e))? {
48        let entry = entry.map_err(|e| io_err(dir, e))?;
49        let path = entry.path();
50        let ty = entry.file_type().map_err(|e| io_err(&path, e))?;
51        if ty.is_dir() {
52            collect(root, &path, out)?;
53        } else {
54            // Files and symlinks are recorded; empty directories are not part of
55            // content identity (they carry no bytes).
56            let rel = path.strip_prefix(root).unwrap_or(&path).to_path_buf();
57            out.push(rel);
58        }
59    }
60    Ok(())
61}
62
/// Report whether `meta` describes an executable file.
///
/// On Unix this is true when any of the owner, group or other execute bits is
/// set in the file mode.
#[cfg(unix)]
fn is_executable(meta: &fs::Metadata) -> bool {
    use std::os::unix::fs::PermissionsExt;

    // Owner, group and other execute bits.
    const EXEC_BITS: u32 = 0o111;

    let mode = meta.permissions().mode();
    (mode & EXEC_BITS) != 0
}

/// Report whether `_meta` describes an executable file.
///
/// Off Unix there is no mode bit to consult (on Windows, executability follows
/// the file extension), so every file is reported as non-executable. That keeps
/// the hash of a given artifact consistent on that platform.
#[cfg(not(unix))]
fn is_executable(_meta: &fs::Metadata) -> bool {
    false
}
75
76fn io_err(path: &Path, e: std::io::Error) -> VtaError {
77    VtaError::new(Area::Store, 2, format!("hashing {}: {e}", path.display()))
78}
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a fresh, empty scratch directory named after `tag` and the
    /// current process id.
    fn tmp(tag: &str) -> PathBuf {
        let name = format!("vanta-hash-{}-{}", tag, std::process::id());
        let dir = std::env::temp_dir().join(name);
        // Clear leftovers from an earlier run; a missing directory is fine.
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Keys carry the `blake3-` prefix and differ for different inputs.
    #[test]
    fn bytes_have_prefix() {
        let key = hash_bytes(b"hello");
        assert!(key.starts_with("blake3-"));
        assert_ne!(hash_bytes(b"a"), hash_bytes(b"b"));
    }

    /// Hashing the same tree twice agrees; editing a file changes the key.
    #[test]
    fn tree_is_deterministic_and_content_sensitive() {
        let root = tmp("tree");
        fs::create_dir_all(root.join("sub")).unwrap();
        fs::write(root.join("a.txt"), b"alpha").unwrap();
        fs::write(root.join("sub/b.txt"), b"beta").unwrap();

        // Deterministic: two passes over an unchanged tree give one key.
        let first = hash_tree(&root).unwrap();
        let second = hash_tree(&root).unwrap();
        assert_eq!(first, second);

        // Content-sensitive: rewriting a file must move the key.
        fs::write(root.join("a.txt"), b"ALPHA").unwrap();
        let changed = hash_tree(&root).unwrap();
        assert_ne!(changed, first);

        let _ = fs::remove_dir_all(&root);
    }
}