Skip to main content

bock_build/
content_hash.rs

//! Content hashing for change detection.
//!
//! Provides SHA-256 content hashing of source files to detect changes between
//! builds. Only modules whose content hash differs from the cached hash need
//! to be recompiled.
6
7use sha2::{Digest, Sha256};
8use std::collections::HashMap;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::Path;
13
14/// A hex-encoded SHA-256 content hash.
15#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
16pub struct ContentHash(pub String);
17
18impl fmt::Display for ContentHash {
19    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20        f.write_str(&self.0)
21    }
22}
23
24impl ContentHash {
25    /// Computes the SHA-256 hash of the given content bytes.
26    #[must_use]
27    pub fn of_bytes(content: &[u8]) -> Self {
28        let mut hasher = Sha256::new();
29        hasher.update(content);
30        let result = hasher.finalize();
31        Self(hex_encode(&result))
32    }
33
34    /// Computes the SHA-256 hash of the given string content.
35    #[must_use]
36    pub fn of_str(content: &str) -> Self {
37        Self::of_bytes(content.as_bytes())
38    }
39
40    /// Computes the SHA-256 hash of a file's contents.
41    ///
42    /// # Errors
43    ///
44    /// Returns an IO error if the file cannot be read.
45    pub fn of_file(path: &Path) -> io::Result<Self> {
46        let content = fs::read(path)?;
47        Ok(Self::of_bytes(&content))
48    }
49}
50
51/// A map from module identifiers to their content hashes.
52///
53/// Used to compare current file state against a cached build state
54/// to determine which modules have changed.
55#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
56pub struct HashManifest {
57    /// Map from module ID to content hash.
58    pub hashes: HashMap<String, ContentHash>,
59}
60
61impl HashManifest {
62    /// Creates a new empty hash manifest.
63    #[must_use]
64    pub fn new() -> Self {
65        Self::default()
66    }
67
68    /// Inserts or updates a module's content hash.
69    pub fn insert(&mut self, module_id: String, hash: ContentHash) {
70        self.hashes.insert(module_id, hash);
71    }
72
73    /// Returns the stored hash for a module, if any.
74    #[must_use]
75    pub fn get(&self, module_id: &str) -> Option<&ContentHash> {
76        self.hashes.get(module_id)
77    }
78
79    /// Computes which modules have changed between this manifest (old) and another (new).
80    ///
81    /// Returns the set of module IDs that are new, removed, or have different hashes.
82    #[must_use]
83    pub fn changed_modules(&self, current: &HashManifest) -> Vec<String> {
84        let mut changed = Vec::new();
85
86        // Modules that are new or have changed content
87        for (module_id, new_hash) in &current.hashes {
88            match self.hashes.get(module_id) {
89                Some(old_hash) if old_hash == new_hash => {}
90                _ => changed.push(module_id.clone()),
91            }
92        }
93
94        // Modules that were removed
95        for module_id in self.hashes.keys() {
96            if !current.hashes.contains_key(module_id) {
97                changed.push(module_id.clone());
98            }
99        }
100
101        changed
102    }
103
104    /// Returns the number of entries in the manifest.
105    #[must_use]
106    pub fn len(&self) -> usize {
107        self.hashes.len()
108    }
109
110    /// Returns true if the manifest is empty.
111    #[must_use]
112    pub fn is_empty(&self) -> bool {
113        self.hashes.is_empty()
114    }
115}
116
/// Hex-encode a byte slice.
///
/// Each input byte becomes exactly two lowercase hexadecimal characters
/// (high nibble first), so the result has length `2 * bytes.len()`. An empty
/// slice yields an empty string.
fn hex_encode(bytes: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Single allocation of the final size, instead of a temporary `String`
    // per byte as `format!` would create.
    let mut encoded = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    encoded
}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a manifest from `(module id, content)` pairs, hashing each
    /// content string with `ContentHash::of_str`.
    fn manifest_of(entries: &[(&str, &str)]) -> HashManifest {
        let mut manifest = HashManifest::new();
        for (module_id, content) in entries {
            manifest.insert((*module_id).to_string(), ContentHash::of_str(content));
        }
        manifest
    }

    #[test]
    fn hash_deterministic() {
        let h1 = ContentHash::of_str("hello world");
        let h2 = ContentHash::of_str("hello world");
        assert_eq!(h1, h2);
    }

    #[test]
    fn hash_differs_for_different_content() {
        let h1 = ContentHash::of_str("hello");
        let h2 = ContentHash::of_str("world");
        assert_ne!(h1, h2);
    }

    #[test]
    fn hash_is_hex_encoded_sha256() {
        let h = ContentHash::of_str("");
        // SHA-256 of empty string is well-known
        assert_eq!(
            h.0,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
        assert_eq!(h.0.len(), 64);
    }

    #[test]
    fn hash_manifest_changed_modules() {
        let old = manifest_of(&[("A", "v1"), ("B", "v1"), ("C", "v1")]);
        let new = manifest_of(&[
            ("A", "v1"), // unchanged
            ("B", "v2"), // changed
            ("D", "v1"), // added
        ]);

        // Sort locally so the assertion does not rely on the order in which
        // `changed_modules` reports IDs. "C" is expected because it was removed.
        let mut changed = old.changed_modules(&new);
        changed.sort();
        assert_eq!(changed, vec!["B", "C", "D"]);
    }

    #[test]
    fn hash_manifest_empty() {
        let old = HashManifest::new();
        let new = HashManifest::new();
        assert!(old.changed_modules(&new).is_empty());
    }

    #[test]
    fn hash_manifest_all_new() {
        let old = HashManifest::new();
        let new = manifest_of(&[("A", "v1"), ("B", "v1")]);

        let mut changed = old.changed_modules(&new);
        changed.sort();
        assert_eq!(changed, vec!["A", "B"]);
    }

    #[test]
    fn hash_of_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.bock");
        fs::write(&path, "fn main() {}").unwrap();

        // Hashing the file must match hashing the same bytes in memory.
        let h1 = ContentHash::of_file(&path).unwrap();
        let h2 = ContentHash::of_str("fn main() {}");
        assert_eq!(h1, h2);
    }
}