Skip to main content

weave_content/
build_cache.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
/// Upper bound on the size of the on-disk build cache: 50 MB.
const MAX_CACHE_SIZE_BYTES: u64 = 50 * 1024 * 1024;

/// Upper bound on the number of entries held by the build cache.
const MAX_CACHE_ENTRIES: usize = 50_000;

/// Name of the cache file, which lives in the content root and is gitignored.
pub const CACHE_FILENAME: &str = ".build-cache.json";
15
16/// A single file entry in the build cache.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct BuildCacheEntry {
19    /// SHA-256 hex digest of the file contents.
20    pub hash: String,
21    /// Paths of entity files this case depends on (empty for entity files).
22    #[serde(default, skip_serializing_if = "Vec::is_empty")]
23    pub deps: Vec<String>,
24}
25
26/// Incremental build cache backed by a JSON file.
27#[derive(Debug)]
28pub struct BuildCache {
29    path: PathBuf,
30    entries: HashMap<String, BuildCacheEntry>,
31}
32
33impl BuildCache {
34    /// Create an empty in-memory cache (no file backing).
35    pub fn empty() -> Self {
36        Self {
37            path: PathBuf::new(),
38            entries: HashMap::new(),
39        }
40    }
41
42    /// Load cache from file, or create empty if file doesn't exist or is invalid.
43    ///
44    /// # Errors
45    ///
46    /// Returns an error if the file exists but cannot be read.
47    pub fn load(content_root: &Path) -> Result<Self, String> {
48        let path = content_root.join(CACHE_FILENAME);
49        let entries = if path.exists() {
50            // Check file size before reading
51            let meta =
52                std::fs::metadata(&path).map_err(|e| format!("failed to stat cache file: {e}"))?;
53            if meta.len() > MAX_CACHE_SIZE_BYTES {
54                eprintln!("build cache exceeds {MAX_CACHE_SIZE_BYTES} bytes, starting fresh");
55                HashMap::new()
56            } else {
57                let content = std::fs::read_to_string(&path)
58                    .map_err(|e| format!("failed to read cache file: {e}"))?;
59                serde_json::from_str(&content).unwrap_or_default()
60            }
61        } else {
62            HashMap::new()
63        };
64
65        Ok(Self { path, entries })
66    }
67
68    /// Check if a file is unchanged since last build.
69    /// Returns `true` if the file hash matches the cache and all dependencies
70    /// are also unchanged.
71    pub fn is_unchanged(&self, path: &str, current_hash: &str) -> bool {
72        if let Some(entry) = self.entries.get(path) {
73            if entry.hash != current_hash {
74                return false;
75            }
76            // Check dependencies too
77            for dep in &entry.deps {
78                if let Some(dep_entry) = self.entries.get(dep) {
79                    // Dep entry exists — hash was recorded. We need to compare
80                    // with the current file hash, but we only know if it changed
81                    // if the caller already hashed it. For simplicity, we check
82                    // that the dep entry still exists (was not pruned).
83                    let _ = dep_entry;
84                } else {
85                    // Dependency not in cache — must rebuild
86                    return false;
87                }
88            }
89            true
90        } else {
91            false
92        }
93    }
94
95    /// Check if a file and all its dependencies are unchanged, given a map
96    /// of current file hashes.
97    pub fn is_unchanged_with_hashes(
98        &self,
99        path: &str,
100        current_hash: &str,
101        current_hashes: &HashMap<String, String>,
102    ) -> bool {
103        let Some(entry) = self.entries.get(path) else {
104            return false;
105        };
106        if entry.hash != current_hash {
107            return false;
108        }
109        for dep in &entry.deps {
110            let Some(dep_entry) = self.entries.get(dep) else {
111                return false;
112            };
113            if let Some(current_dep_hash) = current_hashes.get(dep) {
114                if dep_entry.hash != *current_dep_hash {
115                    return false;
116                }
117            } else {
118                // Dep file no longer exists
119                return false;
120            }
121        }
122        true
123    }
124
125    /// Record a file's hash and dependencies in the cache.
126    pub fn put(&mut self, path: &str, hash: String, deps: Vec<String>) {
127        if self.entries.len() >= MAX_CACHE_ENTRIES && !self.entries.contains_key(path) {
128            return;
129        }
130        self.entries
131            .insert(path.to_string(), BuildCacheEntry { hash, deps });
132    }
133
134    /// Remove entries for files that no longer exist.
135    pub fn prune(&mut self, existing_files: &HashSet<String>) {
136        self.entries.retain(|k, _| existing_files.contains(k));
137    }
138
139    /// Save cache to file.
140    ///
141    /// # Errors
142    ///
143    /// Returns an error if the file cannot be written.
144    pub fn save(&self) -> Result<(), String> {
145        if self.path.as_os_str().is_empty() {
146            return Ok(());
147        }
148        let json = serde_json::to_string_pretty(&self.entries)
149            .map_err(|e| format!("failed to serialize build cache: {e}"))?;
150        std::fs::write(&self.path, json).map_err(|e| format!("failed to write build cache: {e}"))
151    }
152
153    /// Number of entries in the cache.
154    pub fn len(&self) -> usize {
155        self.entries.len()
156    }
157
158    /// Whether the cache is empty.
159    pub fn is_empty(&self) -> bool {
160        self.entries.is_empty()
161    }
162}
163
164/// Compute SHA-256 hex digest of file contents.
165pub fn hash_file(path: &Path) -> Result<String, String> {
166    let content =
167        std::fs::read(path).map_err(|e| format!("failed to read {}: {e}", path.display()))?;
168    Ok(hash_bytes(&content))
169}
170
171/// Compute SHA-256 hex digest of bytes.
172pub fn hash_bytes(data: &[u8]) -> String {
173    let mut hasher = Sha256::new();
174    hasher.update(data);
175    let result = hasher.finalize();
176    hex_encode(&result)
177}
178
/// Render `bytes` as lowercase hexadecimal, two characters per byte.
fn hex_encode(bytes: &[u8]) -> String {
    use std::fmt::Write;

    bytes
        .iter()
        .fold(String::with_capacity(bytes.len() * 2), |mut out, b| {
            // The result of writing into the `String` is ignored.
            let _ = write!(out, "{b:02x}");
            out
        })
}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an unbacked cache holding exactly `MAX_CACHE_ENTRIES` entries,
    /// keyed `file{i}.md` with hash `h{i}`.
    fn full_cache() -> BuildCache {
        let mut cache = BuildCache::empty();
        (0..MAX_CACHE_ENTRIES)
            .for_each(|i| cache.put(&format!("file{i}.md"), format!("h{i}"), Vec::new()));
        cache
    }

    #[test]
    fn hash_bytes_deterministic() {
        let input = b"hello world";
        let first = hash_bytes(input);
        let second = hash_bytes(input);
        assert_eq!(first, second);
        // A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn hash_bytes_different_input() {
        assert_ne!(hash_bytes(b"hello"), hash_bytes(b"world"));
    }

    #[test]
    fn cache_put_and_check() {
        let mut cache = BuildCache::empty();
        cache.put("cases/test.md", String::from("abc123"), Vec::new());

        assert!(cache.is_unchanged("cases/test.md", "abc123"));
        assert!(!cache.is_unchanged("cases/test.md", "different"));
    }

    #[test]
    fn cache_missing_entry() {
        assert!(!BuildCache::empty().is_unchanged("missing.md", "abc"));
    }

    #[test]
    fn cache_with_deps() {
        let entity = "people/test.md";
        let case = "cases/test.md";

        let mut cache = BuildCache::empty();
        cache.put(entity, String::from("entity_hash"), Vec::new());
        cache.put(case, String::from("case_hash"), vec![entity.to_string()]);

        let mut current: HashMap<String, String> = HashMap::from([
            (case.to_string(), String::from("case_hash")),
            (entity.to_string(), String::from("entity_hash")),
        ]);
        assert!(cache.is_unchanged_with_hashes(case, "case_hash", &current));

        // A changed dependency hash must invalidate the dependent case.
        current.insert(entity.to_string(), String::from("changed"));
        assert!(!cache.is_unchanged_with_hashes(case, "case_hash", &current));
    }

    #[test]
    fn cache_prune() {
        let mut cache = BuildCache::empty();
        cache.put("keep.md", String::from("h1"), Vec::new());
        cache.put("remove.md", String::from("h2"), Vec::new());

        let existing = HashSet::from([String::from("keep.md")]);
        cache.prune(&existing);

        assert_eq!(cache.len(), 1);
        assert!(cache.is_unchanged("keep.md", "h1"));
    }

    #[test]
    fn cache_boundary_enforced() {
        let mut cache = full_cache();
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);

        // A brand-new path beyond the limit must be rejected.
        cache.put("overflow.md", String::from("hx"), Vec::new());
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
        assert!(!cache.is_unchanged("overflow.md", "hx"));
    }

    #[test]
    fn cache_update_existing_within_boundary() {
        let mut cache = full_cache();

        // Overwriting an already-cached path is allowed even at capacity.
        cache.put("file0.md", String::from("updated"), Vec::new());
        assert!(cache.is_unchanged("file0.md", "updated"));
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
    }
}