weave_content/
build_cache.rs1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5use sha2::{Digest, Sha256};
6
/// Largest cache file, in bytes (50 MiB), that `BuildCache::load` is willing to parse.
/// Anything bigger is ignored and the cache starts out empty.
const MAX_CACHE_SIZE_BYTES: u64 = 50 * 1024 * 1024;

/// Upper bound on the number of entries `BuildCache::put` will keep in memory.
const MAX_CACHE_ENTRIES: usize = 50_000;

/// Name of the cache file; `BuildCache::load` joins it onto the content root.
pub const CACHE_FILENAME: &str = ".build-cache.json";
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct BuildCacheEntry {
19 pub hash: String,
21 #[serde(default, skip_serializing_if = "Vec::is_empty")]
23 pub deps: Vec<String>,
24}
25
26#[derive(Debug)]
28pub struct BuildCache {
29 path: PathBuf,
30 entries: HashMap<String, BuildCacheEntry>,
31}
32
33impl BuildCache {
34 pub fn empty() -> Self {
36 Self {
37 path: PathBuf::new(),
38 entries: HashMap::new(),
39 }
40 }
41
42 pub fn load(content_root: &Path) -> Result<Self, String> {
48 let path = content_root.join(CACHE_FILENAME);
49 let entries = if path.exists() {
50 let meta =
52 std::fs::metadata(&path).map_err(|e| format!("failed to stat cache file: {e}"))?;
53 if meta.len() > MAX_CACHE_SIZE_BYTES {
54 eprintln!("build cache exceeds {MAX_CACHE_SIZE_BYTES} bytes, starting fresh");
55 HashMap::new()
56 } else {
57 let content = std::fs::read_to_string(&path)
58 .map_err(|e| format!("failed to read cache file: {e}"))?;
59 serde_json::from_str(&content).unwrap_or_default()
60 }
61 } else {
62 HashMap::new()
63 };
64
65 Ok(Self { path, entries })
66 }
67
68 pub fn is_unchanged(&self, path: &str, current_hash: &str) -> bool {
72 if let Some(entry) = self.entries.get(path) {
73 if entry.hash != current_hash {
74 return false;
75 }
76 for dep in &entry.deps {
78 if let Some(dep_entry) = self.entries.get(dep) {
79 let _ = dep_entry;
84 } else {
85 return false;
87 }
88 }
89 true
90 } else {
91 false
92 }
93 }
94
95 pub fn is_unchanged_with_hashes(
98 &self,
99 path: &str,
100 current_hash: &str,
101 current_hashes: &HashMap<String, String>,
102 ) -> bool {
103 let Some(entry) = self.entries.get(path) else {
104 return false;
105 };
106 if entry.hash != current_hash {
107 return false;
108 }
109 for dep in &entry.deps {
110 let Some(dep_entry) = self.entries.get(dep) else {
111 return false;
112 };
113 if let Some(current_dep_hash) = current_hashes.get(dep) {
114 if dep_entry.hash != *current_dep_hash {
115 return false;
116 }
117 } else {
118 return false;
120 }
121 }
122 true
123 }
124
125 pub fn put(&mut self, path: &str, hash: String, deps: Vec<String>) {
127 if self.entries.len() >= MAX_CACHE_ENTRIES && !self.entries.contains_key(path) {
128 return;
129 }
130 self.entries
131 .insert(path.to_string(), BuildCacheEntry { hash, deps });
132 }
133
134 pub fn prune(&mut self, existing_files: &HashSet<String>) {
136 self.entries.retain(|k, _| existing_files.contains(k));
137 }
138
139 pub fn save(&self) -> Result<(), String> {
145 if self.path.as_os_str().is_empty() {
146 return Ok(());
147 }
148 let json = serde_json::to_string_pretty(&self.entries)
149 .map_err(|e| format!("failed to serialize build cache: {e}"))?;
150 std::fs::write(&self.path, json).map_err(|e| format!("failed to write build cache: {e}"))
151 }
152
153 pub fn len(&self) -> usize {
155 self.entries.len()
156 }
157
158 pub fn is_empty(&self) -> bool {
160 self.entries.is_empty()
161 }
162}
163
164pub fn hash_file(path: &Path) -> Result<String, String> {
166 let content =
167 std::fs::read(path).map_err(|e| format!("failed to read {}: {e}", path.display()))?;
168 Ok(hash_bytes(&content))
169}
170
171pub fn hash_bytes(data: &[u8]) -> String {
173 let mut hasher = Sha256::new();
174 hasher.update(data);
175 let result = hasher.finalize();
176 hex_encode(&result)
177}
178
/// Renders `bytes` as lowercase hexadecimal, two characters per input byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble — same order as `{:02x}`.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a cache holding exactly `MAX_CACHE_ENTRIES` distinct entries
    /// (`file0.md` .. with hashes `h0` ..).
    fn filled_cache() -> BuildCache {
        let mut cache = BuildCache::empty();
        (0..MAX_CACHE_ENTRIES)
            .for_each(|i| cache.put(&format!("file{i}.md"), format!("h{i}"), Vec::new()));
        cache
    }

    #[test]
    fn hash_bytes_deterministic() {
        let first = hash_bytes(b"hello world");
        let second = hash_bytes(b"hello world");
        assert_eq!(first, second);
        // A SHA-256 digest is 32 bytes, i.e. 64 hex characters.
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn hash_bytes_different_input() {
        assert_ne!(hash_bytes(b"hello"), hash_bytes(b"world"));
    }

    #[test]
    fn cache_put_and_check() {
        let mut cache = BuildCache::empty();
        cache.put("cases/test.md", String::from("abc123"), Vec::new());

        let matches_stored = cache.is_unchanged("cases/test.md", "abc123");
        let matches_other = cache.is_unchanged("cases/test.md", "different");
        assert!(matches_stored);
        assert!(!matches_other);
    }

    #[test]
    fn cache_missing_entry() {
        let cache = BuildCache::empty();
        let found = cache.is_unchanged("missing.md", "abc");
        assert!(!found);
    }

    #[test]
    fn cache_with_deps() {
        let mut cache = BuildCache::empty();
        cache.put("people/test.md", String::from("entity_hash"), Vec::new());
        cache.put(
            "cases/test.md",
            String::from("case_hash"),
            vec![String::from("people/test.md")],
        );

        let mut current = HashMap::from([
            (String::from("cases/test.md"), String::from("case_hash")),
            (String::from("people/test.md"), String::from("entity_hash")),
        ]);

        // Every dependency hash matches: nothing changed.
        assert!(cache.is_unchanged_with_hashes("cases/test.md", "case_hash", &current));

        // A dependency with a different current hash invalidates the dependent entry.
        current.insert(String::from("people/test.md"), String::from("changed"));
        assert!(!cache.is_unchanged_with_hashes("cases/test.md", "case_hash", &current));
    }

    #[test]
    fn cache_prune() {
        let mut cache = BuildCache::empty();
        for (key, hash) in [("keep.md", "h1"), ("remove.md", "h2")] {
            cache.put(key, hash.to_string(), Vec::new());
        }

        let still_present = HashSet::from([String::from("keep.md")]);
        cache.prune(&still_present);

        assert_eq!(cache.len(), 1);
        assert!(cache.is_unchanged("keep.md", "h1"));
    }

    #[test]
    fn cache_boundary_enforced() {
        let mut cache = filled_cache();
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);

        // A brand-new key is rejected once the cache is full.
        cache.put("overflow.md", String::from("hx"), Vec::new());
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
        assert!(!cache.is_unchanged("overflow.md", "hx"));
    }

    #[test]
    fn cache_update_existing_within_boundary() {
        let mut cache = filled_cache();

        // Overwriting an existing key is still allowed when the cache is full.
        cache.put("file0.md", String::from("updated"), Vec::new());
        assert!(cache.is_unchanged("file0.md", "updated"));
        assert_eq!(cache.len(), MAX_CACHE_ENTRIES);
    }
}