1use std::collections::{HashMap, HashSet};
7use std::path::{Path, PathBuf};
8use std::sync::Mutex;
9
10use serde::{Deserialize, Serialize};
11
12use mir_issues::Issue;
13
14pub type CacheHit = (Vec<Issue>, Vec<(String, u32, u16, u16)>);
17
18pub fn hash_content(content: &str) -> String {
24 blake3::hash(content.as_bytes()).to_hex().to_string()
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
32struct CacheEntry {
33 content_hash: String,
34 issues: Vec<Issue>,
35 #[serde(default)]
39 reference_locations: Vec<(String, u32, u16, u16)>,
40}
41
42#[derive(Debug, Default, Serialize, Deserialize)]
48struct CacheFile {
49 #[serde(default)]
50 entries: HashMap<String, CacheEntry>,
51 #[serde(default)]
54 reverse_deps: HashMap<String, HashSet<String>>,
55}
56
57pub struct AnalysisCache {
59 cache_dir: PathBuf,
60 entries: Mutex<HashMap<String, CacheEntry>>,
61 reverse_deps: Mutex<HashMap<String, HashSet<String>>>,
63 dirty: Mutex<bool>,
64}
65
66impl AnalysisCache {
67 pub fn open(cache_dir: &Path) -> Self {
71 std::fs::create_dir_all(cache_dir).ok();
72 let file = Self::load(cache_dir);
73 Self {
74 cache_dir: cache_dir.to_path_buf(),
75 entries: Mutex::new(file.entries),
76 reverse_deps: Mutex::new(file.reverse_deps),
77 dirty: Mutex::new(false),
78 }
79 }
80
81 pub fn open_default(project_root: &Path) -> Self {
83 Self::open(&project_root.join(".mir-cache"))
84 }
85
86 pub fn get(&self, file_path: &str, content_hash: &str) -> Option<CacheHit> {
92 let entries = self.entries.lock().unwrap();
93 entries.get(file_path).and_then(|e| {
94 if e.content_hash == content_hash {
95 Some((e.issues.clone(), e.reference_locations.clone()))
96 } else {
97 None
98 }
99 })
100 }
101
102 pub fn put(
106 &self,
107 file_path: &str,
108 content_hash: String,
109 issues: Vec<Issue>,
110 reference_locations: Vec<(String, u32, u16, u16)>,
111 ) {
112 let mut entries = self.entries.lock().unwrap();
113 entries.insert(
114 file_path.to_string(),
115 CacheEntry {
116 content_hash,
117 issues,
118 reference_locations,
119 },
120 );
121 *self.dirty.lock().unwrap() = true;
122 }
123
124 pub fn flush(&self) {
127 let dirty = {
128 let mut d = self.dirty.lock().unwrap();
129 let was = *d;
130 *d = false;
131 was
132 };
133 if !dirty {
134 return;
135 }
136 let cache_file = self.cache_dir.join("cache.json");
137 let file = CacheFile {
138 entries: self.entries.lock().unwrap().clone(),
139 reverse_deps: self.reverse_deps.lock().unwrap().clone(),
140 };
141 if let Ok(json) = serde_json::to_string(&file) {
142 std::fs::write(cache_file, json).ok();
143 }
144 }
145
146 pub fn set_reverse_deps(&self, deps: HashMap<String, HashSet<String>>) {
148 *self.reverse_deps.lock().unwrap() = deps;
149 *self.dirty.lock().unwrap() = true;
150 }
151
152 pub fn update_reverse_deps_for_file(&self, file: &str, new_targets: &HashSet<String>) {
163 let mut deps = self.reverse_deps.lock().unwrap();
164
165 for dependents in deps.values_mut() {
166 dependents.remove(file);
167 }
168 deps.retain(|_, dependents| !dependents.is_empty());
169
170 for target in new_targets {
171 if target != file {
172 deps.entry(target.clone())
173 .or_default()
174 .insert(file.to_string());
175 }
176 }
177
178 *self.dirty.lock().unwrap() = true;
179 }
180
181 pub fn evict_with_dependents(&self, changed_files: &[String]) -> usize {
185 let to_evict: Vec<String> = {
187 let deps = self.reverse_deps.lock().unwrap();
188 let mut visited: HashSet<String> = changed_files.iter().cloned().collect();
189 let mut queue: std::collections::VecDeque<String> =
190 changed_files.iter().cloned().collect();
191 let mut result = Vec::new();
192
193 while let Some(file) = queue.pop_front() {
194 if let Some(dependents) = deps.get(&file) {
195 for dep in dependents {
196 if visited.insert(dep.clone()) {
197 queue.push_back(dep.clone());
198 result.push(dep.clone());
199 }
200 }
201 }
202 }
203 result
204 };
205
206 let count = to_evict.len();
208 for file in &to_evict {
209 self.evict(file);
210 }
211 count
212 }
213
214 pub fn evict(&self, file_path: &str) {
216 let mut entries = self.entries.lock().unwrap();
217 entries.remove(file_path);
218 *self.dirty.lock().unwrap() = true;
219 }
220
221 fn load(cache_dir: &Path) -> CacheFile {
224 let cache_file = cache_dir.join("cache.json");
225 let Ok(bytes) = std::fs::read(&cache_file) else {
226 return CacheFile::default();
227 };
228 serde_json::from_slice(&bytes).unwrap_or_default()
229 }
230}
231
232#[cfg(test)]
237mod tests {
238 use super::*;
239 use tempfile::TempDir;
240
241 fn make_cache(dir: &TempDir) -> AnalysisCache {
242 AnalysisCache::open(dir.path())
243 }
244
245 fn seed(cache: &AnalysisCache, file: &str) {
246 cache.put(file, "hash".to_string(), vec![], vec![]);
247 }
248
249 #[test]
250 fn evict_with_dependents_linear_chain() {
251 let dir = TempDir::new().unwrap();
254 let cache = make_cache(&dir);
255 seed(&cache, "A");
256 seed(&cache, "B");
257 seed(&cache, "C");
258
259 let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
260 deps.entry("A".into()).or_default().insert("B".into());
261 deps.entry("B".into()).or_default().insert("C".into());
262 cache.set_reverse_deps(deps);
263
264 let evicted = cache.evict_with_dependents(&["A".to_string()]);
265
266 assert_eq!(evicted, 2, "B and C should be evicted");
267 assert!(cache.get("A", "hash").is_some(), "A itself is not evicted");
268 assert!(cache.get("B", "hash").is_none(), "B should be evicted");
269 assert!(cache.get("C", "hash").is_none(), "C should be evicted");
270 }
271
272 #[test]
273 fn evict_with_dependents_diamond() {
274 let dir = TempDir::new().unwrap();
277 let cache = make_cache(&dir);
278 seed(&cache, "A");
279 seed(&cache, "B");
280 seed(&cache, "C");
281 seed(&cache, "D");
282
283 let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
284 deps.entry("A".into()).or_default().insert("B".into());
285 deps.entry("A".into()).or_default().insert("C".into());
286 deps.entry("B".into()).or_default().insert("D".into());
287 deps.entry("C".into()).or_default().insert("D".into());
288 cache.set_reverse_deps(deps);
289
290 let evicted = cache.evict_with_dependents(&["A".to_string()]);
291
292 assert_eq!(evicted, 3, "B, C, D each evicted once");
293 assert!(cache.get("D", "hash").is_none());
294 }
295
296 #[test]
297 fn evict_with_dependents_cycle_safety() {
298 let dir = TempDir::new().unwrap();
301 let cache = make_cache(&dir);
302 seed(&cache, "A");
303 seed(&cache, "B");
304
305 let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
306 deps.entry("A".into()).or_default().insert("B".into());
307 deps.entry("B".into()).or_default().insert("A".into());
308 cache.set_reverse_deps(deps);
309
310 let evicted = cache.evict_with_dependents(&["A".to_string()]);
311
312 assert_eq!(evicted, 1);
314 assert!(cache.get("B", "hash").is_none());
315 }
316
317 #[test]
318 fn evict_with_dependents_unrelated_file_untouched() {
319 let dir = TempDir::new().unwrap();
321 let cache = make_cache(&dir);
322 seed(&cache, "A");
323 seed(&cache, "B");
324 seed(&cache, "C");
325
326 let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
327 deps.entry("A".into()).or_default().insert("B".into());
328 cache.set_reverse_deps(deps);
329
330 let evicted = cache.evict_with_dependents(&["C".to_string()]);
331
332 assert_eq!(evicted, 0);
333 assert!(
334 cache.get("B", "hash").is_some(),
335 "B unrelated, should survive"
336 );
337 }
338
339 #[test]
340 fn old_cache_without_reference_locations_deserializes_to_empty() {
341 let dir = TempDir::new().unwrap();
345 let cache_file = dir.path().join("cache.json");
346
347 std::fs::write(
349 &cache_file,
350 r#"{"entries":{"a.php":{"content_hash":"abc","issues":[]}},"reverse_deps":{}}"#,
351 )
352 .unwrap();
353
354 let cache = AnalysisCache::open(dir.path());
355 let hit = cache
356 .get("a.php", "abc")
357 .expect("old cache entry should deserialize successfully");
358
359 assert!(hit.0.is_empty(), "no issues");
360 assert!(
361 hit.1.is_empty(),
362 "reference_locations should default to empty vec, not fail"
363 );
364 }
365}