Skip to main content

mir_analyzer/
cache.rs

//! Per-file analysis result cache backed by a JSON file on disk.
//!
//! Cache key: file path.  Cache validity: SHA-256 hash of file content.
//! If the content hash matches what was stored, the cached issues are returned
//! and Pass 2 analysis is skipped for that file.
6use std::collections::{HashMap, HashSet};
7use std::path::{Path, PathBuf};
8use std::sync::Mutex;
9
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12
13use mir_issues::Issue;
14
15// ---------------------------------------------------------------------------
16// Hash helper
17// ---------------------------------------------------------------------------
18
19/// Compute the SHA-256 hex digest of `content`.
20pub fn hash_content(content: &str) -> String {
21    let mut h = Sha256::new();
22    h.update(content.as_bytes());
23    h.finalize().iter().fold(String::new(), |mut acc, b| {
24        use std::fmt::Write;
25        write!(acc, "{:02x}", b).unwrap();
26        acc
27    })
28}
29
30// ---------------------------------------------------------------------------
31// CacheEntry
32// ---------------------------------------------------------------------------
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35struct CacheEntry {
36    content_hash: String,
37    issues: Vec<Issue>,
38}
39
40// ---------------------------------------------------------------------------
41// AnalysisCache
42// ---------------------------------------------------------------------------
43
44/// Serialized form of the full cache file.
45#[derive(Debug, Default, Serialize, Deserialize)]
46struct CacheFile {
47    #[serde(default)]
48    entries: HashMap<String, CacheEntry>,
49    /// Reverse dependency graph: defining_file → [files that depend on it].
50    /// Persisted so that the next run can invalidate dependents before Pass 1.
51    #[serde(default)]
52    reverse_deps: HashMap<String, HashSet<String>>,
53}
54
55/// Thread-safe, disk-backed cache for per-file analysis results.
56pub struct AnalysisCache {
57    cache_dir: PathBuf,
58    entries: Mutex<HashMap<String, CacheEntry>>,
59    /// Reverse dependency graph loaded from disk (from the previous run).
60    reverse_deps: Mutex<HashMap<String, HashSet<String>>>,
61    dirty: Mutex<bool>,
62}
63
64impl AnalysisCache {
65    /// Open or create a cache stored under `cache_dir`.
66    /// If the directory or cache file do not exist they are created lazily on
67    /// the first `flush()` call.
68    pub fn open(cache_dir: &Path) -> Self {
69        std::fs::create_dir_all(cache_dir).ok();
70        let file = Self::load(cache_dir);
71        Self {
72            cache_dir: cache_dir.to_path_buf(),
73            entries: Mutex::new(file.entries),
74            reverse_deps: Mutex::new(file.reverse_deps),
75            dirty: Mutex::new(false),
76        }
77    }
78
79    /// Open the default cache directory: `{project_root}/.mir-cache/`.
80    pub fn open_default(project_root: &Path) -> Self {
81        Self::open(&project_root.join(".mir-cache"))
82    }
83
84    /// Return cached issues for `file_path` if its `content_hash` matches.
85    /// Returns `None` if there is no entry or the file has changed.
86    pub fn get(&self, file_path: &str, content_hash: &str) -> Option<Vec<Issue>> {
87        let entries = self.entries.lock().unwrap();
88        entries.get(file_path).and_then(|e| {
89            if e.content_hash == content_hash {
90                Some(e.issues.clone())
91            } else {
92                None
93            }
94        })
95    }
96
97    /// Store `issues` for `file_path` with the given `content_hash`.
98    pub fn put(&self, file_path: &str, content_hash: String, issues: Vec<Issue>) {
99        let mut entries = self.entries.lock().unwrap();
100        entries.insert(
101            file_path.to_string(),
102            CacheEntry {
103                content_hash,
104                issues,
105            },
106        );
107        *self.dirty.lock().unwrap() = true;
108    }
109
110    /// Persist the in-memory cache to `{cache_dir}/cache.json`.
111    /// This is a no-op if nothing changed since the last flush.
112    pub fn flush(&self) {
113        let dirty = {
114            let mut d = self.dirty.lock().unwrap();
115            let was = *d;
116            *d = false;
117            was
118        };
119        if !dirty {
120            return;
121        }
122        let cache_file = self.cache_dir.join("cache.json");
123        let file = CacheFile {
124            entries: self.entries.lock().unwrap().clone(),
125            reverse_deps: self.reverse_deps.lock().unwrap().clone(),
126        };
127        if let Ok(json) = serde_json::to_string(&file) {
128            std::fs::write(cache_file, json).ok();
129        }
130    }
131
132    /// Replace the reverse dependency graph (called after each Pass 1).
133    pub fn set_reverse_deps(&self, deps: HashMap<String, HashSet<String>>) {
134        *self.reverse_deps.lock().unwrap() = deps;
135        *self.dirty.lock().unwrap() = true;
136    }
137
138    /// BFS from each changed file through the reverse dep graph.
139    /// Evicts every reachable dependent's cache entry.
140    /// Returns the number of entries evicted.
141    pub fn evict_with_dependents(&self, changed_files: &[String]) -> usize {
142        // Phase 1: collect all dependents to evict via BFS (lock held only here).
143        let to_evict: Vec<String> = {
144            let deps = self.reverse_deps.lock().unwrap();
145            let mut visited: HashSet<String> = changed_files.iter().cloned().collect();
146            let mut queue: std::collections::VecDeque<String> =
147                changed_files.iter().cloned().collect();
148            let mut result = Vec::new();
149
150            while let Some(file) = queue.pop_front() {
151                if let Some(dependents) = deps.get(&file) {
152                    for dep in dependents {
153                        if visited.insert(dep.clone()) {
154                            queue.push_back(dep.clone());
155                            result.push(dep.clone());
156                        }
157                    }
158                }
159            }
160            result
161        };
162
163        // Phase 2: evict (reverse_deps lock released above, entries lock taken per file).
164        let count = to_evict.len();
165        for file in &to_evict {
166            self.evict(file);
167        }
168        count
169    }
170
171    /// Remove a single file's cache entry.
172    pub fn evict(&self, file_path: &str) {
173        let mut entries = self.entries.lock().unwrap();
174        entries.remove(file_path);
175        *self.dirty.lock().unwrap() = true;
176    }
177
178    // -----------------------------------------------------------------------
179
180    fn load(cache_dir: &Path) -> CacheFile {
181        let cache_file = cache_dir.join("cache.json");
182        let Ok(bytes) = std::fs::read(&cache_file) else {
183            return CacheFile::default();
184        };
185        serde_json::from_slice(&bytes).unwrap_or_default()
186    }
187}
188
189// ---------------------------------------------------------------------------
190// Tests
191// ---------------------------------------------------------------------------
192
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Content hash shared by every seeded entry.
    const HASH: &str = "hash";

    /// Open a cache inside `dir` and seed one issue-free entry per file.
    fn cache_with(dir: &TempDir, files: &[&str]) -> AnalysisCache {
        let cache = AnalysisCache::open(dir.path());
        for file in files {
            cache.put(file, HASH.to_string(), vec![]);
        }
        cache
    }

    /// Build a reverse dependency graph from `(defining_file, dependent)` edges.
    fn reverse_deps(edges: &[(&str, &str)]) -> HashMap<String, HashSet<String>> {
        let mut graph: HashMap<String, HashSet<String>> = HashMap::new();
        for &(defining, dependent) in edges {
            graph
                .entry(defining.to_string())
                .or_default()
                .insert(dependent.to_string());
        }
        graph
    }

    #[test]
    fn evict_with_dependents_linear_chain() {
        // A → [B], B → [C]: changing A must evict B and C.
        let dir = TempDir::new().unwrap();
        let cache = cache_with(&dir, &["A", "B", "C"]);
        cache.set_reverse_deps(reverse_deps(&[("A", "B"), ("B", "C")]));

        let evicted = cache.evict_with_dependents(&["A".to_string()]);

        assert_eq!(evicted, 2, "B and C should be evicted");
        assert!(cache.get("A", HASH).is_some(), "A itself is not evicted");
        assert!(cache.get("B", HASH).is_none(), "B should be evicted");
        assert!(cache.get("C", HASH).is_none(), "C should be evicted");
    }

    #[test]
    fn evict_with_dependents_diamond() {
        // A → [B, C], B → [D], C → [D]: D is reachable twice but must be
        // counted once (the visited set prevents double-eviction).
        let dir = TempDir::new().unwrap();
        let cache = cache_with(&dir, &["A", "B", "C", "D"]);
        cache.set_reverse_deps(reverse_deps(&[
            ("A", "B"),
            ("A", "C"),
            ("B", "D"),
            ("C", "D"),
        ]));

        let evicted = cache.evict_with_dependents(&["A".to_string()]);

        assert_eq!(evicted, 3, "B, C, D each evicted once");
        assert!(cache.get("D", HASH).is_none());
    }

    #[test]
    fn evict_with_dependents_cycle_safety() {
        // A → [B], B → [A] (circular): must terminate and evict B.
        let dir = TempDir::new().unwrap();
        let cache = cache_with(&dir, &["A", "B"]);
        cache.set_reverse_deps(reverse_deps(&[("A", "B"), ("B", "A")]));

        let evicted = cache.evict_with_dependents(&["A".to_string()]);

        // B is a dependent of A; A is the seed (not counted as "evicted dependent")
        assert_eq!(evicted, 1);
        assert!(cache.get("B", HASH).is_none());
    }

    #[test]
    fn evict_with_dependents_unrelated_file_untouched() {
        // B depends on A, not on C, so changing C must leave B alone.
        let dir = TempDir::new().unwrap();
        let cache = cache_with(&dir, &["A", "B", "C"]);
        cache.set_reverse_deps(reverse_deps(&[("A", "B")]));

        let evicted = cache.evict_with_dependents(&["C".to_string()]);

        assert_eq!(evicted, 0);
        assert!(
            cache.get("B", HASH).is_some(),
            "B unrelated, should survive"
        );
    }
}