Skip to main content

mir_analyzer/
cache.rs

1/// Per-file analysis result cache backed by a JSON file on disk.
2///
3/// Cache key: file path.  Cache validity: BLAKE3 hash of file content.
4/// If the content hash matches what was stored, the cached issues are returned
5/// and Pass 2 analysis is skipped for that file.
6use std::collections::{HashMap, HashSet};
7use std::path::{Path, PathBuf};
8use std::sync::Mutex;
9
10use serde::{Deserialize, Serialize};
11
12use mir_issues::Issue;
13
14/// Cached analysis result returned on a cache hit: issues and reference location
15/// tuples `(symbol_key, line, col_start, col_end)`.
16pub type CacheHit = (Vec<Issue>, Vec<(String, u32, u16, u16)>);
17
18// ---------------------------------------------------------------------------
19// Hash helper
20// ---------------------------------------------------------------------------
21
22/// Compute the BLAKE3 hex digest of `content`.
23pub fn hash_content(content: &str) -> String {
24    blake3::hash(content.as_bytes()).to_hex().to_string()
25}
26
27// ---------------------------------------------------------------------------
28// CacheEntry
29// ---------------------------------------------------------------------------
30
31#[derive(Debug, Clone, Serialize, Deserialize)]
32struct CacheEntry {
33    content_hash: String,
34    issues: Vec<Issue>,
35    /// Reference locations recorded during Pass 2: (symbol_key, line, col_start, col_end).
36    /// Stored so that cache hits can replay symbol_reference_locations without re-running
37    /// analyze_bodies.
38    #[serde(default)]
39    reference_locations: Vec<(String, u32, u16, u16)>,
40}
41
42// ---------------------------------------------------------------------------
43// AnalysisCache
44// ---------------------------------------------------------------------------
45
46/// Serialized form of the full cache file.
47#[derive(Debug, Default, Serialize, Deserialize)]
48struct CacheFile {
49    #[serde(default)]
50    entries: HashMap<String, CacheEntry>,
51    /// Reverse dependency graph: defining_file → [files that depend on it].
52    /// Persisted so that the next run can invalidate dependents before Pass 1.
53    #[serde(default)]
54    reverse_deps: HashMap<String, HashSet<String>>,
55}
56
57/// Thread-safe, disk-backed cache for per-file analysis results.
58pub struct AnalysisCache {
59    cache_dir: PathBuf,
60    entries: Mutex<HashMap<String, CacheEntry>>,
61    /// Reverse dependency graph loaded from disk (from the previous run).
62    reverse_deps: Mutex<HashMap<String, HashSet<String>>>,
63    dirty: Mutex<bool>,
64}
65
66impl AnalysisCache {
67    /// Open or create a cache stored under `cache_dir`.
68    /// If the directory or cache file do not exist they are created lazily on
69    /// the first `flush()` call.
70    pub fn open(cache_dir: &Path) -> Self {
71        std::fs::create_dir_all(cache_dir).ok();
72        let file = Self::load(cache_dir);
73        Self {
74            cache_dir: cache_dir.to_path_buf(),
75            entries: Mutex::new(file.entries),
76            reverse_deps: Mutex::new(file.reverse_deps),
77            dirty: Mutex::new(false),
78        }
79    }
80
81    /// Open the default cache directory: `{project_root}/.mir-cache/`.
82    pub fn open_default(project_root: &Path) -> Self {
83        Self::open(&project_root.join(".mir-cache"))
84    }
85
86    /// Return cached issues and reference locations for `file_path` if its
87    /// `content_hash` matches. Returns `None` if there is no entry or the file
88    /// has changed. The second element of the tuple is the list of
89    /// `(symbol_key, line, col_start, col_end)` entries to replay into
90    /// the salsa db via `MirDatabase::replay_reference_locations`.
91    pub fn get(&self, file_path: &str, content_hash: &str) -> Option<CacheHit> {
92        let entries = self.entries.lock().unwrap();
93        entries.get(file_path).and_then(|e| {
94            if e.content_hash == content_hash {
95                Some((e.issues.clone(), e.reference_locations.clone()))
96            } else {
97                None
98            }
99        })
100    }
101
102    /// Store `issues` and `reference_locations` for `file_path` with the given
103    /// `content_hash`. `reference_locations` is a list of
104    /// `(symbol_key, line, col_start, col_end)` recorded during Pass 2.
105    pub fn put(
106        &self,
107        file_path: &str,
108        content_hash: String,
109        issues: Vec<Issue>,
110        reference_locations: Vec<(String, u32, u16, u16)>,
111    ) {
112        let mut entries = self.entries.lock().unwrap();
113        entries.insert(
114            file_path.to_string(),
115            CacheEntry {
116                content_hash,
117                issues,
118                reference_locations,
119            },
120        );
121        *self.dirty.lock().unwrap() = true;
122    }
123
124    /// Persist the in-memory cache to `{cache_dir}/cache.json`.
125    /// This is a no-op if nothing changed since the last flush.
126    pub fn flush(&self) {
127        let dirty = {
128            let mut d = self.dirty.lock().unwrap();
129            let was = *d;
130            *d = false;
131            was
132        };
133        if !dirty {
134            return;
135        }
136        let cache_file = self.cache_dir.join("cache.json");
137        let file = CacheFile {
138            entries: self.entries.lock().unwrap().clone(),
139            reverse_deps: self.reverse_deps.lock().unwrap().clone(),
140        };
141        if let Ok(json) = serde_json::to_string(&file) {
142            std::fs::write(cache_file, json).ok();
143        }
144    }
145
146    /// Replace the reverse dependency graph (called after each Pass 1).
147    pub fn set_reverse_deps(&self, deps: HashMap<String, HashSet<String>>) {
148        *self.reverse_deps.lock().unwrap() = deps;
149        *self.dirty.lock().unwrap() = true;
150    }
151
152    /// Update the reverse-dep graph for a single file in place.
153    ///
154    /// `new_targets` is the set of files `file` depends on (its imports'
155    /// defining files plus parent / interfaces / traits' defining files).
156    /// This removes `file` from every existing dependent set, then inserts it
157    /// into each of `new_targets`' dependent sets — preserving the invariant
158    /// that the graph reflects the file's *current* outgoing edges.
159    ///
160    /// Used by `AnalysisSession::ingest_file` to keep cross-file invalidation
161    /// correct without rebuilding the whole graph on every edit.
162    pub fn update_reverse_deps_for_file(&self, file: &str, new_targets: &HashSet<String>) {
163        let mut deps = self.reverse_deps.lock().unwrap();
164
165        for dependents in deps.values_mut() {
166            dependents.remove(file);
167        }
168        deps.retain(|_, dependents| !dependents.is_empty());
169
170        for target in new_targets {
171            if target != file {
172                deps.entry(target.clone())
173                    .or_default()
174                    .insert(file.to_string());
175            }
176        }
177
178        *self.dirty.lock().unwrap() = true;
179    }
180
181    /// BFS from each changed file through the reverse dep graph.
182    /// Evicts every reachable dependent's cache entry.
183    /// Returns the number of entries evicted.
184    pub fn evict_with_dependents(&self, changed_files: &[String]) -> usize {
185        // Phase 1: collect all dependents to evict via BFS (lock held only here).
186        let to_evict: Vec<String> = {
187            let deps = self.reverse_deps.lock().unwrap();
188            let mut visited: HashSet<String> = changed_files.iter().cloned().collect();
189            let mut queue: std::collections::VecDeque<String> =
190                changed_files.iter().cloned().collect();
191            let mut result = Vec::new();
192
193            while let Some(file) = queue.pop_front() {
194                if let Some(dependents) = deps.get(&file) {
195                    for dep in dependents {
196                        if visited.insert(dep.clone()) {
197                            queue.push_back(dep.clone());
198                            result.push(dep.clone());
199                        }
200                    }
201                }
202            }
203            result
204        };
205
206        // Phase 2: evict (reverse_deps lock released above, entries lock taken per file).
207        let count = to_evict.len();
208        for file in &to_evict {
209            self.evict(file);
210        }
211        count
212    }
213
214    /// Remove a single file's cache entry.
215    pub fn evict(&self, file_path: &str) {
216        let mut entries = self.entries.lock().unwrap();
217        entries.remove(file_path);
218        *self.dirty.lock().unwrap() = true;
219    }
220
221    // -----------------------------------------------------------------------
222
223    fn load(cache_dir: &Path) -> CacheFile {
224        let cache_file = cache_dir.join("cache.json");
225        let Ok(bytes) = std::fs::read(&cache_file) else {
226            return CacheFile::default();
227        };
228        serde_json::from_slice(&bytes).unwrap_or_default()
229    }
230}
231
232// ---------------------------------------------------------------------------
233// Tests
234// ---------------------------------------------------------------------------
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239    use tempfile::TempDir;
240
241    fn make_cache(dir: &TempDir) -> AnalysisCache {
242        AnalysisCache::open(dir.path())
243    }
244
245    fn seed(cache: &AnalysisCache, file: &str) {
246        cache.put(file, "hash".to_string(), vec![], vec![]);
247    }
248
249    #[test]
250    fn evict_with_dependents_linear_chain() {
251        // reverse_deps: A → [B], B → [C]
252        // Changing A must evict B and C.
253        let dir = TempDir::new().unwrap();
254        let cache = make_cache(&dir);
255        seed(&cache, "A");
256        seed(&cache, "B");
257        seed(&cache, "C");
258
259        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
260        deps.entry("A".into()).or_default().insert("B".into());
261        deps.entry("B".into()).or_default().insert("C".into());
262        cache.set_reverse_deps(deps);
263
264        let evicted = cache.evict_with_dependents(&["A".to_string()]);
265
266        assert_eq!(evicted, 2, "B and C should be evicted");
267        assert!(cache.get("A", "hash").is_some(), "A itself is not evicted");
268        assert!(cache.get("B", "hash").is_none(), "B should be evicted");
269        assert!(cache.get("C", "hash").is_none(), "C should be evicted");
270    }
271
272    #[test]
273    fn evict_with_dependents_diamond() {
274        // reverse_deps: A → [B, C], B → [D], C → [D]
275        // D should be evicted exactly once (visited set prevents double-eviction).
276        let dir = TempDir::new().unwrap();
277        let cache = make_cache(&dir);
278        seed(&cache, "A");
279        seed(&cache, "B");
280        seed(&cache, "C");
281        seed(&cache, "D");
282
283        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
284        deps.entry("A".into()).or_default().insert("B".into());
285        deps.entry("A".into()).or_default().insert("C".into());
286        deps.entry("B".into()).or_default().insert("D".into());
287        deps.entry("C".into()).or_default().insert("D".into());
288        cache.set_reverse_deps(deps);
289
290        let evicted = cache.evict_with_dependents(&["A".to_string()]);
291
292        assert_eq!(evicted, 3, "B, C, D each evicted once");
293        assert!(cache.get("D", "hash").is_none());
294    }
295
296    #[test]
297    fn evict_with_dependents_cycle_safety() {
298        // reverse_deps: A → [B], B → [A]  (circular)
299        // Must not loop forever; B should be evicted.
300        let dir = TempDir::new().unwrap();
301        let cache = make_cache(&dir);
302        seed(&cache, "A");
303        seed(&cache, "B");
304
305        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
306        deps.entry("A".into()).or_default().insert("B".into());
307        deps.entry("B".into()).or_default().insert("A".into());
308        cache.set_reverse_deps(deps);
309
310        let evicted = cache.evict_with_dependents(&["A".to_string()]);
311
312        // B is a dependent of A; A is the seed (not counted as "evicted dependent")
313        assert_eq!(evicted, 1);
314        assert!(cache.get("B", "hash").is_none());
315    }
316
317    #[test]
318    fn evict_with_dependents_unrelated_file_untouched() {
319        // Changing C should not evict B (which depends on A, not C).
320        let dir = TempDir::new().unwrap();
321        let cache = make_cache(&dir);
322        seed(&cache, "A");
323        seed(&cache, "B");
324        seed(&cache, "C");
325
326        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
327        deps.entry("A".into()).or_default().insert("B".into());
328        cache.set_reverse_deps(deps);
329
330        let evicted = cache.evict_with_dependents(&["C".to_string()]);
331
332        assert_eq!(evicted, 0);
333        assert!(
334            cache.get("B", "hash").is_some(),
335            "B unrelated, should survive"
336        );
337    }
338
339    #[test]
340    fn old_cache_without_reference_locations_deserializes_to_empty() {
341        // Cache entries written before the reference_locations field was added
342        // must still be readable. The #[serde(default)] attribute covers this,
343        // but we verify it explicitly so a future refactor can't silently break it.
344        let dir = TempDir::new().unwrap();
345        let cache_file = dir.path().join("cache.json");
346
347        // Write a cache file in the old format (no reference_locations field).
348        std::fs::write(
349            &cache_file,
350            r#"{"entries":{"a.php":{"content_hash":"abc","issues":[]}},"reverse_deps":{}}"#,
351        )
352        .unwrap();
353
354        let cache = AnalysisCache::open(dir.path());
355        let hit = cache
356            .get("a.php", "abc")
357            .expect("old cache entry should deserialize successfully");
358
359        assert!(hit.0.is_empty(), "no issues");
360        assert!(
361            hit.1.is_empty(),
362            "reference_locations should default to empty vec, not fail"
363        );
364    }
365}