Skip to main content

mir_analyzer/
cache.rs

1/// Per-file analysis result cache backed by a JSON file on disk.
2///
3/// Cache key: file path.  Cache validity: BLAKE3 hash of file content.
4/// If the content hash matches what was stored, the cached issues are returned
5/// and Pass 2 analysis is skipped for that file.
6use std::collections::{HashMap, HashSet};
7use std::path::{Path, PathBuf};
8use std::sync::atomic::{AtomicBool, Ordering};
9
10use parking_lot::Mutex;
11
12use serde::{Deserialize, Serialize};
13
14use mir_issues::Issue;
15
16/// Cached analysis result returned on a cache hit: issues and reference location
17/// tuples `(symbol_key, line, col_start, col_end)`.
18pub type CacheHit = (Vec<Issue>, Vec<(String, u32, u16, u16)>);
19
20// ---------------------------------------------------------------------------
21// Hash helper
22// ---------------------------------------------------------------------------
23
24/// Compute the BLAKE3 hex digest of `content`.
25pub fn hash_content(content: &str) -> String {
26    blake3::hash(content.as_bytes()).to_hex().to_string()
27}
28
29// ---------------------------------------------------------------------------
30// CacheEntry
31// ---------------------------------------------------------------------------
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34struct CacheEntry {
35    content_hash: String,
36    issues: Vec<Issue>,
37    /// Reference locations recorded during Pass 2: (symbol_key, line, col_start, col_end).
38    /// Stored so that cache hits can replay symbol_reference_locations without re-running
39    /// analyze_bodies.
40    #[serde(default)]
41    reference_locations: Vec<(String, u32, u16, u16)>,
42}
43
44// ---------------------------------------------------------------------------
45// AnalysisCache
46// ---------------------------------------------------------------------------
47
48/// Serialized form of the full cache file.
49#[derive(Debug, Default, Serialize, Deserialize)]
50struct CacheFile {
51    #[serde(default)]
52    entries: HashMap<String, CacheEntry>,
53    /// Reverse dependency graph: defining_file → [files that depend on it].
54    /// Persisted so that the next run can invalidate dependents before Pass 1.
55    #[serde(default)]
56    reverse_deps: HashMap<String, HashSet<String>>,
57}
58
59/// View for serializing cache data without cloning.
60#[derive(Serialize)]
61struct CacheFileView<'a> {
62    entries: &'a HashMap<String, CacheEntry>,
63    reverse_deps: &'a HashMap<String, HashSet<String>>,
64}
65
66/// Thread-safe, disk-backed cache for per-file analysis results.
67pub struct AnalysisCache {
68    cache_dir: PathBuf,
69    entries: Mutex<HashMap<String, CacheEntry>>,
70    /// Reverse dependency graph loaded from disk (from the previous run).
71    reverse_deps: Mutex<HashMap<String, HashSet<String>>>,
72    dirty: AtomicBool,
73}
74
75impl AnalysisCache {
76    /// Open or create a cache stored under `cache_dir`.
77    /// If the directory or cache file do not exist they are created lazily on
78    /// the first `flush()` call.
79    pub fn open(cache_dir: &Path) -> Self {
80        std::fs::create_dir_all(cache_dir).ok();
81        let file = Self::load(cache_dir);
82        Self {
83            cache_dir: cache_dir.to_path_buf(),
84            entries: Mutex::new(file.entries),
85            reverse_deps: Mutex::new(file.reverse_deps),
86            dirty: AtomicBool::new(false),
87        }
88    }
89
90    /// Open the default cache directory: `{project_root}/.mir-cache/`.
91    pub fn open_default(project_root: &Path) -> Self {
92        Self::open(&project_root.join(".mir-cache"))
93    }
94
95    /// Return cached issues and reference locations for `file_path` if its
96    /// `content_hash` matches. Returns `None` if there is no entry or the file
97    /// has changed. The second element of the tuple is the list of
98    /// `(symbol_key, line, col_start, col_end)` entries to replay into
99    /// the salsa db via `MirDatabase::replay_reference_locations`.
100    pub fn get(&self, file_path: &str, content_hash: &str) -> Option<CacheHit> {
101        let entries = self.entries.lock();
102        entries.get(file_path).and_then(|e| {
103            if e.content_hash == content_hash {
104                Some((e.issues.clone(), e.reference_locations.clone()))
105            } else {
106                None
107            }
108        })
109    }
110
111    /// Store `issues` and `reference_locations` for `file_path` with the given
112    /// `content_hash`. `reference_locations` is a list of
113    /// `(symbol_key, line, col_start, col_end)` recorded during Pass 2.
114    pub fn put(
115        &self,
116        file_path: &str,
117        content_hash: String,
118        issues: Vec<Issue>,
119        reference_locations: Vec<(String, u32, u16, u16)>,
120    ) {
121        let mut entries = self.entries.lock();
122        entries.insert(
123            file_path.to_string(),
124            CacheEntry {
125                content_hash,
126                issues,
127                reference_locations,
128            },
129        );
130        self.dirty.store(true, Ordering::Relaxed);
131    }
132
133    /// Persist the in-memory cache to `{cache_dir}/cache.json`.
134    /// This is a no-op if nothing changed since the last flush.
135    pub fn flush(&self) {
136        let was_dirty = self.dirty.swap(false, Ordering::Relaxed);
137        if !was_dirty {
138            return;
139        }
140        let cache_file = self.cache_dir.join("cache.json");
141        let entries = self.entries.lock();
142        let reverse_deps = self.reverse_deps.lock();
143        let view = CacheFileView {
144            entries: &entries,
145            reverse_deps: &reverse_deps,
146        };
147        if let Ok(json) = serde_json::to_string(&view) {
148            std::fs::write(cache_file, json).ok();
149        }
150    }
151
152    /// Replace the reverse dependency graph (called after each Pass 1).
153    pub fn set_reverse_deps(&self, deps: HashMap<String, HashSet<String>>) {
154        *self.reverse_deps.lock() = deps;
155        self.dirty.store(true, Ordering::Relaxed);
156    }
157
158    /// Update the reverse-dep graph for a single file in place.
159    ///
160    /// `new_targets` is the set of files `file` depends on (its imports'
161    /// defining files plus parent / interfaces / traits' defining files).
162    /// This removes `file` from every existing dependent set, then inserts it
163    /// into each of `new_targets`' dependent sets — preserving the invariant
164    /// that the graph reflects the file's *current* outgoing edges.
165    ///
166    /// Used by `AnalysisSession::ingest_file` to keep cross-file invalidation
167    /// correct without rebuilding the whole graph on every edit.
168    pub fn update_reverse_deps_for_file(&self, file: &str, new_targets: &HashSet<String>) {
169        let mut deps = self.reverse_deps.lock();
170
171        for dependents in deps.values_mut() {
172            dependents.remove(file);
173        }
174        deps.retain(|_, dependents| !dependents.is_empty());
175
176        for target in new_targets {
177            if target != file {
178                deps.entry(target.clone())
179                    .or_default()
180                    .insert(file.to_string());
181            }
182        }
183
184        self.dirty.store(true, Ordering::Relaxed);
185    }
186
187    /// BFS from each changed file through the reverse dep graph.
188    /// Evicts every reachable dependent's cache entry.
189    /// Returns the number of entries evicted.
190    pub fn evict_with_dependents(&self, changed_files: &[String]) -> usize {
191        // Phase 1: collect all dependents to evict via BFS (lock held only here).
192        let to_evict: Vec<String> = {
193            let deps = self.reverse_deps.lock();
194            let mut visited: HashSet<String> = changed_files.iter().cloned().collect();
195            let mut queue: std::collections::VecDeque<String> =
196                changed_files.iter().cloned().collect();
197            let mut result = Vec::new();
198
199            while let Some(file) = queue.pop_front() {
200                if let Some(dependents) = deps.get(&file) {
201                    for dep in dependents {
202                        let cloned = dep.clone();
203                        if visited.insert(cloned.clone()) {
204                            queue.push_back(cloned);
205                            result.push(dep.clone());
206                        }
207                    }
208                }
209            }
210            result
211        };
212
213        // Phase 2: evict (reverse_deps lock released above, entries lock taken per file).
214        let count = to_evict.len();
215        for file in &to_evict {
216            self.evict(file);
217        }
218        count
219    }
220
221    /// Remove a single file's cache entry.
222    pub fn evict(&self, file_path: &str) {
223        let mut entries = self.entries.lock();
224        entries.remove(file_path);
225        self.dirty.store(true, Ordering::Relaxed);
226    }
227
228    // -----------------------------------------------------------------------
229
230    fn load(cache_dir: &Path) -> CacheFile {
231        let cache_file = cache_dir.join("cache.json");
232        let Ok(bytes) = std::fs::read(&cache_file) else {
233            return CacheFile::default();
234        };
235        serde_json::from_slice(&bytes).unwrap_or_default()
236    }
237}
238
239// ---------------------------------------------------------------------------
240// Tests
241// ---------------------------------------------------------------------------
242
243#[cfg(test)]
244mod tests {
245    use super::*;
246    use tempfile::TempDir;
247
248    fn make_cache(dir: &TempDir) -> AnalysisCache {
249        AnalysisCache::open(dir.path())
250    }
251
252    fn seed(cache: &AnalysisCache, file: &str) {
253        cache.put(file, "hash".to_string(), vec![], vec![]);
254    }
255
256    #[test]
257    fn evict_with_dependents_linear_chain() {
258        // reverse_deps: A → [B], B → [C]
259        // Changing A must evict B and C.
260        let dir = TempDir::new().unwrap();
261        let cache = make_cache(&dir);
262        seed(&cache, "A");
263        seed(&cache, "B");
264        seed(&cache, "C");
265
266        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
267        deps.entry("A".into()).or_default().insert("B".into());
268        deps.entry("B".into()).or_default().insert("C".into());
269        cache.set_reverse_deps(deps);
270
271        let evicted = cache.evict_with_dependents(&["A".to_string()]);
272
273        assert_eq!(evicted, 2, "B and C should be evicted");
274        assert!(cache.get("A", "hash").is_some(), "A itself is not evicted");
275        assert!(cache.get("B", "hash").is_none(), "B should be evicted");
276        assert!(cache.get("C", "hash").is_none(), "C should be evicted");
277    }
278
279    #[test]
280    fn evict_with_dependents_diamond() {
281        // reverse_deps: A → [B, C], B → [D], C → [D]
282        // D should be evicted exactly once (visited set prevents double-eviction).
283        let dir = TempDir::new().unwrap();
284        let cache = make_cache(&dir);
285        seed(&cache, "A");
286        seed(&cache, "B");
287        seed(&cache, "C");
288        seed(&cache, "D");
289
290        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
291        deps.entry("A".into()).or_default().insert("B".into());
292        deps.entry("A".into()).or_default().insert("C".into());
293        deps.entry("B".into()).or_default().insert("D".into());
294        deps.entry("C".into()).or_default().insert("D".into());
295        cache.set_reverse_deps(deps);
296
297        let evicted = cache.evict_with_dependents(&["A".to_string()]);
298
299        assert_eq!(evicted, 3, "B, C, D each evicted once");
300        assert!(cache.get("D", "hash").is_none());
301    }
302
303    #[test]
304    fn evict_with_dependents_cycle_safety() {
305        // reverse_deps: A → [B], B → [A]  (circular)
306        // Must not loop forever; B should be evicted.
307        let dir = TempDir::new().unwrap();
308        let cache = make_cache(&dir);
309        seed(&cache, "A");
310        seed(&cache, "B");
311
312        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
313        deps.entry("A".into()).or_default().insert("B".into());
314        deps.entry("B".into()).or_default().insert("A".into());
315        cache.set_reverse_deps(deps);
316
317        let evicted = cache.evict_with_dependents(&["A".to_string()]);
318
319        // B is a dependent of A; A is the seed (not counted as "evicted dependent")
320        assert_eq!(evicted, 1);
321        assert!(cache.get("B", "hash").is_none());
322    }
323
324    #[test]
325    fn evict_with_dependents_unrelated_file_untouched() {
326        // Changing C should not evict B (which depends on A, not C).
327        let dir = TempDir::new().unwrap();
328        let cache = make_cache(&dir);
329        seed(&cache, "A");
330        seed(&cache, "B");
331        seed(&cache, "C");
332
333        let mut deps: HashMap<String, HashSet<String>> = HashMap::new();
334        deps.entry("A".into()).or_default().insert("B".into());
335        cache.set_reverse_deps(deps);
336
337        let evicted = cache.evict_with_dependents(&["C".to_string()]);
338
339        assert_eq!(evicted, 0);
340        assert!(
341            cache.get("B", "hash").is_some(),
342            "B unrelated, should survive"
343        );
344    }
345
346    #[test]
347    fn old_cache_without_reference_locations_deserializes_to_empty() {
348        // Cache entries written before the reference_locations field was added
349        // must still be readable. The #[serde(default)] attribute covers this,
350        // but we verify it explicitly so a future refactor can't silently break it.
351        let dir = TempDir::new().unwrap();
352        let cache_file = dir.path().join("cache.json");
353
354        // Write a cache file in the old format (no reference_locations field).
355        std::fs::write(
356            &cache_file,
357            r#"{"entries":{"a.php":{"content_hash":"abc","issues":[]}},"reverse_deps":{}}"#,
358        )
359        .unwrap();
360
361        let cache = AnalysisCache::open(dir.path());
362        let hit = cache
363            .get("a.php", "abc")
364            .expect("old cache entry should deserialize successfully");
365
366        assert!(hit.0.is_empty(), "no issues");
367        assert!(
368            hit.1.is_empty(),
369            "reference_locations should default to empty vec, not fail"
370        );
371    }
372}