// code_analyze_core/cache.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! LRU cache for analysis results indexed by path, modification time, and mode.
4//!
5//! Provides thread-safe, capacity-bounded caching of file analysis outputs using LRU eviction.
6//! Recovers gracefully from poisoned mutex conditions.
7
8use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use std::fs;
14use std::num::NonZeroUsize;
15use std::path::PathBuf;
16use std::sync::{Arc, Mutex};
17use std::time::SystemTime;
18use tracing::{debug, instrument};
19
/// Maximum number of whole-directory analysis results kept in the directory LRU.
const DIR_CACHE_CAPACITY: usize = 20;
21
/// Cache key combining path, modification time, and analysis mode.
///
/// Invalidation is implicit: when a file changes, its new mtime yields a
/// different key, so stale entries are never looked up and simply age out
/// of the LRU.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct CacheKey {
    /// Path of the analyzed file.
    pub path: PathBuf,
    /// Last-modified timestamp captured when the entry was created.
    pub modified: SystemTime,
    /// Analysis mode the cached output was produced under.
    pub mode: AnalysisMode,
}
29
/// Cache key for directory analysis combining file mtimes, mode, and `max_depth`.
///
/// Equality and hashing cover every `(path, mtime)` pair, so any file change,
/// addition, or removal under the directory produces a different key.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct DirectoryCacheKey {
    // Sorted by path in `from_entries` so hashing is deterministic
    // regardless of walk order.
    files: Vec<(PathBuf, SystemTime)>,
    mode: AnalysisMode,
    max_depth: Option<u32>,
}
37
38impl DirectoryCacheKey {
39    /// Build a cache key from walk entries, capturing mtime for each file.
40    /// Files are sorted by path for deterministic hashing.
41    /// Directories are filtered out; only file entries are processed.
42    /// Metadata collection is parallelized using rayon.
43    #[must_use]
44    pub fn from_entries(entries: &[WalkEntry], max_depth: Option<u32>, mode: AnalysisMode) -> Self {
45        let mut files: Vec<(PathBuf, SystemTime)> = entries
46            .par_iter()
47            .filter(|e| !e.is_dir)
48            .map(|e| {
49                let mtime = fs::metadata(&e.path)
50                    .and_then(|m| m.modified())
51                    .unwrap_or(SystemTime::UNIX_EPOCH);
52                (e.path.clone(), mtime)
53            })
54            .collect();
55        files.sort_by(|a, b| a.0.cmp(&b.0));
56        Self {
57            files,
58            mode,
59            max_depth,
60        }
61    }
62}
63
64/// Recover from a poisoned mutex by clearing the cache.
65/// On poison, creates a new empty cache and returns the recovery value.
66fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
67where
68    K: std::hash::Hash + Eq,
69    F: FnOnce(&mut LruCache<K, V>) -> T,
70{
71    match mutex.lock() {
72        Ok(mut guard) => recovery(&mut guard),
73        Err(poisoned) => {
74            let cache_size = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(100).unwrap());
75            let new_cache = LruCache::new(cache_size);
76            let mut guard = poisoned.into_inner();
77            *guard = new_cache;
78            recovery(&mut guard)
79        }
80    }
81}
82
/// LRU cache for file analysis results with mutex protection.
///
/// Holds two independent LRU maps: per-file results keyed by [`CacheKey`]
/// and whole-directory results keyed by [`DirectoryCacheKey`]. Both live
/// behind `Arc<Mutex<..>>`, so clones of this struct share the same storage.
pub struct AnalysisCache {
    // Configured file-cache capacity (clamped to >= 1 in `new`); also passed
    // to `lock_or_recover` to rebuild the cache after mutex poisoning.
    file_capacity: usize,
    // Per-file analysis results.
    cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
    // Whole-directory analysis results (fixed capacity `DIR_CACHE_CAPACITY`).
    directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
}
89
90impl AnalysisCache {
91    /// Create a new cache with the specified capacity.
92    #[must_use]
93    pub fn new(capacity: usize) -> Self {
94        let file_capacity = capacity.max(1);
95        let cache_size = NonZeroUsize::new(file_capacity).unwrap();
96        let dir_cache_size = NonZeroUsize::new(DIR_CACHE_CAPACITY).unwrap();
97        Self {
98            file_capacity,
99            cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
100            directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
101        }
102    }
103
104    /// Get a cached analysis result if it exists.
105    #[instrument(skip(self), fields(path = ?key.path))]
106    pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
107        lock_or_recover(&self.cache, self.file_capacity, |guard| {
108            let result = guard.get(key).cloned();
109            let cache_size = guard.len();
110            if let Some(v) = result {
111                debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
112                Some(v)
113            } else {
114                debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
115                None
116            }
117        })
118    }
119
120    /// Store an analysis result in the cache.
121    #[instrument(skip(self, value), fields(path = ?key.path))]
122    // public API; callers expect owned semantics
123    #[allow(clippy::needless_pass_by_value)]
124    pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
125        lock_or_recover(&self.cache, self.file_capacity, |guard| {
126            let push_result = guard.push(key.clone(), value);
127            let cache_size = guard.len();
128            match push_result {
129                None => {
130                    debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
131                }
132                Some((returned_key, _)) => {
133                    if returned_key == key {
134                        debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
135                    } else {
136                        debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
137                    }
138                }
139            }
140        });
141    }
142
143    /// Get a cached directory analysis result if it exists.
144    #[instrument(skip(self))]
145    pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
146        lock_or_recover(&self.directory_cache, DIR_CACHE_CAPACITY, |guard| {
147            let result = guard.get(key).cloned();
148            let cache_size = guard.len();
149            if let Some(v) = result {
150                debug!(cache_event = "hit", cache_size = cache_size);
151                Some(v)
152            } else {
153                debug!(cache_event = "miss", cache_size = cache_size);
154                None
155            }
156        })
157    }
158
159    /// Store a directory analysis result in the cache.
160    #[instrument(skip(self, value))]
161    pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
162        lock_or_recover(&self.directory_cache, DIR_CACHE_CAPACITY, |guard| {
163            let push_result = guard.push(key, value);
164            let cache_size = guard.len();
165            match push_result {
166                None => {
167                    debug!(cache_event = "insert", cache_size = cache_size);
168                }
169                Some((_, _)) => {
170                    debug!(cache_event = "eviction", cache_size = cache_size);
171                }
172            }
173        });
174    }
175}
176
177impl Clone for AnalysisCache {
178    fn clone(&self) -> Self {
179        Self {
180            file_capacity: self.file_capacity,
181            cache: Arc::clone(&self.cache),
182            directory_cache: Arc::clone(&self.directory_cache),
183        }
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory entries must not contribute to the cache key; only files do.
    #[test]
    fn test_from_entries_skips_dirs() {
        // Use real filesystem objects so the mtime lookup succeeds hermetically.
        let dir = tempfile::tempdir().expect("tempdir");
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();

        let dir_entry = WalkEntry {
            path: dir.path().to_path_buf(),
            depth: 0,
            is_dir: true,
            is_symlink: false,
            symlink_target: None,
        };
        let file_entry = WalkEntry {
            path: file_path.clone(),
            depth: 0,
            is_dir: false,
            is_symlink: false,
            symlink_target: None,
        };

        let key =
            DirectoryCacheKey::from_entries(&[dir_entry, file_entry], None, AnalysisMode::Overview);

        // The directory entry is filtered out; only the file remains.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }
}
223}