// code_analyze_core/cache.rs
// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
// SPDX-License-Identifier: Apache-2.0
//! LRU cache for analysis results indexed by path, modification time, and mode.
//!
//! Provides thread-safe, capacity-bounded caching of file analysis outputs using LRU eviction.
//! Recovers gracefully from poisoned mutex conditions.

use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
use crate::traversal::WalkEntry;
use crate::types::AnalysisMode;
use lru::LruCache;
use rayon::prelude::*;
use std::fs;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use tracing::{debug, instrument};

20/// Cache key combining path, modification time, and analysis mode.
21#[derive(Debug, Clone, Eq, PartialEq, Hash)]
22pub struct CacheKey {
23    pub path: PathBuf,
24    pub modified: SystemTime,
25    pub mode: AnalysisMode,
26}
27
28/// Cache key for directory analysis combining file mtimes, mode, and `max_depth`.
29#[derive(Debug, Clone, Eq, PartialEq, Hash)]
30pub struct DirectoryCacheKey {
31    files: Vec<(PathBuf, SystemTime)>,
32    mode: AnalysisMode,
33    max_depth: Option<u32>,
34    git_ref: Option<String>,
35}
36
37impl DirectoryCacheKey {
38    /// Build a cache key from walk entries, capturing mtime for each file.
39    /// Files are sorted by path for deterministic hashing.
40    /// Directories are filtered out; only file entries are processed.
41    /// Metadata collection is parallelized using rayon.
42    /// The `git_ref` is included so that filtered and unfiltered results have distinct keys.
43    #[must_use]
44    pub fn from_entries(
45        entries: &[WalkEntry],
46        max_depth: Option<u32>,
47        mode: AnalysisMode,
48        git_ref: Option<&str>,
49    ) -> Self {
50        let mut files: Vec<(PathBuf, SystemTime)> = entries
51            .par_iter()
52            .filter(|e| !e.is_dir)
53            .map(|e| {
54                let mtime = fs::metadata(&e.path)
55                    .and_then(|m| m.modified())
56                    .unwrap_or(SystemTime::UNIX_EPOCH);
57                (e.path.clone(), mtime)
58            })
59            .collect();
60        files.sort_by(|a, b| a.0.cmp(&b.0));
61        Self {
62            files,
63            mode,
64            max_depth,
65            git_ref: git_ref.map(ToOwned::to_owned),
66        }
67    }
68}
69
70/// Recover from a poisoned mutex by clearing the cache.
71/// On poison, creates a new empty cache and returns the recovery value.
72fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
73where
74    K: std::hash::Hash + Eq,
75    F: FnOnce(&mut LruCache<K, V>) -> T,
76{
77    match mutex.lock() {
78        Ok(mut guard) => recovery(&mut guard),
79        Err(poisoned) => {
80            let cache_size = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(100).unwrap());
81            let new_cache = LruCache::new(cache_size);
82            let mut guard = poisoned.into_inner();
83            *guard = new_cache;
84            recovery(&mut guard)
85        }
86    }
87}
88
89/// LRU cache for file analysis results with mutex protection.
90pub struct AnalysisCache {
91    file_capacity: usize,
92    dir_capacity: usize,
93    cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
94    directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
95}
96
97impl AnalysisCache {
98    /// Create a new cache with the specified file capacity.
99    /// The directory cache capacity is read from the `CODE_ANALYZE_DIR_CACHE_CAPACITY`
100    /// environment variable (default: 20).
101    #[must_use]
102    pub fn new(capacity: usize) -> Self {
103        let file_capacity = capacity.max(1);
104        let dir_capacity: usize = std::env::var("CODE_ANALYZE_DIR_CACHE_CAPACITY")
105            .ok()
106            .and_then(|v| v.parse().ok())
107            .unwrap_or(20);
108        let dir_capacity = dir_capacity.max(1);
109        let cache_size = NonZeroUsize::new(file_capacity).unwrap();
110        let dir_cache_size = NonZeroUsize::new(dir_capacity).unwrap();
111        Self {
112            file_capacity,
113            dir_capacity,
114            cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
115            directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
116        }
117    }
118
119    /// Get a cached analysis result if it exists.
120    #[instrument(skip(self), fields(path = ?key.path))]
121    pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
122        lock_or_recover(&self.cache, self.file_capacity, |guard| {
123            let result = guard.get(key).cloned();
124            let cache_size = guard.len();
125            if let Some(v) = result {
126                debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
127                Some(v)
128            } else {
129                debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
130                None
131            }
132        })
133    }
134
135    /// Store an analysis result in the cache.
136    #[instrument(skip(self, value), fields(path = ?key.path))]
137    // public API; callers expect owned semantics
138    #[allow(clippy::needless_pass_by_value)]
139    pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
140        lock_or_recover(&self.cache, self.file_capacity, |guard| {
141            let push_result = guard.push(key.clone(), value);
142            let cache_size = guard.len();
143            match push_result {
144                None => {
145                    debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
146                }
147                Some((returned_key, _)) => {
148                    if returned_key == key {
149                        debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
150                    } else {
151                        debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
152                    }
153                }
154            }
155        });
156    }
157
158    /// Get a cached directory analysis result if it exists.
159    #[instrument(skip(self))]
160    pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
161        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
162            let result = guard.get(key).cloned();
163            let cache_size = guard.len();
164            if let Some(v) = result {
165                debug!(cache_event = "hit", cache_size = cache_size);
166                Some(v)
167            } else {
168                debug!(cache_event = "miss", cache_size = cache_size);
169                None
170            }
171        })
172    }
173
174    /// Store a directory analysis result in the cache.
175    #[instrument(skip(self, value))]
176    pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
177        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
178            let push_result = guard.push(key, value);
179            let cache_size = guard.len();
180            match push_result {
181                None => {
182                    debug!(cache_event = "insert", cache_size = cache_size);
183                }
184                Some((_, _)) => {
185                    debug!(cache_event = "eviction", cache_size = cache_size);
186                }
187            }
188        });
189    }
190}
191
192impl Clone for AnalysisCache {
193    fn clone(&self) -> Self {
194        Self {
195            file_capacity: self.file_capacity,
196            dir_capacity: self.dir_capacity,
197            cache: Arc::clone(&self.cache),
198            directory_cache: Arc::clone(&self.directory_cache),
199        }
200    }
201}
202
#[cfg(test)]
mod tests {
    use super::*;

    /// `from_entries` must include only file entries in the key; directory
    /// entries are filtered out.
    #[test]
    fn test_from_entries_skips_dirs() {
        // Arrange: create a real temp dir and a real temp file for hermetic isolation.
        let dir = tempfile::tempdir().expect("tempdir");
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();

        let entries = vec![
            WalkEntry {
                path: dir.path().to_path_buf(),
                depth: 0,
                is_dir: true,
                is_symlink: false,
                symlink_target: None,
            },
            WalkEntry {
                path: file_path.clone(),
                depth: 0,
                is_dir: false,
                is_symlink: false,
                symlink_target: None,
            },
        ];

        // Act: build cache key from entries.
        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Assert: only the file entry should be in the cache key;
        // the directory entry should be filtered out.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }
}
239}