Skip to main content

aptu_coder_core/
cache.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! LRU cache for analysis results indexed by path, modification time, and mode.
4//!
5//! Provides thread-safe, capacity-bounded caching of file analysis outputs using LRU eviction.
6//! Recovers gracefully from poisoned mutex conditions.
7
8use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use std::num::NonZeroUsize;
14use std::path::PathBuf;
15use std::sync::{Arc, Mutex};
16use std::time::SystemTime;
17use tracing::{debug, instrument};
18
19/// Cache key combining path, modification time, and analysis mode.
20#[derive(Debug, Clone, Eq, PartialEq, Hash)]
21pub struct CacheKey {
22    pub path: PathBuf,
23    pub modified: SystemTime,
24    pub mode: AnalysisMode,
25}
26
27/// Cache key for directory analysis combining file mtimes, mode, and `max_depth`.
28#[derive(Debug, Clone, Eq, PartialEq, Hash)]
29pub struct DirectoryCacheKey {
30    files: Vec<(PathBuf, SystemTime)>,
31    mode: AnalysisMode,
32    max_depth: Option<u32>,
33    git_ref: Option<String>,
34}
35
36impl DirectoryCacheKey {
37    /// Build a cache key from walk entries, capturing mtime for each file.
38    /// Files are sorted by path for deterministic hashing.
39    /// Directories are filtered out; only file entries are processed.
40    /// Metadata collection is parallelized using rayon.
41    /// The `git_ref` is included so that filtered and unfiltered results have distinct keys.
42    #[must_use]
43    pub fn from_entries(
44        entries: &[WalkEntry],
45        max_depth: Option<u32>,
46        mode: AnalysisMode,
47        git_ref: Option<&str>,
48    ) -> Self {
49        let mut files: Vec<(PathBuf, SystemTime)> = entries
50            .par_iter()
51            .filter(|e| !e.is_dir)
52            .map(|e| {
53                let mtime = e.mtime.unwrap_or(SystemTime::UNIX_EPOCH);
54                (e.path.clone(), mtime)
55            })
56            .collect();
57        files.sort_by(|a, b| a.0.cmp(&b.0));
58        Self {
59            files,
60            mode,
61            max_depth,
62            git_ref: git_ref.map(ToOwned::to_owned),
63        }
64    }
65}
66
67/// Recover from a poisoned mutex by clearing the cache.
68/// On poison, creates a new empty cache and returns the recovery value.
69fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
70where
71    K: std::hash::Hash + Eq,
72    F: FnOnce(&mut LruCache<K, V>) -> T,
73{
74    match mutex.lock() {
75        Ok(mut guard) => recovery(&mut guard),
76        Err(poisoned) => {
77            let cache_size = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(100).unwrap());
78            let new_cache = LruCache::new(cache_size);
79            let mut guard = poisoned.into_inner();
80            *guard = new_cache;
81            recovery(&mut guard)
82        }
83    }
84}
85
86/// LRU cache for file analysis results with mutex protection.
87pub struct AnalysisCache {
88    file_capacity: usize,
89    dir_capacity: usize,
90    cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
91    directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
92}
93
94impl AnalysisCache {
95    /// Create a new cache with the specified file capacity.
96    /// The directory cache capacity is read from the `CODE_ANALYZE_DIR_CACHE_CAPACITY`
97    /// environment variable (default: 20).
98    #[must_use]
99    pub fn new(capacity: usize) -> Self {
100        let file_capacity = capacity.max(1);
101        let dir_capacity: usize = std::env::var("CODE_ANALYZE_DIR_CACHE_CAPACITY")
102            .ok()
103            .and_then(|v| v.parse().ok())
104            .unwrap_or(20);
105        let dir_capacity = dir_capacity.max(1);
106        let cache_size = NonZeroUsize::new(file_capacity).unwrap();
107        let dir_cache_size = NonZeroUsize::new(dir_capacity).unwrap();
108        Self {
109            file_capacity,
110            dir_capacity,
111            cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
112            directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
113        }
114    }
115
116    /// Get a cached analysis result if it exists.
117    #[instrument(skip(self), fields(path = ?key.path))]
118    pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
119        lock_or_recover(&self.cache, self.file_capacity, |guard| {
120            let result = guard.get(key).cloned();
121            let cache_size = guard.len();
122            if let Some(v) = result {
123                debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
124                Some(v)
125            } else {
126                debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
127                None
128            }
129        })
130    }
131
132    /// Store an analysis result in the cache.
133    #[instrument(skip(self, value), fields(path = ?key.path))]
134    // public API; callers expect owned semantics
135    #[allow(clippy::needless_pass_by_value)]
136    pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
137        lock_or_recover(&self.cache, self.file_capacity, |guard| {
138            let push_result = guard.push(key.clone(), value);
139            let cache_size = guard.len();
140            match push_result {
141                None => {
142                    debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
143                }
144                Some((returned_key, _)) => {
145                    if returned_key == key {
146                        debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
147                    } else {
148                        debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
149                    }
150                }
151            }
152        });
153    }
154
155    /// Get a cached directory analysis result if it exists.
156    #[instrument(skip(self))]
157    pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
158        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
159            let result = guard.get(key).cloned();
160            let cache_size = guard.len();
161            if let Some(v) = result {
162                debug!(cache_event = "hit", cache_size = cache_size);
163                Some(v)
164            } else {
165                debug!(cache_event = "miss", cache_size = cache_size);
166                None
167            }
168        })
169    }
170
171    /// Store a directory analysis result in the cache.
172    #[instrument(skip(self, value))]
173    pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
174        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
175            let push_result = guard.push(key, value);
176            let cache_size = guard.len();
177            match push_result {
178                None => {
179                    debug!(cache_event = "insert", cache_size = cache_size);
180                }
181                Some((_, _)) => {
182                    debug!(cache_event = "eviction", cache_size = cache_size);
183                }
184            }
185        });
186    }
187
188    /// Invalidate all cache entries for a given file path.
189    /// Removes all entries regardless of modification time or analysis mode.
190    #[instrument(skip(self), fields(path = ?path))]
191    pub fn invalidate_file(&self, path: &std::path::Path) {
192        lock_or_recover(&self.cache, self.file_capacity, |guard| {
193            let keys: Vec<CacheKey> = guard
194                .iter()
195                .filter(|(k, _)| k.path == path)
196                .map(|(k, _)| k.clone())
197                .collect();
198            for key in keys {
199                guard.pop(&key);
200            }
201            let cache_size = guard.len();
202            debug!(cache_event = "invalidate_file", cache_size = cache_size, path = ?path);
203        });
204    }
205}
206
207impl Clone for AnalysisCache {
208    fn clone(&self) -> Self {
209        Self {
210            file_capacity: self.file_capacity,
211            dir_capacity: self.dir_capacity,
212            cache: Arc::clone(&self.cache),
213            directory_cache: Arc::clone(&self.directory_cache),
214        }
215    }
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticAnalysis;

    /// Path shared by the invalidation tests.
    const SAMPLE_PATH: &str = "/test/file.rs";

    /// Build a walk entry with no mtime and no symlink information.
    fn walk_entry(path: PathBuf, is_dir: bool) -> WalkEntry {
        WalkEntry {
            path,
            depth: 0,
            is_dir,
            is_symlink: false,
            symlink_target: None,
            mtime: None,
            canonical_path: PathBuf::new(),
        }
    }

    /// Build a file cache key at the Unix epoch for the given path and mode.
    fn epoch_key(path: &std::path::Path, mode: AnalysisMode) -> CacheKey {
        CacheKey {
            path: path.to_path_buf(),
            modified: SystemTime::UNIX_EPOCH,
            mode,
        }
    }

    /// Build an empty analysis output to use as a cache value.
    fn empty_output() -> Arc<FileAnalysisOutput> {
        Arc::new(FileAnalysisOutput::new(
            String::new(),
            SemanticAnalysis::default(),
            0,
            None,
        ))
    }

    #[test]
    fn test_from_entries_skips_dirs() {
        // Arrange: a real temp dir containing a real temp file, for hermetic isolation.
        let dir = tempfile::tempdir().expect("tempdir");
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();
        let entries = vec![
            walk_entry(dir.path().to_path_buf(), true),
            walk_entry(file_path.clone(), false),
        ];

        // Act: build the directory key from one directory entry and one file entry.
        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Assert: the directory entry is filtered out and only the file remains.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }

    #[test]
    fn test_invalidate_file_single_mode() {
        // Arrange: a cache holding one entry for the path.
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from(SAMPLE_PATH);
        let key = epoch_key(&path, AnalysisMode::Overview);
        cache.put(key.clone(), empty_output());

        // Act: invalidate the file.
        cache.invalidate_file(&path);

        // Assert: the entry is gone.
        assert!(cache.get(&key).is_none());
    }

    #[test]
    fn test_invalidate_file_multi_mode() {
        // Arrange: a cache holding two entries for the same path under different modes.
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from(SAMPLE_PATH);
        let overview_key = epoch_key(&path, AnalysisMode::Overview);
        let details_key = epoch_key(&path, AnalysisMode::FileDetails);
        let output = empty_output();
        cache.put(overview_key.clone(), Arc::clone(&output));
        cache.put(details_key.clone(), output);

        // Act: invalidate the file.
        cache.invalidate_file(&path);

        // Assert: both entries are gone.
        assert!(cache.get(&overview_key).is_none());
        assert!(cache.get(&details_key).is_none());
    }
}
316}