Skip to main content

aptu_coder_core/
cache.rs

1// SPDX-FileCopyrightText: 2026 aptu-coder contributors
2// SPDX-License-Identifier: Apache-2.0
3//! LRU cache for analysis results indexed by path, modification time, and mode.
4//!
5//! Provides thread-safe, capacity-bounded caching of file analysis outputs using LRU eviction.
6//! Recovers gracefully from poisoned mutex conditions.
7
8use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use std::num::NonZeroUsize;
14use std::path::PathBuf;
15use std::sync::{Arc, Mutex};
16use std::time::SystemTime;
17use tracing::{debug, instrument};
18
19/// Cache key combining path, modification time, and analysis mode.
20#[derive(Debug, Clone, Eq, PartialEq, Hash)]
21pub struct CacheKey {
22    pub path: PathBuf,
23    pub modified: SystemTime,
24    pub mode: AnalysisMode,
25}
26
27/// Cache key for directory analysis combining file mtimes, mode, and `max_depth`.
28#[derive(Debug, Clone, Eq, PartialEq, Hash)]
29pub struct DirectoryCacheKey {
30    files: Vec<(PathBuf, SystemTime)>,
31    mode: AnalysisMode,
32    max_depth: Option<u32>,
33    git_ref: Option<String>,
34}
35
36impl DirectoryCacheKey {
37    /// Build a cache key from walk entries, capturing mtime for each file.
38    /// Files are sorted by path for deterministic hashing.
39    /// Directories are filtered out; only file entries are processed.
40    /// Metadata collection is parallelized using rayon.
41    /// The `git_ref` is included so that filtered and unfiltered results have distinct keys.
42    #[must_use]
43    pub fn from_entries(
44        entries: &[WalkEntry],
45        max_depth: Option<u32>,
46        mode: AnalysisMode,
47        git_ref: Option<&str>,
48    ) -> Self {
49        let mut files: Vec<(PathBuf, SystemTime)> = entries
50            .par_iter()
51            .filter(|e| !e.is_dir)
52            .map(|e| {
53                let mtime = e.mtime.unwrap_or(SystemTime::UNIX_EPOCH);
54                (e.path.clone(), mtime)
55            })
56            .collect();
57        files.sort_by(|a, b| a.0.cmp(&b.0));
58        Self {
59            files,
60            mode,
61            max_depth,
62            git_ref: git_ref.map(ToOwned::to_owned),
63        }
64    }
65}
66
67/// Recover from a poisoned mutex by clearing the cache.
68/// On poison, creates a new empty cache and returns the recovery value.
69fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
70where
71    K: std::hash::Hash + Eq,
72    F: FnOnce(&mut LruCache<K, V>) -> T,
73{
74    match mutex.lock() {
75        Ok(mut guard) => recovery(&mut guard),
76        Err(poisoned) => {
77            let cache_size = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(100).unwrap());
78            let new_cache = LruCache::new(cache_size);
79            let mut guard = poisoned.into_inner();
80            *guard = new_cache;
81            recovery(&mut guard)
82        }
83    }
84}
85
86/// LRU cache for file analysis results with mutex protection.
87pub struct AnalysisCache {
88    file_capacity: usize,
89    dir_capacity: usize,
90    cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
91    directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
92}
93
94impl AnalysisCache {
95    /// Create a new cache with the specified file capacity.
96    /// The directory cache capacity is read from the `APTU_CODER_DIR_CACHE_CAPACITY`
97    /// environment variable (default: 20).
98    #[must_use]
99    pub fn new(capacity: usize) -> Self {
100        let file_capacity = capacity.max(1);
101        let dir_capacity: usize = std::env::var("APTU_CODER_DIR_CACHE_CAPACITY")
102            .ok()
103            .and_then(|v| v.parse().ok())
104            .unwrap_or(20);
105        let dir_capacity = dir_capacity.max(1);
106        let cache_size = NonZeroUsize::new(file_capacity).unwrap();
107        let dir_cache_size = NonZeroUsize::new(dir_capacity).unwrap();
108        Self {
109            file_capacity,
110            dir_capacity,
111            cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
112            directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
113        }
114    }
115
116    /// Get a cached analysis result if it exists.
117    #[instrument(skip(self), fields(path = ?key.path))]
118    pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
119        lock_or_recover(&self.cache, self.file_capacity, |guard| {
120            let result = guard.get(key).cloned();
121            let cache_size = guard.len();
122            if let Some(v) = result {
123                debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
124                Some(v)
125            } else {
126                debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
127                None
128            }
129        })
130    }
131
132    /// Store an analysis result in the cache.
133    #[instrument(skip(self, value), fields(path = ?key.path))]
134    // public API; callers expect owned semantics
135    #[allow(clippy::needless_pass_by_value)]
136    pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
137        lock_or_recover(&self.cache, self.file_capacity, |guard| {
138            let push_result = guard.push(key.clone(), value);
139            let cache_size = guard.len();
140            match push_result {
141                None => {
142                    debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
143                }
144                Some((returned_key, _)) => {
145                    if returned_key == key {
146                        debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
147                    } else {
148                        debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
149                    }
150                }
151            }
152        });
153    }
154
155    /// Get a cached directory analysis result if it exists.
156    #[instrument(skip(self))]
157    pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
158        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
159            let result = guard.get(key).cloned();
160            let cache_size = guard.len();
161            if let Some(v) = result {
162                debug!(cache_event = "hit", cache_size = cache_size);
163                Some(v)
164            } else {
165                debug!(cache_event = "miss", cache_size = cache_size);
166                None
167            }
168        })
169    }
170
171    /// Store a directory analysis result in the cache.
172    #[instrument(skip(self, value))]
173    pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
174        lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
175            let push_result = guard.push(key, value);
176            let cache_size = guard.len();
177            match push_result {
178                None => {
179                    debug!(cache_event = "insert", cache_size = cache_size);
180                }
181                Some((_, _)) => {
182                    debug!(cache_event = "eviction", cache_size = cache_size);
183                }
184            }
185        });
186    }
187
188    /// Returns the configured file-cache capacity.
189    /// Exposed for testing across crate boundaries; not part of the stable API.
190    #[doc(hidden)]
191    pub fn file_capacity(&self) -> usize {
192        self.file_capacity
193    }
194
195    /// Invalidate all cache entries for a given file path.
196    /// Removes all entries regardless of modification time or analysis mode.
197    #[instrument(skip(self), fields(path = ?path))]
198    pub fn invalidate_file(&self, path: &std::path::Path) {
199        lock_or_recover(&self.cache, self.file_capacity, |guard| {
200            let keys: Vec<CacheKey> = guard
201                .iter()
202                .filter(|(k, _)| k.path == path)
203                .map(|(k, _)| k.clone())
204                .collect();
205            for key in keys {
206                guard.pop(&key);
207            }
208            let cache_size = guard.len();
209            debug!(cache_event = "invalidate_file", cache_size = cache_size, path = ?path);
210        });
211    }
212}
213
214impl Clone for AnalysisCache {
215    fn clone(&self) -> Self {
216        Self {
217            file_capacity: self.file_capacity,
218            dir_capacity: self.dir_capacity,
219            cache: Arc::clone(&self.cache),
220            directory_cache: Arc::clone(&self.directory_cache),
221        }
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticAnalysis;

    /// Build a minimal `FileAnalysisOutput` to store as a cached value.
    fn empty_output() -> Arc<FileAnalysisOutput> {
        Arc::new(FileAnalysisOutput::new(
            String::new(),
            SemanticAnalysis::default(),
            0,
            None,
        ))
    }

    #[test]
    fn test_from_entries_skips_dirs() {
        // Arrange: create a real temp dir and a real temp file for hermetic isolation.
        let dir = tempfile::tempdir().expect("tempdir");
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();

        let entries = vec![
            WalkEntry {
                path: dir.path().to_path_buf(),
                depth: 0,
                is_dir: true,
                is_symlink: false,
                symlink_target: None,
                mtime: None,
                canonical_path: PathBuf::new(),
            },
            WalkEntry {
                path: file_path.clone(),
                depth: 0,
                is_dir: false,
                is_symlink: false,
                symlink_target: None,
                mtime: None,
                canonical_path: PathBuf::new(),
            },
        ];

        // Act: build cache key from entries
        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Assert: only the file entry should be in the cache key
        // The directory entry should be filtered out
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }

    #[test]
    fn test_invalidate_file_single_mode() {
        // Arrange: create a cache and insert one entry for a path
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key = CacheKey {
            path: path.clone(),
            modified: SystemTime::UNIX_EPOCH,
            mode: AnalysisMode::Overview,
        };
        cache.put(key.clone(), empty_output());

        // Act: invalidate the file
        cache.invalidate_file(&path);

        // Assert: the entry should be removed
        assert!(cache.get(&key).is_none());
    }

    #[test]
    fn test_invalidate_file_multi_mode() {
        // Arrange: create a cache and insert two entries for the same path with different modes
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key1 = CacheKey {
            path: path.clone(),
            modified: SystemTime::UNIX_EPOCH,
            mode: AnalysisMode::Overview,
        };
        let key2 = CacheKey {
            path: path.clone(),
            modified: SystemTime::UNIX_EPOCH,
            mode: AnalysisMode::FileDetails,
        };
        let output = empty_output();
        cache.put(key1.clone(), output.clone());
        cache.put(key2.clone(), output);

        // Act: invalidate the file
        cache.invalidate_file(&path);

        // Assert: both entries should be removed
        assert!(cache.get(&key1).is_none());
        assert!(cache.get(&key2).is_none());
    }

    // Mutex serialises the two dir-cache-capacity tests to prevent env var races.
    static DIR_CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Acquire the env lock even if a previous holder panicked.
    ///
    /// The mutex guards no data, so poisoning carries no information here;
    /// tolerating it keeps one failing env test from also failing the other
    /// with an unrelated `PoisonError`.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        DIR_CACHE_ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    #[test]
    fn test_dir_cache_capacity_default() {
        let _guard = lock_env();

        // Arrange: ensure the env var is not set
        // SAFETY: writers of this variable are serialised by `DIR_CACHE_ENV_LOCK`.
        // NOTE(review): other tests may read the environment concurrently via
        // `AnalysisCache::new`; this relies on all access going through
        // `std::env` — confirm nothing reads it through libc directly.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        // Act
        let cache = AnalysisCache::new(100);

        // Assert: default dir capacity is 20
        assert_eq!(cache.dir_capacity, 20);
    }

    #[test]
    fn test_dir_cache_capacity_from_env() {
        let _guard = lock_env();

        // Arrange
        // SAFETY: writers of this variable are serialised by `DIR_CACHE_ENV_LOCK`.
        // NOTE(review): same caveat as in `test_dir_cache_capacity_default`.
        unsafe { std::env::set_var("APTU_CODER_DIR_CACHE_CAPACITY", "7") };

        // Act
        let cache = AnalysisCache::new(100);

        // Cleanup before assertions to minimise env pollution window
        // SAFETY: still holding `DIR_CACHE_ENV_LOCK`; see the note above.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        // Assert
        assert_eq!(cache.dir_capacity, 7);
    }
}