1use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use serde::{Serialize, de::DeserializeOwned};
14use std::num::NonZeroUsize;
15#[cfg(unix)]
16use std::os::unix::fs::PermissionsExt;
17use std::path::PathBuf;
18use std::sync::{Arc, Mutex};
19use std::time::SystemTime;
20use tempfile::NamedTempFile;
21use tracing::{debug, error, instrument, warn};
22
/// Label for the cache tier associated with a lookup, or for a miss.
///
/// Each variant maps to a fixed string through [`CacheTier::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheTier {
    /// Rendered as `"l1_memory"`.
    L1Memory,
    /// Rendered as `"l2_disk"`.
    L2Disk,
    /// Rendered as `"miss"`.
    Miss,
}

impl CacheTier {
    /// Returns the static lowercase label for this tier.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Miss => "miss",
            Self::L2Disk => "l2_disk",
            Self::L1Memory => "l1_memory",
        }
    }
}
40
41#[derive(Debug, Clone, Eq, PartialEq, Hash)]
43pub struct CacheKey {
44 pub path: PathBuf,
45 pub modified: SystemTime,
46 pub mode: AnalysisMode,
47}
48
49#[derive(Debug, Clone, Eq, PartialEq, Hash)]
51pub struct DirectoryCacheKey {
52 files: Vec<(PathBuf, SystemTime)>,
53 mode: AnalysisMode,
54 max_depth: Option<u32>,
55 git_ref: Option<String>,
56}
57
58impl DirectoryCacheKey {
59 #[must_use]
65 pub fn from_entries(
66 entries: &[WalkEntry],
67 max_depth: Option<u32>,
68 mode: AnalysisMode,
69 git_ref: Option<&str>,
70 ) -> Self {
71 let mut files: Vec<(PathBuf, SystemTime)> = entries
72 .par_iter()
73 .filter(|e| !e.is_dir)
74 .map(|e| {
75 let mtime = e.mtime.unwrap_or(SystemTime::UNIX_EPOCH);
76 (e.path.clone(), mtime)
77 })
78 .collect();
79 files.sort_by(|a, b| a.0.cmp(&b.0));
80 Self {
81 files,
82 mode,
83 max_depth,
84 git_ref: git_ref.map(ToOwned::to_owned),
85 }
86 }
87}
88
89fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
92where
93 K: std::hash::Hash + Eq,
94 F: FnOnce(&mut LruCache<K, V>) -> T,
95{
96 match mutex.lock() {
97 Ok(mut guard) => recovery(&mut guard),
98 Err(poisoned) => {
99 let cache_size = NonZeroUsize::new(capacity)
101 .unwrap_or_else(|| unsafe { NonZeroUsize::new_unchecked(100) });
102 let new_cache = LruCache::new(cache_size);
103 let mut guard = poisoned.into_inner();
104 *guard = new_cache;
105 recovery(&mut guard)
106 }
107 }
108}
109
110pub struct AnalysisCache {
112 file_capacity: usize,
113 dir_capacity: usize,
114 cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
115 directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
116}
117
118impl AnalysisCache {
119 #[must_use]
123 pub fn new(capacity: usize) -> Self {
124 let file_capacity = capacity.max(1);
125 let dir_capacity: usize = std::env::var("APTU_CODER_DIR_CACHE_CAPACITY")
126 .ok()
127 .and_then(|v| v.parse().ok())
128 .unwrap_or(20);
129 let dir_capacity = dir_capacity.max(1);
130 let cache_size = unsafe { NonZeroUsize::new_unchecked(file_capacity) };
132 let dir_cache_size = unsafe { NonZeroUsize::new_unchecked(dir_capacity) };
134 Self {
135 file_capacity,
136 dir_capacity,
137 cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
138 directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
139 }
140 }
141
142 #[instrument(skip(self), fields(path = ?key.path))]
144 pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
145 lock_or_recover(&self.cache, self.file_capacity, |guard| {
146 let result = guard.get(key).cloned();
147 let cache_size = guard.len();
148 if let Some(v) = result {
149 debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
150 Some(v)
151 } else {
152 debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
153 None
154 }
155 })
156 }
157
158 #[instrument(skip(self, value), fields(path = ?key.path))]
160 #[allow(clippy::needless_pass_by_value)]
162 pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
163 lock_or_recover(&self.cache, self.file_capacity, |guard| {
164 let push_result = guard.push(key.clone(), value);
165 let cache_size = guard.len();
166 match push_result {
167 None => {
168 debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
169 }
170 Some((returned_key, _)) => {
171 if returned_key == key {
172 debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
173 } else {
174 debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
175 }
176 }
177 }
178 });
179 }
180
181 #[instrument(skip(self))]
183 pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
184 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
185 let result = guard.get(key).cloned();
186 let cache_size = guard.len();
187 if let Some(v) = result {
188 debug!(cache_event = "hit", cache_size = cache_size);
189 Some(v)
190 } else {
191 debug!(cache_event = "miss", cache_size = cache_size);
192 None
193 }
194 })
195 }
196
197 #[instrument(skip(self, value))]
199 pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
200 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
201 let push_result = guard.push(key, value);
202 let cache_size = guard.len();
203 match push_result {
204 None => {
205 debug!(cache_event = "insert", cache_size = cache_size);
206 }
207 Some((_, _)) => {
208 debug!(cache_event = "eviction", cache_size = cache_size);
209 }
210 }
211 });
212 }
213
214 #[doc(hidden)]
217 pub fn file_capacity(&self) -> usize {
218 self.file_capacity
219 }
220
221 #[instrument(skip(self), fields(path = ?path))]
224 pub fn invalidate_file(&self, path: &std::path::Path) {
225 lock_or_recover(&self.cache, self.file_capacity, |guard| {
226 let keys: Vec<CacheKey> = guard
227 .iter()
228 .filter(|(k, _)| k.path == path)
229 .map(|(k, _)| k.clone())
230 .collect();
231 for key in keys {
232 guard.pop(&key);
233 }
234 let cache_size = guard.len();
235 debug!(cache_event = "invalidate_file", cache_size = cache_size, path = ?path);
236 });
237 }
238}
239
240impl Clone for AnalysisCache {
241 fn clone(&self) -> Self {
242 Self {
243 file_capacity: self.file_capacity,
244 dir_capacity: self.dir_capacity,
245 cache: Arc::clone(&self.cache),
246 directory_cache: Arc::clone(&self.directory_cache),
247 }
248 }
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticAnalysis;

    /// Environment variable read by `AnalysisCache::new` for the directory capacity.
    const DIR_CAPACITY_ENV: &str = "APTU_CODER_DIR_CACHE_CAPACITY";

    /// Serialises the tests in this module that mutate `DIR_CAPACITY_ENV`.
    static DIR_CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Builds a depth-0, non-symlink walk entry with no recorded mtime.
    fn walk_entry(path: PathBuf, is_dir: bool) -> WalkEntry {
        WalkEntry {
            path,
            depth: 0,
            is_dir,
            is_symlink: false,
            symlink_target: None,
            mtime: None,
            canonical_path: PathBuf::new(),
        }
    }

    /// Builds a placeholder file analysis result to store in the cache.
    fn blank_output() -> Arc<FileAnalysisOutput> {
        Arc::new(FileAnalysisOutput::new(
            String::new(),
            SemanticAnalysis::default(),
            0,
            None,
        ))
    }

    /// Builds a cache key for `path` stamped with the Unix epoch.
    fn epoch_key(path: &std::path::Path, mode: AnalysisMode) -> CacheKey {
        CacheKey {
            path: path.to_path_buf(),
            modified: SystemTime::UNIX_EPOCH,
            mode,
        }
    }

    #[test]
    fn test_from_entries_skips_dirs() {
        let tmp_dir = tempfile::tempdir().expect("tempdir");
        // Bound to a name so the temporary file lives until the end of the test.
        let tmp_file = tempfile::NamedTempFile::new_in(tmp_dir.path()).expect("tempfile");
        let file_path = tmp_file.path().to_path_buf();

        let entries = [
            walk_entry(tmp_dir.path().to_path_buf(), true),
            walk_entry(file_path.clone(), false),
        ];

        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Only the file survives; the directory entry is filtered out.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }

    #[test]
    fn test_invalidate_file_single_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key = epoch_key(&path, AnalysisMode::Overview);
        cache.put(key.clone(), blank_output());

        cache.invalidate_file(&path);

        assert!(cache.get(&key).is_none());
    }

    #[test]
    fn test_invalidate_file_multi_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let overview_key = epoch_key(&path, AnalysisMode::Overview);
        let details_key = epoch_key(&path, AnalysisMode::FileDetails);
        let output = blank_output();
        cache.put(overview_key.clone(), output.clone());
        cache.put(details_key.clone(), output);

        cache.invalidate_file(&path);

        // Entries for every mode of the same path must be gone.
        assert!(cache.get(&overview_key).is_none());
        assert!(cache.get(&details_key).is_none());
    }

    #[test]
    fn test_dir_cache_capacity_default() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: environment mutation in this module is serialised by
        // `DIR_CACHE_ENV_LOCK`. NOTE(review): tests elsewhere in the binary that
        // touch the environment concurrently are not covered by this lock.
        unsafe { std::env::remove_var(DIR_CAPACITY_ENV) };

        let cache = AnalysisCache::new(100);

        assert_eq!(cache.dir_capacity, 20);
    }

    #[test]
    fn test_dir_cache_capacity_from_env() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: see `test_dir_cache_capacity_default`.
        unsafe { std::env::set_var(DIR_CAPACITY_ENV, "7") };

        let cache = AnalysisCache::new(100);

        // Clean up before asserting so a failure does not leak the variable.
        // SAFETY: see `test_dir_cache_capacity_default`.
        unsafe { std::env::remove_var(DIR_CAPACITY_ENV) };

        assert_eq!(cache.dir_capacity, 7);
    }
}
384
/// Number of recorded write failures at which [`DiskCache::is_degraded`]
/// starts returning `true`.
const DISK_CACHE_DEGRADED_THRESHOLD: u64 = 3;

/// On-disk cache rooted at a base directory.
pub struct DiskCache {
    /// Root directory under which entries are stored.
    base: std::path::PathBuf,
    /// When `true`, the cache was disabled at construction time.
    disabled: bool,
    /// Write failures since the last [`DiskCache::drain_write_failures`] call.
    write_failures: std::sync::atomic::AtomicU64,
    /// Write failures since this value was created; never reset by draining.
    total_write_failures: std::sync::atomic::AtomicU64,
}

impl DiskCache {
    /// Returns the number of write failures recorded since the previous call
    /// and resets that counter to zero in one atomic swap.
    pub fn drain_write_failures(&self) -> u64 {
        use std::sync::atomic::Ordering;

        self.write_failures.swap(0, Ordering::Relaxed)
    }

    /// Reports whether the lifetime write-failure count has reached
    /// [`DISK_CACHE_DEGRADED_THRESHOLD`].
    pub fn is_degraded(&self) -> bool {
        use std::sync::atomic::Ordering;

        let lifetime_failures = self.total_write_failures.load(Ordering::Relaxed);
        lifetime_failures >= DISK_CACHE_DEGRADED_THRESHOLD
    }
}
418
419impl DiskCache {
420 pub fn new(base: std::path::PathBuf, disabled: bool) -> Self {
423 if disabled {
424 return Self {
425 base,
426 disabled: true,
427 write_failures: std::sync::atomic::AtomicU64::new(0),
428 total_write_failures: std::sync::atomic::AtomicU64::new(0),
429 };
430 }
431 if let Err(e) = std::fs::create_dir_all(&base) {
432 warn!(path = %base.display(), error = %e, "disk cache disabled: failed to create cache directory");
433 return Self {
434 base,
435 disabled: true,
436 write_failures: std::sync::atomic::AtomicU64::new(0),
437 total_write_failures: std::sync::atomic::AtomicU64::new(0),
438 };
439 }
440 #[cfg(unix)]
441 if let Err(e) = std::fs::set_permissions(&base, std::fs::Permissions::from_mode(0o700)) {
442 warn!(path = %base.display(), error = %e, "disk cache: failed to set directory permissions to 0700");
443 }
444 #[cfg(not(unix))]
445 let _ = &base; Self {
447 base,
448 disabled: false,
449 write_failures: std::sync::atomic::AtomicU64::new(0),
450 total_write_failures: std::sync::atomic::AtomicU64::new(0),
451 }
452 }
453
454 pub fn entry_path(&self, tool: &str, key: &blake3::Hash) -> std::path::PathBuf {
455 let hex = format!("{}", key);
456 self.base
457 .join(tool)
458 .join(&hex[..2])
459 .join(format!("{}.json.snap", hex))
460 }
461
462 pub fn get<T: DeserializeOwned>(&self, tool: &str, key: &blake3::Hash) -> Option<T> {
464 if self.disabled {
465 return None;
466 }
467 let path = self.entry_path(tool, key);
468 let compressed = match std::fs::read(&path) {
469 Ok(b) => b,
470 Err(_) => return None,
471 };
472 let bytes = match snap::raw::Decoder::new().decompress_vec(&compressed) {
473 Ok(b) => b,
474 Err(e) => {
475 debug!(tool, error = %e, "disk cache decompression failed");
476 return None;
477 }
478 };
479 match serde_json::from_slice(&bytes) {
480 Ok(v) => Some(v),
481 Err(e) => {
482 debug!(tool, error = %e, "disk cache deserialization failed");
483 None
484 }
485 }
486 }
487
488 fn serialize_entry<T: Serialize>(value: &T) -> Option<Vec<u8>> {
490 let bytes = serde_json::to_vec(value).ok()?;
491 snap::raw::Encoder::new().compress_vec(&bytes).ok()
492 }
493
494 fn write_entry_atomically(
497 dir: &std::path::Path,
498 path: &std::path::Path,
499 compressed: &[u8],
500 ) -> Result<(), std::io::Error> {
501 use std::io::Write;
502 let mut tmp = NamedTempFile::new_in(dir)?;
503 tmp.write_all(compressed)?;
504 tmp.persist(path).map(|_| ()).map_err(|e| e.error)
505 }
506
507 pub fn put<T: Serialize>(&self, tool: &str, key: &blake3::Hash, value: &T) {
509 if self.disabled {
510 return;
511 }
512 let path = self.entry_path(tool, key);
513 let dir = match path.parent() {
514 Some(d) => d.to_path_buf(),
515 None => return,
516 };
517 if let Err(e) = std::fs::create_dir_all(&dir) {
518 warn!(tool, error = %e, "disk cache: failed to create cache directory");
519 self.record_write_failure();
520 return;
521 }
522 let compressed = match Self::serialize_entry(value) {
523 Some(c) => c,
524 None => return,
525 };
526 if Self::write_entry_atomically(&dir, &path, &compressed)
527 .ok()
528 .is_none()
529 {
530 self.record_write_failure();
531 }
532 }
533
534 fn record_write_failure(&self) {
538 self.write_failures
539 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
540 let total = self
541 .total_write_failures
542 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
543 + 1;
544 if total == DISK_CACHE_DEGRADED_THRESHOLD {
545 error!(
546 path = %self.base.display(),
547 total,
548 threshold = DISK_CACHE_DEGRADED_THRESHOLD,
549 "disk cache is degraded: consecutive write failures have reached the alert threshold; \
550 check disk space and permissions at the cache directory"
551 );
552 }
553 }
554
555 pub fn evict_stale(&self, retention_days: u64) {
557 if self.disabled {
558 return;
559 }
560 let cutoff = std::time::SystemTime::now()
561 .checked_sub(std::time::Duration::from_secs(retention_days * 86_400))
562 .unwrap_or(std::time::UNIX_EPOCH);
563 let _ = evict_dir_recursive(&self.base, cutoff);
564 }
565}
566
/// Recursively deletes regular files under `dir` whose modification time is
/// strictly earlier than `cutoff`.
///
/// Directories themselves are never removed, and symlinks are neither followed
/// nor deleted (`DirEntry::metadata` does not traverse them).
///
/// # Errors
///
/// Returns an error if `dir` cannot be listed or the directory iterator itself
/// fails. Problems with an individual entry are skipped so that one bad entry
/// does not stop eviction of its siblings: this covers unreadable metadata,
/// a failed removal, and a failing subdirectory.
fn evict_dir_recursive(
    dir: &std::path::Path,
    cutoff: std::time::SystemTime,
) -> std::io::Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let entry = entry?;
        // The entry may have vanished since it was listed (e.g. a concurrent
        // eviction or overwrite); skip it and keep going.
        let Ok(meta) = entry.metadata() else {
            continue;
        };
        let path = entry.path();
        if meta.is_dir() {
            let _ = evict_dir_recursive(&path, cutoff);
        } else if meta.is_file() && matches!(meta.modified(), Ok(mtime) if mtime < cutoff) {
            let _ = std::fs::remove_file(&path);
        }
    }
    Ok(())
}
586
#[cfg(test)]
mod disk_cache_tests {
    use super::*;
    use tempfile::TempDir;

    /// A value written through one `DiskCache` is readable through another
    /// instance opened on the same directory.
    #[test]
    fn test_disk_cache_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();
        let key = blake3::hash(b"test-key");
        let value = serde_json::json!({"result": "hello", "count": 42});

        let writer = DiskCache::new(root.clone(), false);
        writer.put("analyze_file", &key, &value);

        let reader = DiskCache::new(root, false);
        let loaded: Option<serde_json::Value> = reader.get("analyze_file", &key);
        assert_eq!(loaded, Some(value));
    }

    /// On Unix the cache directory is created with mode 0700.
    #[cfg(unix)]
    #[test]
    fn test_disk_cache_permissions() {
        use std::os::unix::fs::PermissionsExt;

        let tmp = TempDir::new().unwrap();
        let cache_dir = tmp.path().join("analysis-cache");
        let _cache = DiskCache::new(cache_dir.clone(), false);

        let permissions = std::fs::metadata(&cache_dir).unwrap().permissions();
        let mode = permissions.mode() & 0o777;
        assert_eq!(mode, 0o700, "cache dir must be mode 0700");
    }

    /// Bytes that are not valid Snappy data are treated as a miss.
    #[test]
    fn test_disk_cache_corrupt_entry_returns_none() {
        let tmp = TempDir::new().unwrap();
        let cache = DiskCache::new(tmp.path().to_path_buf(), false);
        let key = blake3::hash(b"corrupt-key");

        let entry = cache.entry_path("analyze_file", &key);
        std::fs::create_dir_all(entry.parent().unwrap()).unwrap();
        std::fs::write(&entry, b"not valid snappy data").unwrap();

        let loaded: Option<serde_json::Value> = cache.get("analyze_file", &key);
        assert!(loaded.is_none(), "corrupt entry must return None");
    }

    /// When the base path cannot be created as a directory, the cache disables itself.
    #[test]
    fn test_disk_cache_disabled_on_dir_creation_failure() {
        let tmp = TempDir::new().unwrap();
        // A regular file at the base path makes directory creation fail.
        let blocked = tmp.path().join("blocked");
        std::fs::write(&blocked, b"").unwrap();

        let cache = DiskCache::new(blocked, false);
        let key = blake3::hash(b"should-not-exist");
        cache.put("analyze_file", &key, &serde_json::json!({"x": 1}));

        let loaded: Option<serde_json::Value> = cache.get("analyze_file", &key);
        assert!(
            loaded.is_none(),
            "cache must be disabled after dir creation failure"
        );
        assert!(
            cache.disabled,
            "disabled flag must be true after dir creation failure"
        );
    }
}