1use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use serde::{Serialize, de::DeserializeOwned};
14use std::num::NonZeroUsize;
15use std::os::unix::fs::PermissionsExt;
16use std::path::PathBuf;
17use std::sync::{Arc, Mutex};
18use std::time::SystemTime;
19use tempfile::NamedTempFile;
20use tracing::{debug, error, instrument, warn};
21
/// Identifies where a cache lookup was satisfied, if at all.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CacheTier {
    /// Served from the in-memory tier.
    L1Memory,
    /// Served from the on-disk tier.
    L2Disk,
    /// Not found in any tier.
    Miss,
}

impl CacheTier {
    /// Returns the stable snake_case label for this tier.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::L1Memory => "l1_memory",
            Self::L2Disk => "l2_disk",
            Self::Miss => "miss",
        }
    }
}
39
40#[derive(Debug, Clone, Eq, PartialEq, Hash)]
42pub struct CacheKey {
43 pub path: PathBuf,
44 pub modified: SystemTime,
45 pub mode: AnalysisMode,
46}
47
48#[derive(Debug, Clone, Eq, PartialEq, Hash)]
50pub struct DirectoryCacheKey {
51 files: Vec<(PathBuf, SystemTime)>,
52 mode: AnalysisMode,
53 max_depth: Option<u32>,
54 git_ref: Option<String>,
55}
56
57impl DirectoryCacheKey {
58 #[must_use]
64 pub fn from_entries(
65 entries: &[WalkEntry],
66 max_depth: Option<u32>,
67 mode: AnalysisMode,
68 git_ref: Option<&str>,
69 ) -> Self {
70 let mut files: Vec<(PathBuf, SystemTime)> = entries
71 .par_iter()
72 .filter(|e| !e.is_dir)
73 .map(|e| {
74 let mtime = e.mtime.unwrap_or(SystemTime::UNIX_EPOCH);
75 (e.path.clone(), mtime)
76 })
77 .collect();
78 files.sort_by(|a, b| a.0.cmp(&b.0));
79 Self {
80 files,
81 mode,
82 max_depth,
83 git_ref: git_ref.map(ToOwned::to_owned),
84 }
85 }
86}
87
88fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
91where
92 K: std::hash::Hash + Eq,
93 F: FnOnce(&mut LruCache<K, V>) -> T,
94{
95 match mutex.lock() {
96 Ok(mut guard) => recovery(&mut guard),
97 Err(poisoned) => {
98 let cache_size = NonZeroUsize::new(capacity)
100 .unwrap_or_else(|| unsafe { NonZeroUsize::new_unchecked(100) });
101 let new_cache = LruCache::new(cache_size);
102 let mut guard = poisoned.into_inner();
103 *guard = new_cache;
104 recovery(&mut guard)
105 }
106 }
107}
108
109pub struct AnalysisCache {
111 file_capacity: usize,
112 dir_capacity: usize,
113 cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
114 directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
115}
116
117impl AnalysisCache {
118 #[must_use]
122 pub fn new(capacity: usize) -> Self {
123 let file_capacity = capacity.max(1);
124 let dir_capacity: usize = std::env::var("APTU_CODER_DIR_CACHE_CAPACITY")
125 .ok()
126 .and_then(|v| v.parse().ok())
127 .unwrap_or(20);
128 let dir_capacity = dir_capacity.max(1);
129 let cache_size = unsafe { NonZeroUsize::new_unchecked(file_capacity) };
131 let dir_cache_size = unsafe { NonZeroUsize::new_unchecked(dir_capacity) };
133 Self {
134 file_capacity,
135 dir_capacity,
136 cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
137 directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
138 }
139 }
140
141 #[instrument(skip(self), fields(path = ?key.path))]
143 pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
144 lock_or_recover(&self.cache, self.file_capacity, |guard| {
145 let result = guard.get(key).cloned();
146 let cache_size = guard.len();
147 if let Some(v) = result {
148 debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
149 Some(v)
150 } else {
151 debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
152 None
153 }
154 })
155 }
156
157 #[instrument(skip(self, value), fields(path = ?key.path))]
159 #[allow(clippy::needless_pass_by_value)]
161 pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
162 lock_or_recover(&self.cache, self.file_capacity, |guard| {
163 let push_result = guard.push(key.clone(), value);
164 let cache_size = guard.len();
165 match push_result {
166 None => {
167 debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
168 }
169 Some((returned_key, _)) => {
170 if returned_key == key {
171 debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
172 } else {
173 debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
174 }
175 }
176 }
177 });
178 }
179
180 #[instrument(skip(self))]
182 pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
183 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
184 let result = guard.get(key).cloned();
185 let cache_size = guard.len();
186 if let Some(v) = result {
187 debug!(cache_event = "hit", cache_size = cache_size);
188 Some(v)
189 } else {
190 debug!(cache_event = "miss", cache_size = cache_size);
191 None
192 }
193 })
194 }
195
196 #[instrument(skip(self, value))]
198 pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
199 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
200 let push_result = guard.push(key, value);
201 let cache_size = guard.len();
202 match push_result {
203 None => {
204 debug!(cache_event = "insert", cache_size = cache_size);
205 }
206 Some((_, _)) => {
207 debug!(cache_event = "eviction", cache_size = cache_size);
208 }
209 }
210 });
211 }
212
213 #[doc(hidden)]
216 pub fn file_capacity(&self) -> usize {
217 self.file_capacity
218 }
219
220 #[instrument(skip(self), fields(path = ?path))]
223 pub fn invalidate_file(&self, path: &std::path::Path) {
224 lock_or_recover(&self.cache, self.file_capacity, |guard| {
225 let keys: Vec<CacheKey> = guard
226 .iter()
227 .filter(|(k, _)| k.path == path)
228 .map(|(k, _)| k.clone())
229 .collect();
230 for key in keys {
231 guard.pop(&key);
232 }
233 let cache_size = guard.len();
234 debug!(cache_event = "invalidate_file", cache_size = cache_size, path = ?path);
235 });
236 }
237}
238
239impl Clone for AnalysisCache {
240 fn clone(&self) -> Self {
241 Self {
242 file_capacity: self.file_capacity,
243 dir_capacity: self.dir_capacity,
244 cache: Arc::clone(&self.cache),
245 directory_cache: Arc::clone(&self.directory_cache),
246 }
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticAnalysis;

    /// Builds a depth-0, non-symlink walk entry with no recorded mtime.
    fn walk_entry(path: PathBuf, is_dir: bool) -> WalkEntry {
        WalkEntry {
            path,
            depth: 0,
            is_dir,
            is_symlink: false,
            symlink_target: None,
            mtime: None,
            canonical_path: PathBuf::new(),
        }
    }

    /// Builds a file cache key for `path` stamped with the Unix epoch.
    fn epoch_key(path: &std::path::Path, mode: AnalysisMode) -> CacheKey {
        CacheKey {
            path: path.to_path_buf(),
            modified: SystemTime::UNIX_EPOCH,
            mode,
        }
    }

    /// Builds an empty file analysis result to use as a cache value.
    fn empty_output() -> Arc<FileAnalysisOutput> {
        Arc::new(FileAnalysisOutput::new(
            String::new(),
            SemanticAnalysis::default(),
            0,
            None,
        ))
    }

    #[test]
    fn test_from_entries_skips_dirs() {
        let dir = tempfile::tempdir().expect("tempdir");
        // Keep the handle alive so the file exists for the duration of the test.
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();

        let entries = vec![
            walk_entry(dir.path().to_path_buf(), true),
            walk_entry(file_path.clone(), false),
        ];

        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Only the file entry may survive; the directory entry is dropped.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }

    #[test]
    fn test_invalidate_file_single_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key = epoch_key(&path, AnalysisMode::Overview);
        cache.put(key.clone(), empty_output());

        cache.invalidate_file(&path);

        assert!(cache.get(&key).is_none());
    }

    #[test]
    fn test_invalidate_file_multi_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key1 = epoch_key(&path, AnalysisMode::Overview);
        let key2 = epoch_key(&path, AnalysisMode::FileDetails);
        let output = empty_output();
        cache.put(key1.clone(), Arc::clone(&output));
        cache.put(key2.clone(), output);

        cache.invalidate_file(&path);

        // Both modes of the same path must be gone.
        assert!(cache.get(&key1).is_none());
        assert!(cache.get(&key2).is_none());
    }

    /// Serializes the tests below, which mutate a process-wide environment variable.
    static DIR_CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    #[test]
    fn test_dir_cache_capacity_default() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: writers of this variable in this module hold DIR_CACHE_ENV_LOCK.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        let cache = AnalysisCache::new(100);

        assert_eq!(cache.dir_capacity, 20);
    }

    #[test]
    fn test_dir_cache_capacity_from_env() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: writers of this variable in this module hold DIR_CACHE_ENV_LOCK.
        unsafe { std::env::set_var("APTU_CODER_DIR_CACHE_CAPACITY", "7") };

        let cache = AnalysisCache::new(100);

        // Restore the environment before asserting so a failure does not leak the override.
        // SAFETY: as above.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        assert_eq!(cache.dir_capacity, 7);
    }
}
383
/// Lifetime write-failure count at which [`DiskCache::is_degraded`] starts
/// returning `true`.
const DISK_CACHE_DEGRADED_THRESHOLD: u64 = 3;

/// Handle to an on-disk cache rooted at a base directory.
pub struct DiskCache {
    /// Root directory under which entries are stored.
    base: std::path::PathBuf,
    /// When `true`, reads return nothing and writes are skipped.
    disabled: bool,
    /// Write failures since the last [`DiskCache::drain_write_failures`] call.
    write_failures: std::sync::atomic::AtomicU64,
    /// Write failures since this handle was created; read by
    /// [`DiskCache::is_degraded`].
    total_write_failures: std::sync::atomic::AtomicU64,
}

impl DiskCache {
    /// Returns the number of write failures recorded since the previous call
    /// and resets that counter to zero.
    pub fn drain_write_failures(&self) -> u64 {
        use std::sync::atomic::Ordering::Relaxed;

        self.write_failures.swap(0, Relaxed)
    }

    /// Reports whether the lifetime write-failure count has reached
    /// `DISK_CACHE_DEGRADED_THRESHOLD`.
    pub fn is_degraded(&self) -> bool {
        use std::sync::atomic::Ordering::Relaxed;

        let lifetime_failures = self.total_write_failures.load(Relaxed);
        lifetime_failures >= DISK_CACHE_DEGRADED_THRESHOLD
    }
}
417
418impl DiskCache {
419 pub fn new(base: std::path::PathBuf, disabled: bool) -> Self {
422 if disabled {
423 return Self {
424 base,
425 disabled: true,
426 write_failures: std::sync::atomic::AtomicU64::new(0),
427 total_write_failures: std::sync::atomic::AtomicU64::new(0),
428 };
429 }
430 if let Err(e) = std::fs::create_dir_all(&base) {
431 warn!(path = %base.display(), error = %e, "disk cache disabled: failed to create cache directory");
432 return Self {
433 base,
434 disabled: true,
435 write_failures: std::sync::atomic::AtomicU64::new(0),
436 total_write_failures: std::sync::atomic::AtomicU64::new(0),
437 };
438 }
439 if let Err(e) = std::fs::set_permissions(&base, std::fs::Permissions::from_mode(0o700)) {
440 warn!(path = %base.display(), error = %e, "disk cache: failed to set directory permissions to 0700");
441 }
442 Self {
443 base,
444 disabled: false,
445 write_failures: std::sync::atomic::AtomicU64::new(0),
446 total_write_failures: std::sync::atomic::AtomicU64::new(0),
447 }
448 }
449
450 pub fn entry_path(&self, tool: &str, key: &blake3::Hash) -> std::path::PathBuf {
451 let hex = format!("{}", key);
452 self.base
453 .join(tool)
454 .join(&hex[..2])
455 .join(format!("{}.json.snap", hex))
456 }
457
458 pub fn get<T: DeserializeOwned>(&self, tool: &str, key: &blake3::Hash) -> Option<T> {
460 if self.disabled {
461 return None;
462 }
463 let path = self.entry_path(tool, key);
464 let compressed = match std::fs::read(&path) {
465 Ok(b) => b,
466 Err(_) => return None,
467 };
468 let bytes = match snap::raw::Decoder::new().decompress_vec(&compressed) {
469 Ok(b) => b,
470 Err(e) => {
471 debug!(tool, error = %e, "disk cache decompression failed");
472 return None;
473 }
474 };
475 match serde_json::from_slice(&bytes) {
476 Ok(v) => Some(v),
477 Err(e) => {
478 debug!(tool, error = %e, "disk cache deserialization failed");
479 None
480 }
481 }
482 }
483
484 fn serialize_entry<T: Serialize>(value: &T) -> Option<Vec<u8>> {
486 let bytes = serde_json::to_vec(value).ok()?;
487 snap::raw::Encoder::new().compress_vec(&bytes).ok()
488 }
489
490 fn write_entry_atomically(
493 dir: &std::path::Path,
494 path: &std::path::Path,
495 compressed: &[u8],
496 ) -> Result<(), std::io::Error> {
497 use std::io::Write;
498 let mut tmp = NamedTempFile::new_in(dir)?;
499 tmp.write_all(compressed)?;
500 tmp.persist(path).map(|_| ()).map_err(|e| e.error)
501 }
502
503 pub fn put<T: Serialize>(&self, tool: &str, key: &blake3::Hash, value: &T) {
505 if self.disabled {
506 return;
507 }
508 let path = self.entry_path(tool, key);
509 let dir = match path.parent() {
510 Some(d) => d.to_path_buf(),
511 None => return,
512 };
513 if let Err(e) = std::fs::create_dir_all(&dir) {
514 warn!(tool, error = %e, "disk cache: failed to create cache directory");
515 self.record_write_failure();
516 return;
517 }
518 let compressed = match Self::serialize_entry(value) {
519 Some(c) => c,
520 None => return,
521 };
522 if Self::write_entry_atomically(&dir, &path, &compressed)
523 .ok()
524 .is_none()
525 {
526 self.record_write_failure();
527 }
528 }
529
530 fn record_write_failure(&self) {
534 self.write_failures
535 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
536 let total = self
537 .total_write_failures
538 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
539 + 1;
540 if total == DISK_CACHE_DEGRADED_THRESHOLD {
541 error!(
542 path = %self.base.display(),
543 total,
544 threshold = DISK_CACHE_DEGRADED_THRESHOLD,
545 "disk cache is degraded: consecutive write failures have reached the alert threshold; \
546 check disk space and permissions at the cache directory"
547 );
548 }
549 }
550
551 pub fn evict_stale(&self, retention_days: u64) {
553 if self.disabled {
554 return;
555 }
556 let cutoff = std::time::SystemTime::now()
557 .checked_sub(std::time::Duration::from_secs(retention_days * 86_400))
558 .unwrap_or(std::time::UNIX_EPOCH);
559 let _ = evict_dir_recursive(&self.base, cutoff);
560 }
561}
562
/// Removes every regular file under `dir` (recursively) whose modification
/// time is strictly earlier than `cutoff`.
///
/// The sweep is best effort: an entry that cannot be read or stat'ed (for
/// example because it was removed concurrently) is skipped rather than
/// aborting the rest of the directory, and failures inside subdirectories or
/// while deleting a file are ignored. Directories themselves are never
/// removed.
///
/// # Errors
///
/// Returns an error only when `dir` itself cannot be opened for reading.
fn evict_dir_recursive(
    dir: &std::path::Path,
    cutoff: std::time::SystemTime,
) -> std::io::Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let entry = match entry {
            Ok(entry) => entry,
            Err(_) => continue,
        };
        let meta = match entry.metadata() {
            Ok(meta) => meta,
            Err(_) => continue,
        };
        let path = entry.path();
        if meta.is_dir() {
            let _ = evict_dir_recursive(&path, cutoff);
        } else if meta.is_file() && meta.modified().is_ok_and(|mtime| mtime < cutoff) {
            let _ = std::fs::remove_file(&path);
        }
    }
    Ok(())
}
582
#[cfg(test)]
mod disk_cache_tests {
    use super::*;
    use tempfile::TempDir;

    /// A value written through one handle is readable through a second handle
    /// opened on the same directory.
    #[test]
    fn test_disk_cache_roundtrip() {
        let root = TempDir::new().unwrap();
        let key = blake3::hash(b"test-key");
        let value = serde_json::json!({"result": "hello", "count": 42});

        let writer = DiskCache::new(root.path().to_path_buf(), false);
        writer.put("analyze_file", &key, &value);

        let reader = DiskCache::new(root.path().to_path_buf(), false);
        let result: Option<serde_json::Value> = reader.get("analyze_file", &key);
        assert_eq!(result, Some(value));
    }

    /// The cache directory is created with owner-only permissions.
    #[test]
    fn test_disk_cache_permissions() {
        use std::os::unix::fs::PermissionsExt;

        let root = TempDir::new().unwrap();
        let cache_dir = root.path().join("analysis-cache");
        let _cache = DiskCache::new(cache_dir.clone(), false);

        let permissions = std::fs::metadata(&cache_dir).unwrap().permissions();
        let mode = permissions.mode() & 0o777;
        assert_eq!(mode, 0o700, "cache dir must be mode 0700");
    }

    /// Bytes that are not valid snappy data are treated as a miss.
    #[test]
    fn test_disk_cache_corrupt_entry_returns_none() {
        let root = TempDir::new().unwrap();
        let cache = DiskCache::new(root.path().to_path_buf(), false);
        let key = blake3::hash(b"corrupt-key");

        let entry = cache.entry_path("analyze_file", &key);
        std::fs::create_dir_all(entry.parent().unwrap()).unwrap();
        std::fs::write(&entry, b"not valid snappy data").unwrap();

        let result: Option<serde_json::Value> = cache.get("analyze_file", &key);
        assert!(result.is_none(), "corrupt entry must return None");
    }

    /// When the base path cannot be created as a directory, the cache comes
    /// up disabled and both `put` and `get` are no-ops.
    #[test]
    fn test_disk_cache_disabled_on_dir_creation_failure() {
        let root = TempDir::new().unwrap();
        let blocked = root.path().join("blocked");
        // Occupy the path with a regular file so directory creation fails.
        std::fs::write(&blocked, b"").unwrap();

        let cache = DiskCache::new(blocked, false);
        let key = blake3::hash(b"should-not-exist");
        cache.put("analyze_file", &key, &serde_json::json!({"x": 1}));

        let result: Option<serde_json::Value> = cache.get("analyze_file", &key);
        assert!(
            result.is_none(),
            "cache must be disabled after dir creation failure"
        );
        assert!(
            cache.disabled,
            "disabled flag must be true after dir creation failure"
        );
    }
}