1use crate::analyze::{AnalysisOutput, FileAnalysisOutput};
9use crate::traversal::WalkEntry;
10use crate::types::AnalysisMode;
11use lru::LruCache;
12use rayon::prelude::*;
13use serde::{Serialize, de::DeserializeOwned};
14use std::num::NonZeroUsize;
15use std::os::unix::fs::PermissionsExt;
16use std::path::PathBuf;
17use std::sync::{Arc, Mutex};
18use std::time::SystemTime;
19use tempfile::NamedTempFile;
20use tracing::{debug, error, instrument, warn};
21
/// Outcome of a cache lookup, identifying which tier (if any) produced the value.
///
/// The variant names suggest an in-memory first tier and an on-disk second tier;
/// this type itself only carries the label.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CacheTier {
    /// Reported as `"l1_memory"`.
    L1Memory,
    /// Reported as `"l2_disk"`.
    L2Disk,
    /// Reported as `"miss"`.
    Miss,
}

impl CacheTier {
    /// Returns the fixed, lowercase snake_case label for this tier.
    ///
    /// The returned string is a `'static` literal, so it can be used directly as a
    /// log or metric field value without allocating.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::L1Memory => "l1_memory",
            Self::L2Disk => "l2_disk",
            Self::Miss => "miss",
        }
    }
}
39
40#[derive(Debug, Clone, Eq, PartialEq, Hash)]
42pub struct CacheKey {
43 pub path: PathBuf,
44 pub modified: SystemTime,
45 pub mode: AnalysisMode,
46}
47
48#[derive(Debug, Clone, Eq, PartialEq, Hash)]
50pub struct DirectoryCacheKey {
51 files: Vec<(PathBuf, SystemTime)>,
52 mode: AnalysisMode,
53 max_depth: Option<u32>,
54 git_ref: Option<String>,
55}
56
57impl DirectoryCacheKey {
58 #[must_use]
64 pub fn from_entries(
65 entries: &[WalkEntry],
66 max_depth: Option<u32>,
67 mode: AnalysisMode,
68 git_ref: Option<&str>,
69 ) -> Self {
70 let mut files: Vec<(PathBuf, SystemTime)> = entries
71 .par_iter()
72 .filter(|e| !e.is_dir)
73 .map(|e| {
74 let mtime = e.mtime.unwrap_or(SystemTime::UNIX_EPOCH);
75 (e.path.clone(), mtime)
76 })
77 .collect();
78 files.sort_by(|a, b| a.0.cmp(&b.0));
79 Self {
80 files,
81 mode,
82 max_depth,
83 git_ref: git_ref.map(ToOwned::to_owned),
84 }
85 }
86}
87
88fn lock_or_recover<K, V, T, F>(mutex: &Mutex<LruCache<K, V>>, capacity: usize, recovery: F) -> T
91where
92 K: std::hash::Hash + Eq,
93 F: FnOnce(&mut LruCache<K, V>) -> T,
94{
95 match mutex.lock() {
96 Ok(mut guard) => recovery(&mut guard),
97 Err(poisoned) => {
98 let cache_size = NonZeroUsize::new(capacity).unwrap_or(NonZeroUsize::new(100).unwrap());
99 let new_cache = LruCache::new(cache_size);
100 let mut guard = poisoned.into_inner();
101 *guard = new_cache;
102 recovery(&mut guard)
103 }
104 }
105}
106
107pub struct AnalysisCache {
109 file_capacity: usize,
110 dir_capacity: usize,
111 cache: Arc<Mutex<LruCache<CacheKey, Arc<FileAnalysisOutput>>>>,
112 directory_cache: Arc<Mutex<LruCache<DirectoryCacheKey, Arc<AnalysisOutput>>>>,
113}
114
115impl AnalysisCache {
116 #[must_use]
120 pub fn new(capacity: usize) -> Self {
121 let file_capacity = capacity.max(1);
122 let dir_capacity: usize = std::env::var("APTU_CODER_DIR_CACHE_CAPACITY")
123 .ok()
124 .and_then(|v| v.parse().ok())
125 .unwrap_or(20);
126 let dir_capacity = dir_capacity.max(1);
127 let cache_size = NonZeroUsize::new(file_capacity).unwrap();
128 let dir_cache_size = NonZeroUsize::new(dir_capacity).unwrap();
129 Self {
130 file_capacity,
131 dir_capacity,
132 cache: Arc::new(Mutex::new(LruCache::new(cache_size))),
133 directory_cache: Arc::new(Mutex::new(LruCache::new(dir_cache_size))),
134 }
135 }
136
137 #[instrument(skip(self), fields(path = ?key.path))]
139 pub fn get(&self, key: &CacheKey) -> Option<Arc<FileAnalysisOutput>> {
140 lock_or_recover(&self.cache, self.file_capacity, |guard| {
141 let result = guard.get(key).cloned();
142 let cache_size = guard.len();
143 if let Some(v) = result {
144 debug!(cache_event = "hit", cache_size = cache_size, path = ?key.path);
145 Some(v)
146 } else {
147 debug!(cache_event = "miss", cache_size = cache_size, path = ?key.path);
148 None
149 }
150 })
151 }
152
153 #[instrument(skip(self, value), fields(path = ?key.path))]
155 #[allow(clippy::needless_pass_by_value)]
157 pub fn put(&self, key: CacheKey, value: Arc<FileAnalysisOutput>) {
158 lock_or_recover(&self.cache, self.file_capacity, |guard| {
159 let push_result = guard.push(key.clone(), value);
160 let cache_size = guard.len();
161 match push_result {
162 None => {
163 debug!(cache_event = "insert", cache_size = cache_size, path = ?key.path);
164 }
165 Some((returned_key, _)) => {
166 if returned_key == key {
167 debug!(cache_event = "update", cache_size = cache_size, path = ?key.path);
168 } else {
169 debug!(cache_event = "eviction", cache_size = cache_size, path = ?key.path, evicted_path = ?returned_key.path);
170 }
171 }
172 }
173 });
174 }
175
176 #[instrument(skip(self))]
178 pub fn get_directory(&self, key: &DirectoryCacheKey) -> Option<Arc<AnalysisOutput>> {
179 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
180 let result = guard.get(key).cloned();
181 let cache_size = guard.len();
182 if let Some(v) = result {
183 debug!(cache_event = "hit", cache_size = cache_size);
184 Some(v)
185 } else {
186 debug!(cache_event = "miss", cache_size = cache_size);
187 None
188 }
189 })
190 }
191
192 #[instrument(skip(self, value))]
194 pub fn put_directory(&self, key: DirectoryCacheKey, value: Arc<AnalysisOutput>) {
195 lock_or_recover(&self.directory_cache, self.dir_capacity, |guard| {
196 let push_result = guard.push(key, value);
197 let cache_size = guard.len();
198 match push_result {
199 None => {
200 debug!(cache_event = "insert", cache_size = cache_size);
201 }
202 Some((_, _)) => {
203 debug!(cache_event = "eviction", cache_size = cache_size);
204 }
205 }
206 });
207 }
208
209 #[doc(hidden)]
212 pub fn file_capacity(&self) -> usize {
213 self.file_capacity
214 }
215
216 #[instrument(skip(self), fields(path = ?path))]
219 pub fn invalidate_file(&self, path: &std::path::Path) {
220 lock_or_recover(&self.cache, self.file_capacity, |guard| {
221 let keys: Vec<CacheKey> = guard
222 .iter()
223 .filter(|(k, _)| k.path == path)
224 .map(|(k, _)| k.clone())
225 .collect();
226 for key in keys {
227 guard.pop(&key);
228 }
229 let cache_size = guard.len();
230 debug!(cache_event = "invalidate_file", cache_size = cache_size, path = ?path);
231 });
232 }
233}
234
235impl Clone for AnalysisCache {
236 fn clone(&self) -> Self {
237 Self {
238 file_capacity: self.file_capacity,
239 dir_capacity: self.dir_capacity,
240 cache: Arc::clone(&self.cache),
241 directory_cache: Arc::clone(&self.directory_cache),
242 }
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::SemanticAnalysis;

    /// Serializes the tests that mutate `APTU_CODER_DIR_CACHE_CAPACITY`, since the
    /// process environment is shared by all test threads.
    static DIR_CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Builds a depth-0, non-symlink walk entry without an mtime.
    fn walk_entry(path: PathBuf, is_dir: bool) -> WalkEntry {
        WalkEntry {
            path,
            depth: 0,
            is_dir,
            is_symlink: false,
            symlink_target: None,
            mtime: None,
            canonical_path: PathBuf::new(),
        }
    }

    /// Builds a cache key for `path` stamped with the Unix epoch.
    fn epoch_key(path: &std::path::Path, mode: AnalysisMode) -> CacheKey {
        CacheKey {
            path: path.to_path_buf(),
            modified: SystemTime::UNIX_EPOCH,
            mode,
        }
    }

    /// Builds an empty analysis output to use as a cache value.
    fn empty_output() -> Arc<FileAnalysisOutput> {
        Arc::new(FileAnalysisOutput::new(
            String::new(),
            SemanticAnalysis::default(),
            0,
            None,
        ))
    }

    #[test]
    fn test_from_entries_skips_dirs() {
        let dir = tempfile::tempdir().expect("tempdir");
        let file = tempfile::NamedTempFile::new_in(dir.path()).expect("tempfile");
        let file_path = file.path().to_path_buf();

        let entries = [
            walk_entry(dir.path().to_path_buf(), true),
            walk_entry(file_path.clone(), false),
        ];

        let key = DirectoryCacheKey::from_entries(&entries, None, AnalysisMode::Overview, None);

        // Only the file survives; the directory entry is filtered out.
        assert_eq!(key.files.len(), 1);
        assert_eq!(key.files[0].0, file_path);
    }

    #[test]
    fn test_invalidate_file_single_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key = epoch_key(&path, AnalysisMode::Overview);
        cache.put(key.clone(), empty_output());

        cache.invalidate_file(&path);

        assert!(cache.get(&key).is_none());
    }

    #[test]
    fn test_invalidate_file_multi_mode() {
        let cache = AnalysisCache::new(10);
        let path = PathBuf::from("/test/file.rs");
        let key1 = epoch_key(&path, AnalysisMode::Overview);
        let key2 = epoch_key(&path, AnalysisMode::FileDetails);
        let output = empty_output();
        cache.put(key1.clone(), Arc::clone(&output));
        cache.put(key2.clone(), output);

        cache.invalidate_file(&path);

        // Both modes for the same path must be gone.
        assert!(cache.get(&key1).is_none());
        assert!(cache.get(&key2).is_none());
    }

    #[test]
    fn test_dir_cache_capacity_default() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: env mutation in this module is serialized by DIR_CACHE_ENV_LOCK.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        let cache = AnalysisCache::new(100);

        assert_eq!(cache.dir_capacity, 20);
    }

    #[test]
    fn test_dir_cache_capacity_from_env() {
        let _guard = DIR_CACHE_ENV_LOCK.lock().unwrap();

        // SAFETY: env mutation in this module is serialized by DIR_CACHE_ENV_LOCK.
        unsafe { std::env::set_var("APTU_CODER_DIR_CACHE_CAPACITY", "7") };

        let cache = AnalysisCache::new(100);

        // Restore the environment before asserting so a failure does not leak the override.
        // SAFETY: still holding DIR_CACHE_ENV_LOCK.
        unsafe { std::env::remove_var("APTU_CODER_DIR_CACHE_CAPACITY") };

        assert_eq!(cache.dir_capacity, 7);
    }
}
379
/// Number of recorded write failures at which [`DiskCache::is_degraded`] starts
/// returning `true` and a one-time error is logged.
const DISK_CACHE_DEGRADED_THRESHOLD: u64 = 3;

/// On-disk cache of serialized analysis results rooted at a base directory.
///
/// When `disabled` is set, `get`, `put` and `evict_stale` return immediately
/// without touching the filesystem.
pub struct DiskCache {
    /// Root directory under which entries are stored.
    base: PathBuf,
    /// `true` when the cache was disabled by the caller or the base directory
    /// could not be created.
    disabled: bool,
    /// Write failures since the last `drain_write_failures` call (reset on drain).
    write_failures: std::sync::atomic::AtomicU64,
    /// Write failures since construction; nothing in this file resets it.
    total_write_failures: std::sync::atomic::AtomicU64,
}
396
397impl DiskCache {
398 pub fn drain_write_failures(&self) -> u64 {
401 self.write_failures
402 .swap(0, std::sync::atomic::Ordering::Relaxed)
403 }
404
405 pub fn is_degraded(&self) -> bool {
408 self.total_write_failures
409 .load(std::sync::atomic::Ordering::Relaxed)
410 >= DISK_CACHE_DEGRADED_THRESHOLD
411 }
412}
413
414impl DiskCache {
415 pub fn new(base: std::path::PathBuf, disabled: bool) -> Self {
418 if disabled {
419 return Self {
420 base,
421 disabled: true,
422 write_failures: std::sync::atomic::AtomicU64::new(0),
423 total_write_failures: std::sync::atomic::AtomicU64::new(0),
424 };
425 }
426 if let Err(e) = std::fs::create_dir_all(&base) {
427 warn!(path = %base.display(), error = %e, "disk cache disabled: failed to create cache directory");
428 return Self {
429 base,
430 disabled: true,
431 write_failures: std::sync::atomic::AtomicU64::new(0),
432 total_write_failures: std::sync::atomic::AtomicU64::new(0),
433 };
434 }
435 if let Err(e) = std::fs::set_permissions(&base, std::fs::Permissions::from_mode(0o700)) {
436 warn!(path = %base.display(), error = %e, "disk cache: failed to set directory permissions to 0700");
437 }
438 Self {
439 base,
440 disabled: false,
441 write_failures: std::sync::atomic::AtomicU64::new(0),
442 total_write_failures: std::sync::atomic::AtomicU64::new(0),
443 }
444 }
445
446 pub fn entry_path(&self, tool: &str, key: &blake3::Hash) -> std::path::PathBuf {
447 let hex = format!("{}", key);
448 self.base
449 .join(tool)
450 .join(&hex[..2])
451 .join(format!("{}.json.snap", hex))
452 }
453
454 pub fn get<T: DeserializeOwned>(&self, tool: &str, key: &blake3::Hash) -> Option<T> {
456 if self.disabled {
457 return None;
458 }
459 let path = self.entry_path(tool, key);
460 let compressed = match std::fs::read(&path) {
461 Ok(b) => b,
462 Err(_) => return None,
463 };
464 let bytes = match snap::raw::Decoder::new().decompress_vec(&compressed) {
465 Ok(b) => b,
466 Err(e) => {
467 debug!(tool, error = %e, "disk cache decompression failed");
468 return None;
469 }
470 };
471 match serde_json::from_slice(&bytes) {
472 Ok(v) => Some(v),
473 Err(e) => {
474 debug!(tool, error = %e, "disk cache deserialization failed");
475 None
476 }
477 }
478 }
479
480 fn serialize_entry<T: Serialize>(value: &T) -> Option<Vec<u8>> {
482 let bytes = serde_json::to_vec(value).ok()?;
483 snap::raw::Encoder::new().compress_vec(&bytes).ok()
484 }
485
486 fn write_entry_atomically(
489 dir: &std::path::Path,
490 path: &std::path::Path,
491 compressed: &[u8],
492 ) -> Result<(), std::io::Error> {
493 use std::io::Write;
494 let mut tmp = NamedTempFile::new_in(dir)?;
495 tmp.write_all(compressed)?;
496 tmp.persist(path).map(|_| ()).map_err(|e| e.error)
497 }
498
499 pub fn put<T: Serialize>(&self, tool: &str, key: &blake3::Hash, value: &T) {
501 if self.disabled {
502 return;
503 }
504 let path = self.entry_path(tool, key);
505 let dir = match path.parent() {
506 Some(d) => d.to_path_buf(),
507 None => return,
508 };
509 if let Err(e) = std::fs::create_dir_all(&dir) {
510 warn!(tool, error = %e, "disk cache: failed to create cache directory");
511 self.record_write_failure();
512 return;
513 }
514 let compressed = match Self::serialize_entry(value) {
515 Some(c) => c,
516 None => return,
517 };
518 if Self::write_entry_atomically(&dir, &path, &compressed)
519 .ok()
520 .is_none()
521 {
522 self.record_write_failure();
523 }
524 }
525
526 fn record_write_failure(&self) {
530 self.write_failures
531 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
532 let total = self
533 .total_write_failures
534 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
535 + 1;
536 if total == DISK_CACHE_DEGRADED_THRESHOLD {
537 error!(
538 path = %self.base.display(),
539 total,
540 threshold = DISK_CACHE_DEGRADED_THRESHOLD,
541 "disk cache is degraded: consecutive write failures have reached the alert threshold; \
542 check disk space and permissions at the cache directory"
543 );
544 }
545 }
546
547 pub fn evict_stale(&self, retention_days: u64) {
549 if self.disabled {
550 return;
551 }
552 let cutoff = std::time::SystemTime::now()
553 .checked_sub(std::time::Duration::from_secs(retention_days * 86_400))
554 .unwrap_or(std::time::UNIX_EPOCH);
555 let _ = evict_dir_recursive(&self.base, cutoff);
556 }
557}
558
/// Recursively deletes regular files under `dir` whose modification time is
/// strictly earlier than `cutoff`.
///
/// The sweep is best-effort: an entry whose metadata cannot be read (for example
/// because it was renamed or removed concurrently) is skipped rather than aborting
/// the rest of the directory, and failures inside subdirectories or while removing
/// a file are ignored. Directories themselves are never removed.
///
/// # Errors
///
/// Returns an error only if `dir` cannot be opened for listing or the directory
/// iterator itself fails.
fn evict_dir_recursive(
    dir: &std::path::Path,
    cutoff: std::time::SystemTime,
) -> std::io::Result<()> {
    for entry in std::fs::read_dir(dir)? {
        let entry = entry?;
        // `DirEntry::metadata` does not follow symlinks, so links are neither
        // descended into nor deleted below.
        let meta = match entry.metadata() {
            Ok(meta) => meta,
            // The entry vanished or is unreadable; keep sweeping its siblings.
            Err(_) => continue,
        };
        let path = entry.path();
        if meta.is_dir() {
            let _ = evict_dir_recursive(&path, cutoff);
        } else if meta.is_file() {
            let is_stale = meta
                .modified()
                .map(|mtime| mtime < cutoff)
                .unwrap_or(false);
            if is_stale {
                let _ = std::fs::remove_file(&path);
            }
        }
    }
    Ok(())
}
578
#[cfg(test)]
mod disk_cache_tests {
    use super::*;
    use tempfile::TempDir;

    /// Tool namespace used by every test in this module.
    const TOOL: &str = "analyze_file";

    /// A value written through one `DiskCache` is readable through a second
    /// instance pointing at the same directory.
    #[test]
    fn test_disk_cache_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let key = blake3::hash(b"test-key");
        let value = serde_json::json!({"result": "hello", "count": 42});

        let writer = DiskCache::new(tmp.path().to_path_buf(), false);
        writer.put(TOOL, &key, &value);

        let reader = DiskCache::new(tmp.path().to_path_buf(), false);
        let loaded: Option<serde_json::Value> = reader.get(TOOL, &key);
        assert_eq!(loaded, Some(value));
    }

    /// The base directory is created with mode 0700.
    #[test]
    fn test_disk_cache_permissions() {
        use std::os::unix::fs::PermissionsExt;
        let tmp = TempDir::new().unwrap();
        let cache_dir = tmp.path().join("analysis-cache");
        let _cache = DiskCache::new(cache_dir.clone(), false);

        let permissions = std::fs::metadata(&cache_dir).unwrap().permissions();
        let mode = permissions.mode() & 0o777;
        assert_eq!(mode, 0o700, "cache dir must be mode 0700");
    }

    /// Bytes that are not valid snappy data are treated as a miss.
    #[test]
    fn test_disk_cache_corrupt_entry_returns_none() {
        let tmp = TempDir::new().unwrap();
        let cache = DiskCache::new(tmp.path().to_path_buf(), false);
        let key = blake3::hash(b"corrupt-key");

        let entry = cache.entry_path(TOOL, &key);
        std::fs::create_dir_all(entry.parent().unwrap()).unwrap();
        std::fs::write(&entry, b"not valid snappy data").unwrap();

        let loaded: Option<serde_json::Value> = cache.get(TOOL, &key);
        assert!(loaded.is_none(), "corrupt entry must return None");
    }

    /// If the base path is occupied by a regular file, directory creation fails
    /// and the cache comes up disabled.
    #[test]
    fn test_disk_cache_disabled_on_dir_creation_failure() {
        let tmp = TempDir::new().unwrap();
        let blocked = tmp.path().join("blocked");
        // A plain file at the base path makes `create_dir_all` fail.
        std::fs::write(&blocked, b"").unwrap();

        let cache = DiskCache::new(blocked, false);
        let key = blake3::hash(b"should-not-exist");
        cache.put(TOOL, &key, &serde_json::json!({"x": 1}));

        let loaded: Option<serde_json::Value> = cache.get(TOOL, &key);
        assert!(
            loaded.is_none(),
            "cache must be disabled after dir creation failure"
        );
        assert!(
            cache.disabled,
            "disabled flag must be true after dir creation failure"
        );
    }
}