Skip to main content

aptu_core/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! TTL-based file caching for GitHub API responses.
4//!
5//! Stores issue and repository data as JSON files with embedded metadata
6//! (timestamp, optional etag). Cache entries are validated against TTL settings
7//! from configuration.
8
9// `async_yields_async` is suppressed because the FileCache trait uses async fn (RPITIT,
10// stable in Rust 1.95 / edition 2024). The trait is intentionally crate-internal
11// (not part of the public API) and is never used as `dyn FileCache`, so the lint
12// warning is a false positive. There is no plan to expose this trait publicly.
13#![allow(clippy::async_yields_async)]
14
15use std::path::PathBuf;
16use std::sync::OnceLock;
17
18use anyhow::{Context, Result};
19use chrono::{DateTime, Duration, Utc};
20use serde::{Deserialize, Serialize};
21use tracing::{debug, warn};
22
/// One-shot guard used so the "cache unavailable" warning is logged a single time.
static CACHE_UNAVAILABLE_WARNING: OnceLock<()> = OnceLock::new();

/// Default time-to-live for cached issues, expressed in minutes.
pub const DEFAULT_ISSUE_TTL_MINS: i64 = 60;

/// Default time-to-live for cached repositories, expressed in hours.
pub const DEFAULT_REPO_TTL_HOURS: i64 = 24;

/// Default time-to-live for the cached model registry, expressed in seconds.
pub const DEFAULT_MODEL_TTL_SECS: u64 = 86_400;

/// Default time-to-live for cached security findings, expressed in days.
pub const DEFAULT_SECURITY_TTL_DAYS: i64 = 7;
37
38/// A cached entry with metadata.
39///
40/// Wraps cached data with timestamp and optional etag for validation.
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct CacheEntry<T> {
43    /// The cached data.
44    pub data: T,
45    /// When the entry was cached.
46    pub cached_at: DateTime<Utc>,
47    /// Optional `ETag` for future conditional requests.
48    #[serde(skip_serializing_if = "Option::is_none")]
49    pub etag: Option<String>,
50}
51
52impl<T> CacheEntry<T> {
53    /// Create a new cache entry.
54    pub fn new(data: T) -> Self {
55        Self {
56            data,
57            cached_at: Utc::now(),
58            etag: None,
59        }
60    }
61
62    /// Create a new cache entry with an etag.
63    pub fn with_etag(data: T, etag: String) -> Self {
64        Self {
65            data,
66            cached_at: Utc::now(),
67            etag: Some(etag),
68        }
69    }
70
71    /// Check if this entry is still valid based on TTL.
72    ///
73    /// # Arguments
74    ///
75    /// * `ttl` - Time-to-live duration
76    ///
77    /// # Returns
78    ///
79    /// `true` if the entry is within its TTL, `false` if expired.
80    pub fn is_valid(&self, ttl: Duration) -> bool {
81        let now = Utc::now();
82        now.signed_duration_since(self.cached_at) < ttl
83    }
84}
85
86/// Returns the cache directory.
87///
88/// - Linux: `~/.cache/aptu`
89/// - macOS: `~/Library/Caches/aptu`
90/// - Windows: `C:\Users\<User>\AppData\Local\aptu`
91///
92/// Returns `None` if the cache directory cannot be determined.
93#[must_use]
94pub fn cache_dir() -> Option<PathBuf> {
95    dirs::cache_dir().map(|dir| dir.join("aptu"))
96}
97
98/// Trait for TTL-based filesystem caching.
99///
100/// Provides a unified interface for caching serializable data with time-to-live validation.
101///
102/// `async_fn_in_trait` is suppressed because this trait is re-exported for use by crate
103/// consumers but is never intended to be implemented externally or used as `dyn FileCache`.
104/// All known implementors are in this crate, so auto-trait bounds are not a concern.
105#[allow(async_fn_in_trait)]
106pub trait FileCache<V> {
107    /// Get a cached value if it exists and is valid.
108    ///
109    /// # Arguments
110    ///
111    /// * `key` - Cache key (filename without extension)
112    ///
113    /// # Returns
114    ///
115    /// The cached value if it exists and is within TTL, `None` otherwise.
116    async fn get(&self, key: &str) -> Result<Option<V>>;
117
118    /// Get a cached value regardless of TTL (stale fallback).
119    ///
120    /// # Arguments
121    ///
122    /// * `key` - Cache key (filename without extension)
123    ///
124    /// # Returns
125    ///
126    /// The cached value if it exists, `None` otherwise.
127    async fn get_stale(&self, key: &str) -> Result<Option<V>>;
128
129    /// Set a cached value.
130    ///
131    /// # Arguments
132    ///
133    /// * `key` - Cache key (filename without extension)
134    /// * `value` - Value to cache
135    async fn set(&self, key: &str, value: &V) -> Result<()>;
136
137    /// Remove a cached value.
138    ///
139    /// # Arguments
140    ///
141    /// * `key` - Cache key (filename without extension)
142    async fn remove(&self, key: &str) -> Result<()>;
143}
144
145/// File-based cache implementation with TTL support.
146///
147/// Stores serialized data in JSON files with embedded metadata.
148/// When cache directory is unavailable (None), all operations become no-ops.
149pub struct FileCacheImpl<V> {
150    cache_dir: Option<PathBuf>,
151    ttl: Duration,
152    subdirectory: String,
153    _phantom: std::marker::PhantomData<V>,
154}
155
156impl<V> FileCacheImpl<V>
157where
158    V: Serialize + for<'de> Deserialize<'de>,
159{
160    /// Create a new file cache with default cache directory.
161    ///
162    /// # Arguments
163    ///
164    /// * `subdirectory` - Subdirectory within cache directory
165    /// * `ttl` - Time-to-live for cache entries
166    ///
167    /// If the cache directory cannot be determined, caching is disabled
168    /// and a warning is emitted.
169    #[must_use]
170    pub fn new(subdirectory: impl Into<String>, ttl: Duration) -> Self {
171        let cache_dir = cache_dir();
172        if cache_dir.is_none() {
173            CACHE_UNAVAILABLE_WARNING.get_or_init(|| {
174                warn!("Cache directory unavailable, caching disabled");
175            });
176        }
177        Self::with_dir(cache_dir, subdirectory, ttl)
178    }
179
180    /// Create a new file cache with custom cache directory.
181    ///
182    /// # Arguments
183    ///
184    /// * `cache_dir` - Base cache directory (None to disable caching)
185    /// * `subdirectory` - Subdirectory within cache directory
186    /// * `ttl` - Time-to-live for cache entries
187    #[must_use]
188    pub fn with_dir(
189        cache_dir: Option<PathBuf>,
190        subdirectory: impl Into<String>,
191        ttl: Duration,
192    ) -> Self {
193        Self {
194            cache_dir,
195            ttl,
196            subdirectory: subdirectory.into(),
197            _phantom: std::marker::PhantomData,
198        }
199    }
200
201    /// Check if caching is enabled.
202    fn is_enabled(&self) -> bool {
203        self.cache_dir.is_some()
204    }
205
206    /// Get the full path for a cache key.
207    ///
208    /// # Panics
209    ///
210    /// Panics if the key contains path separators or parent directory references,
211    /// which could lead to path traversal vulnerabilities.
212    fn cache_path(&self, key: &str) -> Option<PathBuf> {
213        // Validate key to prevent path traversal
214        assert!(
215            !key.contains('/') && !key.contains('\\') && !key.contains(".."),
216            "cache key must not contain path separators or '..': {key}"
217        );
218
219        let filename = if std::path::Path::new(key)
220            .extension()
221            .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
222        {
223            key.to_string()
224        } else {
225            format!("{key}.json")
226        };
227        self.cache_dir
228            .as_ref()
229            .map(|dir| dir.join(&self.subdirectory).join(filename))
230    }
231
232    /// Evict cache files older than the specified TTL.
233    ///
234    /// Scans the cache subdirectory and removes files with `cached_at` timestamps
235    /// older than `eviction_days`. Returns the count of files removed.
236    ///
237    /// # Arguments
238    ///
239    /// * `eviction_days` - Number of days to retain files
240    ///
241    /// # Returns
242    ///
243    /// The number of files evicted.
244    pub async fn evict_stale(&self, eviction_days: i64) -> usize {
245        if !self.is_enabled() {
246            return 0;
247        }
248
249        let Some(cache_dir) = &self.cache_dir else {
250            return 0;
251        };
252
253        let subdir = cache_dir.join(&self.subdirectory);
254
255        // Check if subdirectory exists
256        if !tokio::fs::try_exists(&subdir).await.unwrap_or(false) {
257            return 0;
258        }
259
260        let Ok(mut read_dir) = tokio::fs::read_dir(&subdir).await else {
261            return 0;
262        };
263
264        let mut evicted_count = 0;
265        let cutoff_time = Utc::now() - Duration::days(eviction_days);
266
267        while let Ok(Some(entry)) = read_dir.next_entry().await {
268            let path = entry.path();
269
270            // Only process .json files
271            if !path
272                .extension()
273                .is_some_and(|ext| ext.eq_ignore_ascii_case("json"))
274            {
275                continue;
276            }
277
278            let Ok(contents) = tokio::fs::read_to_string(&path).await else {
279                continue;
280            };
281
282            let Ok(entry_data) = serde_json::from_str::<CacheEntry<serde_json::Value>>(&contents)
283            else {
284                continue;
285            };
286
287            if entry_data.cached_at < cutoff_time && tokio::fs::remove_file(&path).await.is_ok() {
288                debug!("Evicted stale cache file: {}", path.display());
289                evicted_count += 1;
290            }
291        }
292
293        evicted_count
294    }
295}
296
297impl<V> FileCache<V> for FileCacheImpl<V>
298where
299    V: Serialize + for<'de> Deserialize<'de>,
300{
301    async fn get(&self, key: &str) -> Result<Option<V>> {
302        if !self.is_enabled() {
303            return Ok(None);
304        }
305
306        let Some(path) = self.cache_path(key) else {
307            return Ok(None);
308        };
309
310        if !tokio::fs::try_exists(&path)
311            .await
312            .with_context(|| format!("Failed to check cache file: {}", path.display()))?
313        {
314            return Ok(None);
315        }
316
317        let contents = tokio::fs::read_to_string(&path)
318            .await
319            .with_context(|| format!("Failed to read cache file: {}", path.display()))?;
320
321        let entry: CacheEntry<V> = serde_json::from_str(&contents)
322            .with_context(|| format!("Failed to parse cache file: {}", path.display()))?;
323
324        if entry.is_valid(self.ttl) {
325            Ok(Some(entry.data))
326        } else {
327            Ok(None)
328        }
329    }
330
331    async fn get_stale(&self, key: &str) -> Result<Option<V>> {
332        if !self.is_enabled() {
333            return Ok(None);
334        }
335
336        let Some(path) = self.cache_path(key) else {
337            return Ok(None);
338        };
339
340        if !tokio::fs::try_exists(&path)
341            .await
342            .with_context(|| format!("Failed to check cache file: {}", path.display()))?
343        {
344            return Ok(None);
345        }
346
347        let contents = tokio::fs::read_to_string(&path)
348            .await
349            .with_context(|| format!("Failed to read cache file: {}", path.display()))?;
350
351        let entry: CacheEntry<V> = serde_json::from_str(&contents)
352            .with_context(|| format!("Failed to parse cache file: {}", path.display()))?;
353
354        Ok(Some(entry.data))
355    }
356
357    async fn set(&self, key: &str, value: &V) -> Result<()> {
358        if !self.is_enabled() {
359            return Ok(());
360        }
361
362        let Some(path) = self.cache_path(key) else {
363            return Ok(());
364        };
365
366        // Create parent directories if needed
367        if let Some(parent) = path.parent() {
368            tokio::fs::create_dir_all(parent).await.with_context(|| {
369                format!("Failed to create cache directory: {}", parent.display())
370            })?;
371        }
372
373        let entry = CacheEntry::new(value);
374        let contents =
375            serde_json::to_string_pretty(&entry).context("Failed to serialize cache entry")?;
376
377        // Atomic write: write to temp file, then rename
378        let temp_path = path.with_extension("tmp");
379        tokio::fs::write(&temp_path, contents)
380            .await
381            .with_context(|| format!("Failed to write cache temp file: {}", temp_path.display()))?;
382
383        tokio::fs::rename(&temp_path, &path)
384            .await
385            .with_context(|| format!("Failed to rename cache file: {}", path.display()))?;
386
387        Ok(())
388    }
389
390    async fn remove(&self, key: &str) -> Result<()> {
391        if !self.is_enabled() {
392            return Ok(());
393        }
394
395        let Some(path) = self.cache_path(key) else {
396            return Ok(());
397        };
398
399        if tokio::fs::try_exists(&path)
400            .await
401            .with_context(|| format!("Failed to check cache file: {}", path.display()))?
402        {
403            tokio::fs::remove_file(&path)
404                .await
405                .with_context(|| format!("Failed to remove cache file: {}", path.display()))?;
406        }
407        Ok(())
408    }
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
    struct TestData {
        value: String,
        count: u32,
    }

    /// Build a cache rooted in a scratch directory under the system temp dir.
    ///
    /// The directory name combines the process id with `label`, so each test
    /// gets its own root instead of writing into the real user cache directory.
    /// Any leftover directory of the same name is cleared first. Returns the
    /// cache together with the root so the test can delete it afterwards.
    fn scratch_cache(label: &str, ttl: Duration) -> (FileCacheImpl<TestData>, std::path::PathBuf) {
        let root = std::env::temp_dir().join(format!(
            "aptu-cache-test-{}-{label}",
            std::process::id()
        ));
        let _ = std::fs::remove_dir_all(&root);
        let cache = FileCacheImpl::with_dir(Some(root.clone()), "test_cache", ttl);
        (cache, root)
    }

    /// Best-effort removal of a scratch directory created by `scratch_cache`.
    async fn discard(root: &std::path::Path) {
        tokio::fs::remove_dir_all(root).await.ok();
    }

    #[test]
    fn test_cache_entry_new() {
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        let entry = CacheEntry::new(data.clone());

        assert_eq!(entry.data, data);
        assert!(entry.etag.is_none());
    }

    #[test]
    fn test_cache_entry_with_etag() {
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        let etag = "abc123".to_string();
        let entry = CacheEntry::with_etag(data.clone(), etag.clone());

        assert_eq!(entry.data, data);
        assert_eq!(entry.etag, Some(etag));
    }

    #[test]
    fn test_cache_entry_is_valid_within_ttl() {
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        let entry = CacheEntry::new(data);
        let ttl = Duration::hours(1);

        assert!(entry.is_valid(ttl));
    }

    #[test]
    fn test_cache_entry_is_valid_expired() {
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        let mut entry = CacheEntry::new(data);
        // Manually set cached_at to 2 hours ago
        entry.cached_at = Utc::now() - Duration::hours(2);
        let ttl = Duration::hours(1);

        assert!(!entry.is_valid(ttl));
    }

    #[test]
    fn test_cache_dir_path() {
        // `cache_dir` is documented to return `None` when no cache directory can
        // be determined, so only the shape of a present result is asserted.
        if let Some(dir) = cache_dir() {
            assert!(dir.ends_with("aptu"));
        }
    }

    #[test]
    fn test_cache_serialization_with_etag() {
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        let etag = "xyz789".to_string();
        let entry = CacheEntry::with_etag(data.clone(), etag.clone());

        let json = serde_json::to_string(&entry).expect("serialize");
        let parsed: CacheEntry<TestData> = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(parsed.data, data);
        assert_eq!(parsed.etag, Some(etag));
    }

    #[tokio::test]
    async fn test_file_cache_get_set() {
        let (cache, root) = scratch_cache("get_set", Duration::hours(1));
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };

        // Set value
        cache.set("test_key", &data).await.expect("set cache");

        // Get value
        let result = cache.get("test_key").await.expect("get cache");
        assert_eq!(result, Some(data));

        discard(&root).await;
    }

    #[tokio::test]
    async fn test_file_cache_get_miss() {
        let (cache, root) = scratch_cache("get_miss", Duration::hours(1));

        let result = cache.get("nonexistent").await.expect("get cache");
        assert!(result.is_none());

        discard(&root).await;
    }

    #[tokio::test]
    async fn test_file_cache_get_stale() {
        let (cache, root) = scratch_cache("get_stale", Duration::seconds(0));
        let data = TestData {
            value: "stale".to_string(),
            count: 99,
        };

        // Set value
        cache.set("stale_key", &data).await.expect("set cache");

        // Wait for TTL to expire
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;

        // get() should return None (expired)
        let result = cache.get("stale_key").await.expect("get cache");
        assert!(result.is_none());

        // get_stale() should return the value
        let stale_result = cache.get_stale("stale_key").await.expect("get stale cache");
        assert_eq!(stale_result, Some(data));

        discard(&root).await;
    }

    #[tokio::test]
    async fn test_file_cache_remove() {
        let (cache, root) = scratch_cache("remove", Duration::hours(1));
        let data = TestData {
            value: "remove_me".to_string(),
            count: 1,
        };

        // Set value
        cache.set("remove_key", &data).await.expect("set cache");

        // Verify it exists
        assert!(cache.get("remove_key").await.expect("get cache").is_some());

        // Remove it
        cache.remove("remove_key").await.expect("remove cache");

        // Verify it's gone
        assert!(cache.get("remove_key").await.expect("get cache").is_none());

        discard(&root).await;
    }

    // The three rejection tests each use a key that violates exactly one rule, so
    // a regression in any single check is caught by its own test. The panic
    // happens while the path is being built, before any file is touched.

    #[tokio::test]
    #[should_panic(expected = "cache key must not contain path separators")]
    async fn test_cache_key_rejects_forward_slash() {
        let (cache, _root) = scratch_cache("reject_slash", Duration::hours(1));
        let _ = cache.get("etc/passwd").await;
    }

    #[tokio::test]
    #[should_panic(expected = "cache key must not contain path separators")]
    async fn test_cache_key_rejects_backslash() {
        let (cache, _root) = scratch_cache("reject_backslash", Duration::hours(1));
        let _ = cache.get("windows\\system32").await;
    }

    #[tokio::test]
    #[should_panic(expected = "cache key must not contain path separators")]
    async fn test_cache_key_rejects_parent_dir() {
        let (cache, _root) = scratch_cache("reject_parent", Duration::hours(1));
        let _ = cache.get("foo..bar").await;
    }

    #[tokio::test]
    async fn test_disabled_cache_get_returns_none() {
        let cache: FileCacheImpl<TestData> =
            FileCacheImpl::with_dir(None, "test_cache", Duration::hours(1));
        let result = cache.get("any_key").await.expect("get should succeed");
        assert!(result.is_none());
    }

    #[tokio::test]
    async fn test_disabled_cache_set_succeeds_silently() {
        let cache: FileCacheImpl<TestData> =
            FileCacheImpl::with_dir(None, "test_cache", Duration::hours(1));
        let data = TestData {
            value: "test".to_string(),
            count: 42,
        };
        cache
            .set("any_key", &data)
            .await
            .expect("set should succeed");
    }

    #[tokio::test]
    async fn test_disabled_cache_remove_succeeds_silently() {
        let cache: FileCacheImpl<TestData> =
            FileCacheImpl::with_dir(None, "test_cache", Duration::hours(1));
        cache
            .remove("any_key")
            .await
            .expect("remove should succeed");
    }

    #[tokio::test]
    async fn test_disabled_cache_get_stale_returns_none() {
        let cache: FileCacheImpl<TestData> =
            FileCacheImpl::with_dir(None, "test_cache", Duration::hours(1));
        let result = cache
            .get_stale("any_key")
            .await
            .expect("get_stale should succeed");
        assert!(result.is_none());
    }

    #[tokio::test]
    async fn test_evict_stale_removes_old_files() {
        let (cache, root) = scratch_cache("evict_old", Duration::hours(1));
        let data = TestData {
            value: "old".to_string(),
            count: 1,
        };

        // Set a value
        cache.set("old_key", &data).await.expect("set cache");

        // Manually modify the cached_at timestamp to be old
        let path = cache.cache_path("old_key").expect("cache is enabled");
        let contents = tokio::fs::read_to_string(&path)
            .await
            .expect("read cache file");
        let mut entry: CacheEntry<TestData> =
            serde_json::from_str(&contents).expect("parse cache entry");
        entry.cached_at = Utc::now() - Duration::days(10);
        let new_contents = serde_json::to_string_pretty(&entry).expect("serialize cache entry");
        tokio::fs::write(&path, new_contents)
            .await
            .expect("write cache file");

        // Evict files older than 7 days
        let evicted = cache.evict_stale(7).await;
        assert_eq!(evicted, 1);

        // Verify the file is gone
        let result = cache.get("old_key").await.expect("get cache");
        assert!(result.is_none());

        discard(&root).await;
    }

    #[tokio::test]
    async fn test_evict_stale_preserves_fresh_files() {
        let (cache, root) = scratch_cache("evict_fresh", Duration::hours(1));
        let data = TestData {
            value: "fresh".to_string(),
            count: 2,
        };

        // Set a value
        cache.set("fresh_key", &data).await.expect("set cache");

        // Evict files older than 7 days (this file is fresh, so it should be preserved)
        let evicted = cache.evict_stale(7).await;
        assert_eq!(evicted, 0);

        // Verify the file still exists
        let result = cache.get("fresh_key").await.expect("get cache");
        assert_eq!(result, Some(data));

        discard(&root).await;
    }
}