Skip to main content

skilllite_core/
scan_cache.rs

1//! A3: LLM admission scan result cache.
2//!
3//! Persists scan results to ~/.skilllite/scan-cache.json. Key = content_hash (SHA256 of
4//! skill_md + script_samples). Same hash within TTL avoids redundant LLM calls.
5//!
6//! ## Concurrency safety
7//!
8//! `put_cached` uses a write-to-temp-then-rename strategy so that concurrent
9//! processes never see a partially-written (corrupt) JSON file.  On POSIX,
10//! `rename(2)` is atomic: the destination path atomically switches from the
11//! old content to the new content.  If two processes write at the same time
12//! the last rename wins (last-writer-wins), which is acceptable for a cache —
13//! the losing entry will simply be recomputed on the next miss.
14
15use anyhow::Result;
16use serde::{Deserialize, Serialize};
17use sha2::{Digest, Sha256};
18use std::collections::HashMap;
19use std::fs;
20use std::path::PathBuf;
21use std::time::{SystemTime, UNIX_EPOCH};
22
23const CACHE_TTL_SECS: u64 = 300;
24const CACHE_FILENAME: &str = "scan-cache.json";
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
27struct CachedEntry {
28    risk: String,
29    reason: String,
30    timestamp: u64,
31}
32
33fn cache_path() -> PathBuf {
34    crate::paths::data_root().join(CACHE_FILENAME)
35}
36
37/// Compute SHA256 hash of content for cache key.
38pub fn content_hash(skill_md: &str, script_samples: &str) -> String {
39    let mut hasher = Sha256::new();
40    hasher.update(skill_md.as_bytes());
41    hasher.update(script_samples.as_bytes());
42    hex::encode(hasher.finalize())
43}
44
45/// Look up cached LLM admission result. Returns (risk, reason) if found and not expired.
46pub fn get_cached(content_hash: &str) -> Result<Option<(String, String)>> {
47    let path = cache_path();
48    if !path.exists() {
49        return Ok(None);
50    }
51    let content = fs::read_to_string(&path).map_err(|e| anyhow::anyhow!("read cache: {}", e))?;
52    let map: HashMap<String, CachedEntry> = serde_json::from_str(&content).unwrap_or_default();
53    let now = SystemTime::now()
54        .duration_since(UNIX_EPOCH)
55        .unwrap_or_default()
56        .as_secs();
57    if let Some(entry) = map.get(content_hash) {
58        if now.saturating_sub(entry.timestamp) < CACHE_TTL_SECS {
59            return Ok(Some((entry.risk.clone(), entry.reason.clone())));
60        }
61    }
62    Ok(None)
63}
64
65/// Store LLM admission result in cache.
66///
67/// Uses an atomic write (temp file + rename) to prevent concurrent processes
68/// from producing a partially-written / corrupt cache file.
69pub fn put_cached(content_hash: &str, risk: &str, reason: &str) -> Result<()> {
70    let path = cache_path();
71    let parent = path.parent().unwrap_or(path.as_path());
72    if !parent.exists() {
73        fs::create_dir_all(parent)?;
74    }
75    let now = SystemTime::now()
76        .duration_since(UNIX_EPOCH)
77        .unwrap_or_default()
78        .as_secs();
79    let mut map: HashMap<String, CachedEntry> = if path.exists() {
80        let content = fs::read_to_string(&path).unwrap_or_default();
81        serde_json::from_str(&content).unwrap_or_default()
82    } else {
83        HashMap::new()
84    };
85    // Evict expired entries before adding
86    map.retain(|_, e| now.saturating_sub(e.timestamp) < CACHE_TTL_SECS);
87    map.insert(
88        content_hash.to_string(),
89        CachedEntry {
90            risk: risk.to_string(),
91            reason: reason.to_string(),
92            timestamp: now,
93        },
94    );
95    let content = serde_json::to_string_pretty(&map)?;
96
97    // Atomic write: write to a per-process temp file, then rename.
98    // rename(2) is atomic on POSIX — readers never see a partial write.
99    let tmp_path = path.with_extension(format!("tmp.{}", std::process::id()));
100    fs::write(&tmp_path, content.as_bytes())
101        .map_err(|e| anyhow::anyhow!("write scan-cache tmp: {}", e))?;
102    if let Err(e) = fs::rename(&tmp_path, &path) {
103        // Best-effort cleanup of the temp file; ignore the secondary error.
104        let _ = fs::remove_file(&tmp_path);
105        return Err(anyhow::anyhow!("atomic rename scan-cache: {}", e));
106    }
107    Ok(())
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Hashing the same pair twice yields the same 64-character hex digest.
    #[test]
    fn test_content_hash_deterministic() {
        let first = content_hash("skill a", "script b");
        let second = content_hash("skill a", "script b");
        assert_eq!(first, second);
        // SHA-256 is 32 bytes, i.e. 64 hex characters.
        assert_eq!(first.len(), 64);
    }

    /// Changing one of the inputs changes the digest.
    #[test]
    fn test_content_hash_different_inputs() {
        assert_ne!(content_hash("a", "b"), content_hash("a", "c"));
    }

    /// A value written with `put_cached` is returned by `get_cached`.
    #[test]
    fn test_cache_roundtrip() {
        let key = "test_hash_123";
        put_cached(key, "suspicious", "test reason").unwrap();

        let cached = get_cached(key).unwrap();
        assert!(cached.is_some());
        let (risk, reason) = cached.unwrap();
        assert_eq!(
            (risk.as_str(), reason.as_str()),
            ("suspicious", "test reason")
        );
    }
}