Skip to main content

aptu_core/security/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security finding cache for LLM validation results.
4//!
5//! Caches validated findings using SHA-256 hashes of (repo, file, pattern, snippet)
6//! to avoid redundant LLM calls for identical findings across scans.
7
8use anyhow::Result;
9use chrono::Duration;
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12use tracing::instrument;
13
14use crate::cache::{FileCache, FileCacheImpl};
15
16use super::ValidatedFinding;
17
18/// A cached security finding with validation result.
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
20pub struct CachedFinding {
21    /// The validated finding.
22    pub validated: ValidatedFinding,
23}
24
25impl CachedFinding {
26    /// Create a new cached finding.
27    #[must_use]
28    pub fn new(validated: ValidatedFinding) -> Self {
29        Self { validated }
30    }
31}
32
33/// Generate a cache key for a security finding.
34///
35/// Creates a SHA-256 hash of the concatenated components:
36/// `{repo_owner}/{repo_name}:{file_path}:{pattern_id}:{matched_text}`
37///
38/// Uses incremental hashing to avoid allocating a large intermediate string,
39/// which is more memory-efficient when `matched_text` contains large code snippets.
40///
41/// This ensures that identical findings across scans are cached,
42/// while different contexts (repo, file, pattern, or code) produce unique keys.
43///
44/// # Arguments
45///
46/// * `repo_owner` - Repository owner (e.g., "octocat")
47/// * `repo_name` - Repository name (e.g., "hello-world")
48/// * `file_path` - File path where finding was detected
49/// * `pattern_id` - Pattern ID that matched
50/// * `matched_text` - The matched code snippet
51///
52/// # Returns
53///
54/// A 64-character hexadecimal SHA-256 hash.
55#[must_use]
56pub fn cache_key(
57    repo_owner: &str,
58    repo_name: &str,
59    file_path: &str,
60    pattern_id: &str,
61    matched_text: &str,
62) -> String {
63    let mut hasher = Sha256::new();
64    hasher.update(repo_owner.as_bytes());
65    hasher.update(b"/");
66    hasher.update(repo_name.as_bytes());
67    hasher.update(b":");
68    hasher.update(file_path.as_bytes());
69    hasher.update(b":");
70    hasher.update(pattern_id.as_bytes());
71    hasher.update(b":");
72    hasher.update(matched_text.as_bytes());
73    format!("{:x}", hasher.finalize())
74}
75
76/// Cache for security finding validation results.
77///
78/// Wraps `FileCacheImpl` with a 7-day TTL for validated findings.
79/// Uses SHA-256 hashes as cache keys to ensure privacy and uniqueness.
80pub struct FindingCache {
81    cache: FileCacheImpl<CachedFinding>,
82}
83
84impl FindingCache {
85    /// Create a new finding cache with default settings.
86    ///
87    /// Uses a 7-day TTL and stores cache files in `~/.cache/aptu/security`.
88    #[must_use]
89    pub fn new() -> Self {
90        Self {
91            cache: FileCacheImpl::new(
92                "security",
93                Duration::days(crate::cache::DEFAULT_SECURITY_TTL_DAYS),
94            ),
95        }
96    }
97
98    /// Get a cached validated finding.
99    ///
100    /// # Arguments
101    ///
102    /// * `repo_owner` - Repository owner
103    /// * `repo_name` - Repository name
104    /// * `file_path` - File path where finding was detected
105    /// * `pattern_id` - Pattern ID that matched
106    /// * `matched_text` - The matched code snippet
107    ///
108    /// # Returns
109    ///
110    /// The cached validated finding if it exists and is within TTL, `None` otherwise.
111    #[instrument(skip(self, matched_text), fields(cache_key))]
112    pub fn get(
113        &self,
114        repo_owner: &str,
115        repo_name: &str,
116        file_path: &str,
117        pattern_id: &str,
118        matched_text: &str,
119    ) -> Result<Option<ValidatedFinding>> {
120        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
121        tracing::Span::current().record("cache_key", &key);
122
123        self.cache
124            .get(&key)
125            .map(|opt| opt.map(|cached| cached.validated))
126    }
127
128    /// Set a cached validated finding.
129    ///
130    /// # Arguments
131    ///
132    /// * `repo_owner` - Repository owner
133    /// * `repo_name` - Repository name
134    /// * `file_path` - File path where finding was detected
135    /// * `pattern_id` - Pattern ID that matched
136    /// * `matched_text` - The matched code snippet
137    /// * `validated` - The validated finding to cache
138    #[instrument(skip(self, matched_text, validated), fields(cache_key))]
139    pub fn set(
140        &self,
141        repo_owner: &str,
142        repo_name: &str,
143        file_path: &str,
144        pattern_id: &str,
145        matched_text: &str,
146        validated: ValidatedFinding,
147    ) -> Result<()> {
148        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
149        tracing::Span::current().record("cache_key", &key);
150
151        let cached = CachedFinding::new(validated);
152        self.cache.set(&key, &cached)
153    }
154}
155
156impl Default for FindingCache {
157    fn default() -> Self {
158        Self::new()
159    }
160}
161
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{Confidence, Finding, Severity};

    /// Best-effort removal of a test entry from the underlying file cache.
    ///
    /// Errors are deliberately ignored: a failed cleanup must not mask the
    /// outcome of the test that requested it.
    fn evict(
        cache: &FindingCache,
        repo_owner: &str,
        repo_name: &str,
        file_path: &str,
        pattern_id: &str,
        matched_text: &str,
    ) {
        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
        cache.cache.remove(&key).ok();
    }

    #[test]
    fn test_cache_key_uniqueness() {
        let baseline = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");

        // Changing any single component must change the key.
        let other_owner = cache_key("owner2", "repo1", "src/main.rs", "pattern1", "code");
        assert_ne!(baseline, other_owner);

        let other_file = cache_key("owner1", "repo1", "src/lib.rs", "pattern1", "code");
        assert_ne!(baseline, other_file);

        let other_pattern = cache_key("owner1", "repo1", "src/main.rs", "pattern2", "code");
        assert_ne!(baseline, other_pattern);

        let other_code = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "different");
        assert_ne!(baseline, other_code);

        // Identical inputs must reproduce the same key.
        let repeated = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        assert_eq!(baseline, repeated);
    }

    #[test]
    fn test_cache_key_format() {
        let key = cache_key("owner", "repo", "file.rs", "pattern", "code");
        // SHA-256 produces 64 hex characters
        assert_eq!(key.len(), 64);
        assert!(key.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_cache_key_privacy() {
        // Cache key should not contain sensitive data
        let key = cache_key(
            "owner",
            "repo",
            "config.rs",
            "hardcoded-secret",
            "api_key = \"sk-secret123\"",
        );
        for fragment in ["secret", "api_key", "sk-"] {
            assert!(!key.contains(fragment));
        }
    }

    #[test]
    fn test_finding_cache_hit() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test-pattern".to_string(),
                description: "Test finding".to_string(),
                severity: Severity::High,
                confidence: Confidence::Medium,
                file_path: "src/test.rs".to_string(),
                line_number: 42,
                matched_text: "test code".to_string(),
                cwe: None,
            },
            is_valid: true,
            reasoning: "Test reasoning".to_string(),
            model_version: Some("test-model".to_string()),
        };

        // Populate the cache.
        cache
            .set(
                "owner",
                "repo",
                "src/test.rs",
                "test-pattern",
                "test code",
                validated.clone(),
            )
            .expect("set cache");

        // Read it back.
        let result = cache.get("owner", "repo", "src/test.rs", "test-pattern", "test code");

        // Clean up BEFORE asserting so a failed assertion cannot leave the
        // entry behind in the on-disk cache.
        evict(&cache, "owner", "repo", "src/test.rs", "test-pattern", "test code");

        assert_eq!(result.expect("get cache"), Some(validated));
    }

    #[test]
    fn test_finding_cache_miss() {
        let cache = FindingCache::new();

        let result = cache
            .get("owner", "repo", "src/nonexistent.rs", "pattern", "code")
            .expect("get cache");

        assert!(result.is_none());
    }

    #[test]
    fn test_finding_cache_different_context() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "pattern".to_string(),
                description: "Finding".to_string(),
                severity: Severity::Medium,
                confidence: Confidence::High,
                file_path: "src/file.rs".to_string(),
                line_number: 10,
                matched_text: "code".to_string(),
                cwe: None,
            },
            is_valid: false,
            reasoning: "False positive".to_string(),
            model_version: None,
        };

        // Populate the cache for one context.
        cache
            .set(
                "owner1",
                "repo1",
                "src/file.rs",
                "pattern",
                "code",
                validated,
            )
            .expect("set cache");

        // A different owner must not see that entry.
        let result = cache.get("owner2", "repo1", "src/file.rs", "pattern", "code");

        // Clean up BEFORE asserting so a failed assertion cannot leave the
        // entry behind in the on-disk cache.
        evict(&cache, "owner1", "repo1", "src/file.rs", "pattern", "code");

        assert!(result.expect("get cache").is_none());
    }

    #[test]
    fn test_cached_finding_serialization() {
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test".to_string(),
                description: "Test".to_string(),
                severity: Severity::Low,
                confidence: Confidence::Low,
                file_path: "test.rs".to_string(),
                line_number: 1,
                matched_text: "test".to_string(),
                cwe: Some("CWE-123".to_string()),
            },
            is_valid: true,
            reasoning: "Valid".to_string(),
            model_version: Some("model-v1".to_string()),
        };

        // Round-trip through JSON and confirm nothing is lost.
        let cached = CachedFinding::new(validated.clone());
        let json = serde_json::to_string(&cached).expect("serialize");
        let deserialized: CachedFinding = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(cached, deserialized);
        assert_eq!(deserialized.validated, validated);
    }
}