Skip to main content

aptu_core/security/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security finding cache for LLM validation results.
4//!
5//! Caches validated findings using SHA-256 hashes of (repo, file, pattern, snippet)
6//! to avoid redundant LLM calls for identical findings across scans.
7
8use anyhow::Result;
9use chrono::Duration;
10use hex;
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use tracing::instrument;
14
15use crate::cache::{FileCache, FileCacheImpl};
16
17use super::ValidatedFinding;
18
19/// A cached security finding with validation result.
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct CachedFinding {
22    /// The validated finding.
23    pub validated: ValidatedFinding,
24}
25
26impl CachedFinding {
27    /// Create a new cached finding.
28    #[must_use]
29    pub fn new(validated: ValidatedFinding) -> Self {
30        Self { validated }
31    }
32}
33
34/// Generate a cache key for a security finding.
35///
36/// Creates a SHA-256 hash of the concatenated components:
37/// `{repo_owner}/{repo_name}:{file_path}:{pattern_id}:{matched_text}`
38///
39/// Uses incremental hashing to avoid allocating a large intermediate string,
40/// which is more memory-efficient when `matched_text` contains large code snippets.
41///
42/// This ensures that identical findings across scans are cached,
43/// while different contexts (repo, file, pattern, or code) produce unique keys.
44///
45/// # Arguments
46///
47/// * `repo_owner` - Repository owner (e.g., "octocat")
48/// * `repo_name` - Repository name (e.g., "hello-world")
49/// * `file_path` - File path where finding was detected
50/// * `pattern_id` - Pattern ID that matched
51/// * `matched_text` - The matched code snippet
52///
53/// # Returns
54///
55/// A 64-character hexadecimal SHA-256 hash.
56#[must_use]
57pub fn cache_key(
58    repo_owner: &str,
59    repo_name: &str,
60    file_path: &str,
61    pattern_id: &str,
62    matched_text: &str,
63) -> String {
64    let mut hasher = Sha256::new();
65    hasher.update(repo_owner.as_bytes());
66    hasher.update(b"/");
67    hasher.update(repo_name.as_bytes());
68    hasher.update(b":");
69    hasher.update(file_path.as_bytes());
70    hasher.update(b":");
71    hasher.update(pattern_id.as_bytes());
72    hasher.update(b":");
73    hasher.update(matched_text.as_bytes());
74    hex::encode(hasher.finalize())
75}
76
77/// Cache for security finding validation results.
78///
79/// Wraps `FileCacheImpl` with a 7-day TTL for validated findings.
80/// Uses SHA-256 hashes as cache keys to ensure privacy and uniqueness.
81pub struct FindingCache {
82    cache: FileCacheImpl<CachedFinding>,
83}
84
85impl FindingCache {
86    /// Create a new finding cache with default settings.
87    ///
88    /// Uses a 7-day TTL and stores cache files in `~/.cache/aptu/security`.
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            cache: FileCacheImpl::new(
93                "security",
94                Duration::days(crate::cache::DEFAULT_SECURITY_TTL_DAYS),
95            ),
96        }
97    }
98
99    /// Get a cached validated finding.
100    ///
101    /// # Arguments
102    ///
103    /// * `repo_owner` - Repository owner
104    /// * `repo_name` - Repository name
105    /// * `file_path` - File path where finding was detected
106    /// * `pattern_id` - Pattern ID that matched
107    /// * `matched_text` - The matched code snippet
108    ///
109    /// # Returns
110    ///
111    /// The cached validated finding if it exists and is within TTL, `None` otherwise.
112    #[instrument(skip(self, matched_text), fields(cache_key))]
113    pub fn get(
114        &self,
115        repo_owner: &str,
116        repo_name: &str,
117        file_path: &str,
118        pattern_id: &str,
119        matched_text: &str,
120    ) -> Result<Option<ValidatedFinding>> {
121        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
122        tracing::Span::current().record("cache_key", &key);
123
124        self.cache
125            .get(&key)
126            .map(|opt| opt.map(|cached| cached.validated))
127    }
128
129    /// Set a cached validated finding.
130    ///
131    /// # Arguments
132    ///
133    /// * `repo_owner` - Repository owner
134    /// * `repo_name` - Repository name
135    /// * `file_path` - File path where finding was detected
136    /// * `pattern_id` - Pattern ID that matched
137    /// * `matched_text` - The matched code snippet
138    /// * `validated` - The validated finding to cache
139    #[instrument(skip(self, matched_text, validated), fields(cache_key))]
140    pub fn set(
141        &self,
142        repo_owner: &str,
143        repo_name: &str,
144        file_path: &str,
145        pattern_id: &str,
146        matched_text: &str,
147        validated: ValidatedFinding,
148    ) -> Result<()> {
149        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
150        tracing::Span::current().record("cache_key", &key);
151
152        let cached = CachedFinding::new(validated);
153        self.cache.set(&key, &cached)
154    }
155}
156
157impl Default for FindingCache {
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{Confidence, Finding, Severity};

    /// Changing any single component must change the key; identical inputs
    /// must reproduce it.
    #[test]
    fn test_cache_key_uniqueness() {
        // Different repos should produce different keys
        let key1 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        let key2 = cache_key("owner2", "repo1", "src/main.rs", "pattern1", "code");
        assert_ne!(key1, key2);

        // Different files should produce different keys
        let key3 = cache_key("owner1", "repo1", "src/lib.rs", "pattern1", "code");
        assert_ne!(key1, key3);

        // Different patterns should produce different keys
        let key4 = cache_key("owner1", "repo1", "src/main.rs", "pattern2", "code");
        assert_ne!(key1, key4);

        // Different code should produce different keys
        let key5 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "different");
        assert_ne!(key1, key5);

        // Identical inputs should produce identical keys
        let key6 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        assert_eq!(key1, key6);
    }

    /// The key is a hex-encoded SHA-256 digest.
    #[test]
    fn test_cache_key_format() {
        let key = cache_key("owner", "repo", "file.rs", "pattern", "code");
        // SHA-256 produces 64 hex characters
        assert_eq!(key.len(), 64);
        assert!(key.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// The key must not leak fragments of the hashed inputs.
    #[test]
    fn test_cache_key_privacy() {
        // Cache key should not contain sensitive data
        let key = cache_key(
            "owner",
            "repo",
            "config.rs",
            "hardcoded-secret",
            "api_key = \"sk-secret123\"",
        );
        assert!(!key.contains("secret"));
        assert!(!key.contains("api_key"));
        assert!(!key.contains("sk-"));
    }

    /// A value written with `set` is returned unchanged by `get`.
    #[test]
    fn test_finding_cache_hit() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test-pattern".to_string(),
                description: "Test finding".to_string(),
                severity: Severity::High,
                confidence: Confidence::Medium,
                file_path: "src/test.rs".to_string(),
                line_number: 42,
                matched_text: "test code".to_string(),
                cwe: None,
            },
            is_valid: true,
            reasoning: "Test reasoning".to_string(),
            model_version: Some("test-model".to_string()),
        };
        let key = cache_key("owner", "repo", "src/test.rs", "test-pattern", "test code");

        // Set cache
        cache
            .set(
                "owner",
                "repo",
                "src/test.rs",
                "test-pattern",
                "test code",
                validated.clone(),
            )
            .expect("set cache");

        // Get cache hit
        let lookup = cache.get("owner", "repo", "src/test.rs", "test-pattern", "test code");

        // Clean up BEFORE asserting: a failing assertion (or a failed lookup)
        // must not leave a stale entry behind in the shared on-disk cache.
        cache.cache.remove(&key).ok();

        assert_eq!(lookup.expect("get cache"), Some(validated));
    }

    /// Looking up a key that was never written yields `None`.
    #[test]
    fn test_finding_cache_miss() {
        let cache = FindingCache::new();

        let result = cache
            .get("owner", "repo", "src/nonexistent.rs", "pattern", "code")
            .expect("get cache");

        assert!(result.is_none());
    }

    /// An entry stored for one owner is not visible under another owner.
    #[test]
    fn test_finding_cache_different_context() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "pattern".to_string(),
                description: "Finding".to_string(),
                severity: Severity::Medium,
                confidence: Confidence::High,
                file_path: "src/file.rs".to_string(),
                line_number: 10,
                matched_text: "code".to_string(),
                cwe: None,
            },
            is_valid: false,
            reasoning: "False positive".to_string(),
            model_version: None,
        };
        let key = cache_key("owner1", "repo1", "src/file.rs", "pattern", "code");

        // Set cache for one context
        cache
            .set(
                "owner1",
                "repo1",
                "src/file.rs",
                "pattern",
                "code",
                validated,
            )
            .expect("set cache");

        // Different owner should miss
        let lookup = cache.get("owner2", "repo1", "src/file.rs", "pattern", "code");

        // Clean up BEFORE asserting so a failure cannot leak the entry that
        // was written above into later test runs.
        cache.cache.remove(&key).ok();

        assert!(lookup.expect("get cache").is_none());
    }

    /// `CachedFinding` survives a JSON round trip unchanged.
    #[test]
    fn test_cached_finding_serialization() {
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test".to_string(),
                description: "Test".to_string(),
                severity: Severity::Low,
                confidence: Confidence::Low,
                file_path: "test.rs".to_string(),
                line_number: 1,
                matched_text: "test".to_string(),
                cwe: Some("CWE-123".to_string()),
            },
            is_valid: true,
            reasoning: "Valid".to_string(),
            model_version: Some("model-v1".to_string()),
        };

        let cached = CachedFinding::new(validated.clone());
        let json = serde_json::to_string(&cached).expect("serialize");
        let deserialized: CachedFinding = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(cached, deserialized);
        assert_eq!(deserialized.validated, validated);
    }
}