Skip to main content

aptu_core/security/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security finding cache for LLM validation results.
4//!
5//! Caches validated findings using SHA-256 hashes of (repo, file, pattern, snippet)
6//! to avoid redundant LLM calls for identical findings across scans.
7
8use anyhow::Result;
9use chrono::Duration;
10use hex;
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use tracing::instrument;
14
15use crate::cache::{FileCache, FileCacheImpl};
16
17use super::ValidatedFinding;
18
19/// A cached security finding with validation result.
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct CachedFinding {
22    /// The validated finding.
23    pub validated: ValidatedFinding,
24}
25
26impl CachedFinding {
27    /// Create a new cached finding.
28    #[must_use]
29    pub fn new(validated: ValidatedFinding) -> Self {
30        Self { validated }
31    }
32}
33
34/// Generate a cache key for a security finding.
35///
36/// Creates a SHA-256 hash of the concatenated components:
37/// `{repo_owner}/{repo_name}:{file_path}:{pattern_id}:{matched_text}`
38///
39/// Uses incremental hashing to avoid allocating a large intermediate string,
40/// which is more memory-efficient when `matched_text` contains large code snippets.
41///
42/// This ensures that identical findings across scans are cached,
43/// while different contexts (repo, file, pattern, or code) produce unique keys.
44///
45/// # Arguments
46///
47/// * `repo_owner` - Repository owner (e.g., "octocat")
48/// * `repo_name` - Repository name (e.g., "hello-world")
49/// * `file_path` - File path where finding was detected
50/// * `pattern_id` - Pattern ID that matched
51/// * `matched_text` - The matched code snippet
52///
53/// # Returns
54///
55/// A 64-character hexadecimal SHA-256 hash.
56#[must_use]
57pub fn cache_key(
58    repo_owner: &str,
59    repo_name: &str,
60    file_path: &str,
61    pattern_id: &str,
62    matched_text: &str,
63) -> String {
64    let mut hasher = Sha256::new();
65    hasher.update(repo_owner.as_bytes());
66    hasher.update(b"/");
67    hasher.update(repo_name.as_bytes());
68    hasher.update(b":");
69    hasher.update(file_path.as_bytes());
70    hasher.update(b":");
71    hasher.update(pattern_id.as_bytes());
72    hasher.update(b":");
73    hasher.update(matched_text.as_bytes());
74    hex::encode(hasher.finalize())
75}
76
77/// Cache for security finding validation results.
78///
79/// Wraps `FileCacheImpl` with a 7-day TTL for validated findings.
80/// Uses SHA-256 hashes as cache keys to ensure privacy and uniqueness.
81pub struct FindingCache {
82    cache: FileCacheImpl<CachedFinding>,
83}
84
85impl FindingCache {
86    /// Create a new finding cache with default settings.
87    ///
88    /// Uses a 7-day TTL and stores cache files in `~/.cache/aptu/security`.
89    #[must_use]
90    pub fn new() -> Self {
91        Self {
92            cache: FileCacheImpl::new(
93                "security",
94                Duration::days(crate::cache::DEFAULT_SECURITY_TTL_DAYS),
95            ),
96        }
97    }
98
99    /// Get a cached validated finding.
100    ///
101    /// # Arguments
102    ///
103    /// * `repo_owner` - Repository owner
104    /// * `repo_name` - Repository name
105    /// * `file_path` - File path where finding was detected
106    /// * `pattern_id` - Pattern ID that matched
107    /// * `matched_text` - The matched code snippet
108    ///
109    /// # Returns
110    ///
111    /// The cached validated finding if it exists and is within TTL, `None` otherwise.
112    #[instrument(skip(self, matched_text), fields(cache_key))]
113    pub async fn get(
114        &self,
115        repo_owner: &str,
116        repo_name: &str,
117        file_path: &str,
118        pattern_id: &str,
119        matched_text: &str,
120    ) -> Result<Option<ValidatedFinding>> {
121        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
122        tracing::Span::current().record("cache_key", &key);
123
124        self.cache
125            .get(&key)
126            .await
127            .map(|opt| opt.map(|cached| cached.validated))
128    }
129
130    /// Set a cached validated finding.
131    ///
132    /// # Arguments
133    ///
134    /// * `repo_owner` - Repository owner
135    /// * `repo_name` - Repository name
136    /// * `file_path` - File path where finding was detected
137    /// * `pattern_id` - Pattern ID that matched
138    /// * `matched_text` - The matched code snippet
139    /// * `validated` - The validated finding to cache
140    #[instrument(skip(self, matched_text, validated), fields(cache_key))]
141    pub async fn set(
142        &self,
143        repo_owner: &str,
144        repo_name: &str,
145        file_path: &str,
146        pattern_id: &str,
147        matched_text: &str,
148        validated: ValidatedFinding,
149    ) -> Result<()> {
150        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
151        tracing::Span::current().record("cache_key", &key);
152
153        let cached = CachedFinding::new(validated);
154        self.cache.set(&key, &cached).await
155    }
156}
157
158impl Default for FindingCache {
159    fn default() -> Self {
160        Self::new()
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{Confidence, Finding, Severity};

    /// Changing any single component changes the key; identical inputs agree.
    #[test]
    fn test_cache_key_uniqueness() {
        // Different repos should produce different keys
        let key1 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        let key2 = cache_key("owner2", "repo1", "src/main.rs", "pattern1", "code");
        assert_ne!(key1, key2);

        // Different files should produce different keys
        let key3 = cache_key("owner1", "repo1", "src/lib.rs", "pattern1", "code");
        assert_ne!(key1, key3);

        // Different patterns should produce different keys
        let key4 = cache_key("owner1", "repo1", "src/main.rs", "pattern2", "code");
        assert_ne!(key1, key4);

        // Different code should produce different keys
        let key5 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "different");
        assert_ne!(key1, key5);

        // Identical inputs should produce identical keys
        let key6 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        assert_eq!(key1, key6);
    }

    /// The key is a 64-character hexadecimal string.
    #[test]
    fn test_cache_key_format() {
        let key = cache_key("owner", "repo", "file.rs", "pattern", "code");
        // SHA-256 produces 64 hex characters
        assert_eq!(key.len(), 64);
        assert!(key.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// The key must not leak fragments of the hashed inputs.
    #[test]
    fn test_cache_key_privacy() {
        // Cache key should not contain sensitive data
        let key = cache_key(
            "owner",
            "repo",
            "config.rs",
            "hardcoded-secret",
            "api_key = \"sk-secret123\"",
        );
        assert!(!key.contains("secret"));
        assert!(!key.contains("api_key"));
        assert!(!key.contains("sk-"));
    }

    /// A value written with `set` is returned unchanged by `get`.
    #[tokio::test]
    async fn test_finding_cache_hit() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test-pattern".to_string(),
                description: "Test finding".to_string(),
                severity: Severity::High,
                confidence: Confidence::Medium,
                file_path: "src/test.rs".to_string(),
                line_number: 42,
                matched_text: "test code".to_string(),
                cwe: None,
            },
            is_valid: true,
            reasoning: "Test reasoning".to_string(),
            model_version: Some("test-model".to_string()),
        };

        // Set cache
        cache
            .set(
                "owner",
                "repo",
                "src/test.rs",
                "test-pattern",
                "test code",
                validated.clone(),
            )
            .await
            .expect("set cache");

        // Get cache hit (result is inspected only after cleanup)
        let lookup = cache
            .get("owner", "repo", "src/test.rs", "test-pattern", "test code")
            .await;

        // Cleanup first: a failing assertion below must not leave a stale
        // entry behind in the shared cache for later test runs.
        let key = cache_key("owner", "repo", "src/test.rs", "test-pattern", "test code");
        cache.cache.remove(&key).await.ok();

        let result = lookup.expect("get cache");
        assert_eq!(result, Some(validated));
    }

    /// Looking up a key that was never stored yields `None`.
    #[tokio::test]
    async fn test_finding_cache_miss() {
        let cache = FindingCache::new();

        let result = cache
            .get("owner", "repo", "src/nonexistent.rs", "pattern", "code")
            .await
            .expect("get cache");

        assert!(result.is_none());
    }

    /// An entry stored for one owner is not returned for a different owner.
    #[tokio::test]
    async fn test_finding_cache_different_context() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "pattern".to_string(),
                description: "Finding".to_string(),
                severity: Severity::Medium,
                confidence: Confidence::High,
                file_path: "src/file.rs".to_string(),
                line_number: 10,
                matched_text: "code".to_string(),
                cwe: None,
            },
            is_valid: false,
            reasoning: "False positive".to_string(),
            model_version: None,
        };

        // Set cache for one context
        cache
            .set(
                "owner1",
                "repo1",
                "src/file.rs",
                "pattern",
                "code",
                validated,
            )
            .await
            .expect("set cache");

        // Different owner should miss (result is inspected only after cleanup)
        let lookup = cache
            .get("owner2", "repo1", "src/file.rs", "pattern", "code")
            .await;

        // Cleanup first: a failing assertion below must not leave a stale
        // entry behind in the shared cache for later test runs.
        let key = cache_key("owner1", "repo1", "src/file.rs", "pattern", "code");
        cache.cache.remove(&key).await.ok();

        let result = lookup.expect("get cache");
        assert!(result.is_none());
    }

    /// `CachedFinding` survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_cached_finding_serialization() {
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test".to_string(),
                description: "Test".to_string(),
                severity: Severity::Low,
                confidence: Confidence::Low,
                file_path: "test.rs".to_string(),
                line_number: 1,
                matched_text: "test".to_string(),
                cwe: Some("CWE-123".to_string()),
            },
            is_valid: true,
            reasoning: "Valid".to_string(),
            model_version: Some("model-v1".to_string()),
        };

        let cached = CachedFinding::new(validated.clone());
        let json = serde_json::to_string(&cached).expect("serialize");
        let deserialized: CachedFinding = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(cached, deserialized);
        assert_eq!(deserialized.validated, validated);
    }
}
343}