Skip to main content

aptu_core/security/
cache.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security finding cache for LLM validation results.
4//!
5//! Caches validated findings using SHA-256 hashes of (repo, file, pattern, snippet)
6//! to avoid redundant LLM calls for identical findings across scans.
7
8use anyhow::Result;
9use chrono::Duration;
10use hex;
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use tracing::instrument;
14
15#[cfg(not(target_arch = "wasm32"))]
16use crate::cache::{FileCache, FileCacheImpl};
17
18use super::ValidatedFinding;
19
/// A cached security finding with validation result.
///
/// This is the value type stored in the finding cache: a serializable
/// wrapper around a single [`ValidatedFinding`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CachedFinding {
    /// The validated finding that was produced for the cached key.
    pub validated: ValidatedFinding,
}
26
impl CachedFinding {
    /// Create a new cached finding.
    ///
    /// Takes ownership of `validated` and wraps it without modification.
    #[must_use]
    pub fn new(validated: ValidatedFinding) -> Self {
        Self { validated }
    }
}
34
35/// Generate a cache key for a security finding.
36///
37/// Creates a SHA-256 hash of the concatenated components:
38/// `{repo_owner}/{repo_name}:{file_path}:{pattern_id}:{matched_text}`
39///
40/// Uses incremental hashing to avoid allocating a large intermediate string,
41/// which is more memory-efficient when `matched_text` contains large code snippets.
42///
43/// This ensures that identical findings across scans are cached,
44/// while different contexts (repo, file, pattern, or code) produce unique keys.
45///
46/// # Arguments
47///
48/// * `repo_owner` - Repository owner (e.g., "octocat")
49/// * `repo_name` - Repository name (e.g., "hello-world")
50/// * `file_path` - File path where finding was detected
51/// * `pattern_id` - Pattern ID that matched
52/// * `matched_text` - The matched code snippet
53///
54/// # Returns
55///
56/// A 64-character hexadecimal SHA-256 hash.
57#[must_use]
58pub fn cache_key(
59    repo_owner: &str,
60    repo_name: &str,
61    file_path: &str,
62    pattern_id: &str,
63    matched_text: &str,
64) -> String {
65    let mut hasher = Sha256::new();
66    hasher.update(repo_owner.as_bytes());
67    hasher.update(b"/");
68    hasher.update(repo_name.as_bytes());
69    hasher.update(b":");
70    hasher.update(file_path.as_bytes());
71    hasher.update(b":");
72    hasher.update(pattern_id.as_bytes());
73    hasher.update(b":");
74    hasher.update(matched_text.as_bytes());
75    hex::encode(hasher.finalize())
76}
77
/// Cache for security finding validation results.
///
/// Wraps a `FileCacheImpl` holding [`CachedFinding`] entries. The TTL is set
/// in [`FindingCache::new`] from `crate::cache::DEFAULT_SECURITY_TTL_DAYS`.
/// Entries are addressed by the SHA-256 hash produced by [`cache_key`], so raw
/// code snippets never appear in the keys themselves.
///
/// Only compiled for non-`wasm32` targets.
#[cfg(not(target_arch = "wasm32"))]
pub struct FindingCache {
    /// Underlying cache storing one `CachedFinding` per key.
    cache: FileCacheImpl<CachedFinding>,
}
86
87#[cfg(not(target_arch = "wasm32"))]
88impl FindingCache {
89    /// Create a new finding cache with default settings.
90    ///
91    /// Uses a 7-day TTL and stores cache files in `~/.cache/aptu/security`.
92    #[must_use]
93    pub fn new() -> Self {
94        Self {
95            cache: FileCacheImpl::new(
96                "security",
97                Duration::days(crate::cache::DEFAULT_SECURITY_TTL_DAYS),
98            ),
99        }
100    }
101
102    /// Get a cached validated finding.
103    ///
104    /// # Arguments
105    ///
106    /// * `repo_owner` - Repository owner
107    /// * `repo_name` - Repository name
108    /// * `file_path` - File path where finding was detected
109    /// * `pattern_id` - Pattern ID that matched
110    /// * `matched_text` - The matched code snippet
111    ///
112    /// # Returns
113    ///
114    /// The cached validated finding if it exists and is within TTL, `None` otherwise.
115    #[instrument(skip(self, matched_text), fields(cache_key))]
116    pub async fn get(
117        &self,
118        repo_owner: &str,
119        repo_name: &str,
120        file_path: &str,
121        pattern_id: &str,
122        matched_text: &str,
123    ) -> Result<Option<ValidatedFinding>> {
124        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
125        tracing::Span::current().record("cache_key", &key);
126
127        self.cache
128            .get(&key)
129            .await
130            .map(|opt| opt.map(|cached| cached.validated))
131    }
132
133    /// Set a cached validated finding.
134    ///
135    /// # Arguments
136    ///
137    /// * `repo_owner` - Repository owner
138    /// * `repo_name` - Repository name
139    /// * `file_path` - File path where finding was detected
140    /// * `pattern_id` - Pattern ID that matched
141    /// * `matched_text` - The matched code snippet
142    /// * `validated` - The validated finding to cache
143    #[instrument(skip(self, matched_text, validated), fields(cache_key))]
144    pub async fn set(
145        &self,
146        repo_owner: &str,
147        repo_name: &str,
148        file_path: &str,
149        pattern_id: &str,
150        matched_text: &str,
151        validated: ValidatedFinding,
152    ) -> Result<()> {
153        let key = cache_key(repo_owner, repo_name, file_path, pattern_id, matched_text);
154        tracing::Span::current().record("cache_key", &key);
155
156        let cached = CachedFinding::new(validated);
157        self.cache.set(&key, &cached).await
158    }
159}
160
#[cfg(not(target_arch = "wasm32"))]
impl Default for FindingCache {
    /// Equivalent to [`FindingCache::new`].
    fn default() -> Self {
        Self::new()
    }
}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{Confidence, Finding, Severity};

    /// Changing any single component changes the key; equal inputs give equal keys.
    #[test]
    fn test_cache_key_uniqueness() {
        // Different repos should produce different keys
        let key1 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        let key2 = cache_key("owner2", "repo1", "src/main.rs", "pattern1", "code");
        assert_ne!(key1, key2);

        // Different files should produce different keys
        let key3 = cache_key("owner1", "repo1", "src/lib.rs", "pattern1", "code");
        assert_ne!(key1, key3);

        // Different patterns should produce different keys
        let key4 = cache_key("owner1", "repo1", "src/main.rs", "pattern2", "code");
        assert_ne!(key1, key4);

        // Different code should produce different keys
        let key5 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "different");
        assert_ne!(key1, key5);

        // Identical inputs should produce identical keys
        let key6 = cache_key("owner1", "repo1", "src/main.rs", "pattern1", "code");
        assert_eq!(key1, key6);
    }

    /// The key is a 64-character hexadecimal string.
    #[test]
    fn test_cache_key_format() {
        let key = cache_key("owner", "repo", "file.rs", "pattern", "code");
        // SHA-256 produces 64 hex characters
        assert_eq!(key.len(), 64);
        assert!(key.chars().all(|c| c.is_ascii_hexdigit()));
    }

    /// The key must not leak fragments of the hashed inputs.
    #[test]
    fn test_cache_key_privacy() {
        // Cache key should not contain sensitive data
        let key = cache_key(
            "owner",
            "repo",
            "config.rs",
            "hardcoded-secret",
            "api_key = \"sk-secret123\"",
        );
        assert!(!key.contains("secret"));
        assert!(!key.contains("api_key"));
        assert!(!key.contains("sk-"));
    }

    /// A value written with `set` is returned unchanged by `get` for the same context.
    // `FindingCache` only exists on non-wasm32 targets, so gate the test the same way.
    #[cfg(not(target_arch = "wasm32"))]
    #[tokio::test]
    async fn test_finding_cache_hit() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test-pattern".to_string(),
                description: "Test finding".to_string(),
                severity: Severity::High,
                confidence: Confidence::Medium,
                file_path: "src/test.rs".to_string(),
                line_number: 42,
                matched_text: "test code".to_string(),
                cwe: None,
            },
            is_valid: true,
            reasoning: "Test reasoning".to_string(),
            model_version: Some("test-model".to_string()),
        };

        // Set cache
        cache
            .set(
                "owner",
                "repo",
                "src/test.rs",
                "test-pattern",
                "test code",
                validated.clone(),
            )
            .await
            .expect("set cache");

        // Look the entry up, but defer unwrapping until after cleanup.
        let lookup = cache
            .get("owner", "repo", "src/test.rs", "test-pattern", "test code")
            .await;

        // Cleanup first, so a failing assertion below cannot leave this test
        // entry behind in the cache created by `FindingCache::new()`.
        let key = cache_key("owner", "repo", "src/test.rs", "test-pattern", "test code");
        cache.cache.remove(&key).await.ok();

        assert_eq!(lookup.expect("get cache"), Some(validated));
    }

    /// Looking up a context that was never stored yields `None`.
    #[cfg(not(target_arch = "wasm32"))]
    #[tokio::test]
    async fn test_finding_cache_miss() {
        let cache = FindingCache::new();

        let result = cache
            .get("owner", "repo", "src/nonexistent.rs", "pattern", "code")
            .await
            .expect("get cache");

        assert!(result.is_none());
    }

    /// An entry stored for one owner is not visible under a different owner.
    #[cfg(not(target_arch = "wasm32"))]
    #[tokio::test]
    async fn test_finding_cache_different_context() {
        let cache = FindingCache::new();
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "pattern".to_string(),
                description: "Finding".to_string(),
                severity: Severity::Medium,
                confidence: Confidence::High,
                file_path: "src/file.rs".to_string(),
                line_number: 10,
                matched_text: "code".to_string(),
                cwe: None,
            },
            is_valid: false,
            reasoning: "False positive".to_string(),
            model_version: None,
        };

        // Set cache for one context
        cache
            .set(
                "owner1",
                "repo1",
                "src/file.rs",
                "pattern",
                "code",
                validated,
            )
            .await
            .expect("set cache");

        // Different owner should miss; defer unwrapping until after cleanup.
        let lookup = cache
            .get("owner2", "repo1", "src/file.rs", "pattern", "code")
            .await;

        // Cleanup first, so a failing assertion below cannot leave this test
        // entry behind in the cache created by `FindingCache::new()`.
        let key = cache_key("owner1", "repo1", "src/file.rs", "pattern", "code");
        cache.cache.remove(&key).await.ok();

        assert!(lookup.expect("get cache").is_none());
    }

    /// `CachedFinding` survives a JSON round trip without losing data.
    #[test]
    fn test_cached_finding_serialization() {
        let validated = ValidatedFinding {
            finding: Finding {
                pattern_id: "test".to_string(),
                description: "Test".to_string(),
                severity: Severity::Low,
                confidence: Confidence::Low,
                file_path: "test.rs".to_string(),
                line_number: 1,
                matched_text: "test".to_string(),
                cwe: Some("CWE-123".to_string()),
            },
            is_valid: true,
            reasoning: "Valid".to_string(),
            model_version: Some("model-v1".to_string()),
        };

        let cached = CachedFinding::new(validated.clone());
        let json = serde_json::to_string(&cached).expect("serialize");
        let deserialized: CachedFinding = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(cached, deserialized);
        assert_eq!(deserialized.validated, validated);
    }
}