syncable_cli/analyzer/security/turbo/
cache.rs

1//! # Cache Module
2//!
//! High-performance, path-keyed caching of security scan results using DashMap (hashed with ahash).
4
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::time::{Duration, SystemTime};
8
9use dashmap::DashMap;
10
11use log::{debug, trace};
12
13use crate::analyzer::security::SecurityFinding;
14
/// Identifies a cached scan result by the path of the scanned file.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CacheKey {
    /// Path of the file the cached findings belong to.
    pub file_path: PathBuf,
}
20
21/// Cached scan result
22#[derive(Debug, Clone)]
23pub struct CachedResult {
24    pub findings: Vec<SecurityFinding>,
25    pub cached_at: SystemTime,
26    pub access_count: u32,
27}
28
29/// High-performance security cache
30pub struct SecurityCache {
31    // Main cache storage
32    cache: Arc<DashMap<PathBuf, CachedEntry, ahash::RandomState>>,
33
34    // Cache configuration
35    max_size_bytes: usize,
36    current_size_bytes: Arc<parking_lot::Mutex<usize>>,
37    eviction_threshold: f64,
38
39    // Statistics
40    hits: Arc<parking_lot::Mutex<u64>>,
41    misses: Arc<parking_lot::Mutex<u64>>,
42}
43
44/// Internal cache entry
45#[derive(Debug, Clone)]
46struct CachedEntry {
47    key: CacheKey,
48    result: CachedResult,
49    size_bytes: usize,
50    last_accessed: SystemTime,
51}
52
53impl SecurityCache {
54    /// Create a new cache with specified size in MB
55    pub fn new(size_mb: usize) -> Self {
56        let max_size_bytes = size_mb * 1024 * 1024;
57        let hasher = ahash::RandomState::new();
58
59        Self {
60            cache: Arc::new(DashMap::with_hasher(hasher)),
61            max_size_bytes,
62            current_size_bytes: Arc::new(parking_lot::Mutex::new(0)),
63            eviction_threshold: 0.9, // Start eviction at 90% capacity
64            hits: Arc::new(parking_lot::Mutex::new(0)),
65            misses: Arc::new(parking_lot::Mutex::new(0)),
66        }
67    }
68
69    /// Get cached result for a file
70    pub fn get(&self, file_path: &PathBuf) -> Option<Vec<SecurityFinding>> {
71        let entry = self.cache.get_mut(file_path)?;
72
73        // Update access statistics
74        let mut entry = entry;
75        entry.last_accessed = SystemTime::now();
76        entry.result.access_count += 1;
77
78        *self.hits.lock() += 1;
79        trace!("Cache hit for: {}", file_path.display());
80
81        Some(entry.result.findings.clone())
82    }
83
84    /// Insert a scan result into cache
85    pub fn insert(&self, file_path: PathBuf, findings: Vec<SecurityFinding>) {
86        // Calculate entry size
87        let size_bytes = Self::estimate_size(&findings);
88
89        // Check if we need to evict entries
90        let current_size = *self.current_size_bytes.lock();
91        if current_size + size_bytes
92            > (self.max_size_bytes as f64 * self.eviction_threshold) as usize
93        {
94            self.evict_lru();
95        }
96
97        // Create cache key
98        let key = CacheKey {
99            file_path: file_path.clone(),
100        };
101
102        // Create cache entry
103        let entry = CachedEntry {
104            key,
105            result: CachedResult {
106                findings,
107                cached_at: SystemTime::now(),
108                access_count: 1,
109            },
110            size_bytes,
111            last_accessed: SystemTime::now(),
112        };
113
114        // Insert into cache
115        if let Some(old_entry) = self.cache.insert(file_path, entry) {
116            // Subtract old entry size
117            *self.current_size_bytes.lock() -= old_entry.size_bytes;
118        }
119
120        // Add new entry size
121        *self.current_size_bytes.lock() += size_bytes;
122
123        debug!(
124            "Cached result, current size: {} MB",
125            *self.current_size_bytes.lock() / (1024 * 1024)
126        );
127    }
128
129    /// Clear the entire cache
130    pub fn clear(&self) {
131        self.cache.clear();
132        *self.current_size_bytes.lock() = 0;
133        *self.hits.lock() = 0;
134        *self.misses.lock() = 0;
135        debug!("Cache cleared");
136    }
137
138    /// Get cache statistics
139    pub fn stats(&self) -> CacheStats {
140        let hits = *self.hits.lock();
141        let misses = *self.misses.lock();
142        let total = hits + misses;
143
144        CacheStats {
145            hits,
146            misses,
147            hit_rate: if total > 0 {
148                hits as f64 / total as f64
149            } else {
150                0.0
151            },
152            entries: self.cache.len(),
153            size_bytes: *self.current_size_bytes.lock(),
154            capacity_bytes: self.max_size_bytes,
155        }
156    }
157
158    /// Evict least recently used entries
159    fn evict_lru(&self) {
160        let target_size = (self.max_size_bytes as f64 * 0.7) as usize; // Evict to 70% capacity
161        let mut entries_to_remove = Vec::new();
162
163        // Collect entries sorted by last access time
164        let mut entries: Vec<(PathBuf, SystemTime, usize)> = self
165            .cache
166            .iter()
167            .map(|entry| (entry.key().clone(), entry.last_accessed, entry.size_bytes))
168            .collect();
169
170        // Sort by last accessed (oldest first)
171        entries.sort_by_key(|(_, last_accessed, _)| *last_accessed);
172
173        // Determine which entries to remove
174        let mut current_size = *self.current_size_bytes.lock();
175        for (path, _, size) in entries {
176            if current_size <= target_size {
177                break;
178            }
179
180            entries_to_remove.push(path);
181            current_size -= size;
182        }
183
184        // Count entries to remove
185        let entries_removed = entries_to_remove.len();
186
187        // Remove entries
188        for path in entries_to_remove {
189            if let Some((_, entry)) = self.cache.remove(&path) {
190                *self.current_size_bytes.lock() -= entry.size_bytes;
191            }
192        }
193
194        debug!(
195            "Evicted {} entries, new size: {} MB",
196            entries_removed,
197            *self.current_size_bytes.lock() / (1024 * 1024)
198        );
199    }
200
201    /// Estimate memory size of findings
202    fn estimate_size(findings: &[SecurityFinding]) -> usize {
203        // Base size for the vector
204        let mut size = std::mem::size_of::<Vec<SecurityFinding>>();
205
206        // Add size for each finding
207        for finding in findings {
208            size += std::mem::size_of::<SecurityFinding>();
209
210            // Add string sizes
211            size += finding.id.len();
212            size += finding.title.len();
213            size += finding.description.len();
214
215            if let Some(ref path) = finding.file_path {
216                size += path.to_string_lossy().len();
217            }
218
219            if let Some(ref evidence) = finding.evidence {
220                size += evidence.len();
221            }
222
223            // Add vector sizes
224            size += finding.remediation.iter().map(|s| s.len()).sum::<usize>();
225            size += finding.references.iter().map(|s| s.len()).sum::<usize>();
226            size += finding
227                .compliance_frameworks
228                .iter()
229                .map(|s| s.len())
230                .sum::<usize>();
231
232            if let Some(ref cwe) = finding.cwe_id {
233                size += cwe.len();
234            }
235        }
236
237        size
238    }
239
240    /// Invalidate cache entries older than duration
241    pub fn invalidate_older_than(&self, duration: Duration) {
242        let cutoff = SystemTime::now() - duration;
243        let mut removed = 0;
244
245        self.cache.retain(|_, entry| {
246            if entry.result.cached_at < cutoff {
247                *self.current_size_bytes.lock() -= entry.size_bytes;
248                removed += 1;
249                false
250            } else {
251                true
252            }
253        });
254
255        if removed > 0 {
256            debug!("Invalidated {} stale cache entries", removed);
257        }
258    }
259}
260
/// Point-in-time snapshot of cache counters and size.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Number of hits counted by the cache.
    pub hits: u64,
    /// Number of misses counted by the cache.
    pub misses: u64,
    /// Fraction of counted lookups that were hits (`0.0` when there were none).
    pub hit_rate: f64,
    /// Number of entries stored at snapshot time.
    pub entries: usize,
    /// Tracked size of the stored entries, in bytes.
    pub size_bytes: usize,
    /// Configured capacity of the cache, in bytes.
    pub capacity_bytes: usize,
}

impl CacheStats {
    /// Tracked size expressed in mebibytes.
    pub fn size_mb(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
        self.size_bytes as f64 / BYTES_PER_MB
    }

    /// Tracked size as a percentage of capacity; `0.0` for a zero-capacity cache.
    pub fn utilization(&self) -> f64 {
        match self.capacity_bytes {
            0 => 0.0,
            capacity => (self.size_bytes as f64 / capacity as f64) * 100.0,
        }
    }
}
287
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzer::security::{SecurityCategory, SecuritySeverity};

    /// An inserted result can be read back and shows up in the statistics.
    #[test]
    fn test_cache_basic_operations() {
        // 10 MB capacity
        let cache = SecurityCache::new(10);
        let path = PathBuf::from("/test/file.js");

        let finding = SecurityFinding {
            id: "test-1".to_string(),
            title: "Test Finding".to_string(),
            description: "Test description".to_string(),
            severity: SecuritySeverity::High,
            category: SecurityCategory::SecretsExposure,
            file_path: Some(path.clone()),
            line_number: Some(10),
            column_number: Some(5),
            evidence: Some("evidence".to_string()),
            remediation: vec!["Fix it".to_string()],
            references: vec!["https://example.com".to_string()],
            cwe_id: Some("CWE-798".to_string()),
            compliance_frameworks: vec!["SOC2".to_string()],
        };

        // Store one finding for the path.
        cache.insert(path.clone(), vec![finding]);

        // A single lookup must return exactly that one finding.
        assert_eq!(cache.get(&path).map(|found| found.len()), Some(1));

        // One hit, no misses, one stored entry.
        let snapshot = cache.stats();
        assert_eq!(snapshot.hits, 1);
        assert_eq!(snapshot.misses, 0);
        assert_eq!(snapshot.entries, 1);
    }

    /// Filling a small cache should trigger eviction.
    #[test]
    #[ignore] // Marked flaky by the original author ("cache eviction timing depends on system memory")
    fn test_cache_eviction() {
        const ENTRY_COUNT: usize = 1000;

        // 1 MB capacity, deliberately small.
        let cache = SecurityCache::new(1);

        // Insert enough entries that eviction is expected to kick in.
        for index in 0..ENTRY_COUNT {
            let path = PathBuf::from(format!("/test/file{}.js", index));
            let finding = SecurityFinding {
                id: format!("test-{}", index),
                title: "Test Finding with very long title to consume memory".to_string(),
                description: "Test description that is also quite long to use up cache space"
                    .to_string(),
                severity: SecuritySeverity::High,
                category: SecurityCategory::SecretsExposure,
                file_path: Some(path.clone()),
                line_number: Some(10),
                column_number: Some(5),
                evidence: Some("evidence with long content to test memory usage".to_string()),
                remediation: vec!["Fix it with a long remediation message".to_string()],
                references: vec!["https://example.com/very/long/url/path".to_string()],
                cwe_id: Some("CWE-798".to_string()),
                compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
            };

            cache.insert(path, vec![finding]);
        }

        // Some entries must be gone and utilization must stay within the threshold.
        let snapshot = cache.stats();
        assert!(snapshot.entries < ENTRY_COUNT);
        assert!(snapshot.utilization() <= 90.0);
    }

    /// Invalidating with a zero duration empties the cache.
    #[test]
    fn test_cache_invalidation() {
        let cache = SecurityCache::new(10);
        let path = PathBuf::from("/test/file.js");

        cache.insert(path.clone(), Vec::new());

        // A zero-second age limit is expected to cover every entry.
        cache.invalidate_older_than(Duration::from_secs(0));

        // Nothing may remain afterwards.
        assert!(cache.get(&path).is_none());
        assert_eq!(cache.stats().entries, 0);
    }
}