syncable_cli/analyzer/security/turbo/
cache.rs

//! # Cache Module
//!
//! High-performance caching of security scan results, keyed by file path and
//! backed by a concurrent `DashMap`.
4
5use std::path::PathBuf;
6use std::time::{SystemTime, Duration};
7use std::sync::Arc;
8
9use dashmap::DashMap;
10
11use log::{debug, trace};
12
13use crate::analyzer::security::SecurityFinding;
14
/// Key identifying a cache entry by the path of the scanned file.
///
/// Two keys are equal (and hash identically) exactly when their paths are equal.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CacheKey {
    /// Path of the file the cached findings belong to.
    pub file_path: PathBuf,
}
20
21/// Cached scan result
22#[derive(Debug, Clone)]
23pub struct CachedResult {
24    pub findings: Vec<SecurityFinding>,
25    pub cached_at: SystemTime,
26    pub access_count: u32,
27}
28
29/// High-performance security cache
30pub struct SecurityCache {
31    // Main cache storage
32    cache: Arc<DashMap<PathBuf, CachedEntry, ahash::RandomState>>,
33    
34    // Cache configuration
35    max_size_bytes: usize,
36    current_size_bytes: Arc<parking_lot::Mutex<usize>>,
37    eviction_threshold: f64,
38    
39    // Statistics
40    hits: Arc<parking_lot::Mutex<u64>>,
41    misses: Arc<parking_lot::Mutex<u64>>,
42}
43
44/// Internal cache entry
45#[derive(Debug, Clone)]
46struct CachedEntry {
47    key: CacheKey,
48    result: CachedResult,
49    size_bytes: usize,
50    last_accessed: SystemTime,
51}
52
53impl SecurityCache {
54    /// Create a new cache with specified size in MB
55    pub fn new(size_mb: usize) -> Self {
56        let max_size_bytes = size_mb * 1024 * 1024;
57        let hasher = ahash::RandomState::new();
58        
59        Self {
60            cache: Arc::new(DashMap::with_hasher(hasher)),
61            max_size_bytes,
62            current_size_bytes: Arc::new(parking_lot::Mutex::new(0)),
63            eviction_threshold: 0.9, // Start eviction at 90% capacity
64            hits: Arc::new(parking_lot::Mutex::new(0)),
65            misses: Arc::new(parking_lot::Mutex::new(0)),
66        }
67    }
68    
69    /// Get cached result for a file
70    pub fn get(&self, file_path: &PathBuf) -> Option<Vec<SecurityFinding>> {
71        let entry = self.cache.get_mut(file_path)?;
72        
73        // Update access statistics
74        let mut entry = entry;
75        entry.last_accessed = SystemTime::now();
76        entry.result.access_count += 1;
77        
78        *self.hits.lock() += 1;
79        trace!("Cache hit for: {}", file_path.display());
80        
81        Some(entry.result.findings.clone())
82    }
83    
84    /// Insert a scan result into cache
85    pub fn insert(&self, file_path: PathBuf, findings: Vec<SecurityFinding>) {
86        // Calculate entry size
87        let size_bytes = Self::estimate_size(&findings);
88        
89        // Check if we need to evict entries
90        let current_size = *self.current_size_bytes.lock();
91        if current_size + size_bytes > (self.max_size_bytes as f64 * self.eviction_threshold) as usize {
92            self.evict_lru();
93        }
94        
95        // Create cache key
96        let key = CacheKey {
97            file_path: file_path.clone(),
98        };
99        
100        // Create cache entry
101        let entry = CachedEntry {
102            key,
103            result: CachedResult {
104                findings,
105                cached_at: SystemTime::now(),
106                access_count: 1,
107            },
108            size_bytes,
109            last_accessed: SystemTime::now(),
110        };
111        
112        // Insert into cache
113        if let Some(old_entry) = self.cache.insert(file_path, entry) {
114            // Subtract old entry size
115            *self.current_size_bytes.lock() -= old_entry.size_bytes;
116        }
117        
118        // Add new entry size
119        *self.current_size_bytes.lock() += size_bytes;
120        
121        debug!("Cached result, current size: {} MB", 
122               *self.current_size_bytes.lock() / (1024 * 1024));
123    }
124    
125    /// Clear the entire cache
126    pub fn clear(&self) {
127        self.cache.clear();
128        *self.current_size_bytes.lock() = 0;
129        *self.hits.lock() = 0;
130        *self.misses.lock() = 0;
131        debug!("Cache cleared");
132    }
133    
134    /// Get cache statistics
135    pub fn stats(&self) -> CacheStats {
136        let hits = *self.hits.lock();
137        let misses = *self.misses.lock();
138        let total = hits + misses;
139        
140        CacheStats {
141            hits,
142            misses,
143            hit_rate: if total > 0 { hits as f64 / total as f64 } else { 0.0 },
144            entries: self.cache.len(),
145            size_bytes: *self.current_size_bytes.lock(),
146            capacity_bytes: self.max_size_bytes,
147        }
148    }
149    
150    /// Evict least recently used entries
151    fn evict_lru(&self) {
152        let target_size = (self.max_size_bytes as f64 * 0.7) as usize; // Evict to 70% capacity
153        let mut entries_to_remove = Vec::new();
154        
155        // Collect entries sorted by last access time
156        let mut entries: Vec<(PathBuf, SystemTime, usize)> = self.cache.iter()
157            .map(|entry| (entry.key().clone(), entry.last_accessed, entry.size_bytes))
158            .collect();
159        
160        // Sort by last accessed (oldest first)
161        entries.sort_by_key(|(_, last_accessed, _)| *last_accessed);
162        
163        // Determine which entries to remove
164        let mut current_size = *self.current_size_bytes.lock();
165        for (path, _, size) in entries {
166            if current_size <= target_size {
167                break;
168            }
169            
170            entries_to_remove.push(path);
171            current_size -= size;
172        }
173        
174        // Count entries to remove
175        let entries_removed = entries_to_remove.len();
176        
177        // Remove entries
178        for path in entries_to_remove {
179            if let Some((_, entry)) = self.cache.remove(&path) {
180                *self.current_size_bytes.lock() -= entry.size_bytes;
181            }
182        }
183        
184        debug!("Evicted {} entries, new size: {} MB", 
185               entries_removed,
186               *self.current_size_bytes.lock() / (1024 * 1024));
187    }
188    
189
190    
191    /// Estimate memory size of findings
192    fn estimate_size(findings: &[SecurityFinding]) -> usize {
193        // Base size for the vector
194        let mut size = std::mem::size_of::<Vec<SecurityFinding>>();
195        
196        // Add size for each finding
197        for finding in findings {
198            size += std::mem::size_of::<SecurityFinding>();
199            
200            // Add string sizes
201            size += finding.id.len();
202            size += finding.title.len();
203            size += finding.description.len();
204            
205            if let Some(ref path) = finding.file_path {
206                size += path.to_string_lossy().len();
207            }
208            
209            if let Some(ref evidence) = finding.evidence {
210                size += evidence.len();
211            }
212            
213            // Add vector sizes
214            size += finding.remediation.iter().map(|s| s.len()).sum::<usize>();
215            size += finding.references.iter().map(|s| s.len()).sum::<usize>();
216            size += finding.compliance_frameworks.iter().map(|s| s.len()).sum::<usize>();
217            
218            if let Some(ref cwe) = finding.cwe_id {
219                size += cwe.len();
220            }
221        }
222        
223        size
224    }
225    
226    /// Invalidate cache entries older than duration
227    pub fn invalidate_older_than(&self, duration: Duration) {
228        let cutoff = SystemTime::now() - duration;
229        let mut removed = 0;
230        
231        self.cache.retain(|_, entry| {
232            if entry.result.cached_at < cutoff {
233                *self.current_size_bytes.lock() -= entry.size_bytes;
234                removed += 1;
235                false
236            } else {
237                true
238            }
239        });
240        
241        if removed > 0 {
242            debug!("Invalidated {} stale cache entries", removed);
243        }
244    }
245}
246
/// Point-in-time snapshot of cache counters and size usage.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Number of recorded cache hits.
    pub hits: u64,
    /// Number of recorded cache misses.
    pub misses: u64,
    /// Hit ratio reported for the snapshot.
    pub hit_rate: f64,
    /// Number of entries held by the cache.
    pub entries: usize,
    /// Tracked size of the cached data in bytes.
    pub size_bytes: usize,
    /// Configured capacity in bytes.
    pub capacity_bytes: usize,
}

impl CacheStats {
    /// Returns the tracked size expressed in mebibytes.
    pub fn size_mb(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
        self.size_bytes as f64 / BYTES_PER_MB
    }

    /// Returns used capacity as a percentage; `0.0` when the capacity is zero.
    pub fn utilization(&self) -> f64 {
        match self.capacity_bytes {
            0 => 0.0,
            capacity => (self.size_bytes as f64 / capacity as f64) * 100.0,
        }
    }
}
273
274
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzer::security::{SecuritySeverity, SecurityCategory};

    /// Insert followed by lookup returns the stored findings and records one hit.
    #[test]
    fn test_cache_basic_operations() {
        let cache = SecurityCache::new(10); // 10MB cache

        let path = PathBuf::from("/test/file.js");
        let findings = vec![
            SecurityFinding {
                id: "test-1".to_string(),
                title: "Test Finding".to_string(),
                description: "Test description".to_string(),
                severity: SecuritySeverity::High,
                category: SecurityCategory::SecretsExposure,
                file_path: Some(path.clone()),
                line_number: Some(10),
                column_number: Some(5),
                evidence: Some("evidence".to_string()),
                remediation: vec!["Fix it".to_string()],
                references: vec!["https://example.com".to_string()],
                cwe_id: Some("CWE-798".to_string()),
                compliance_frameworks: vec!["SOC2".to_string()],
            }
        ];

        // Test insert
        cache.insert(path.clone(), findings.clone());

        // Test get
        let cached = cache.get(&path);
        assert!(cached.is_some());
        assert_eq!(cached.unwrap().len(), 1);

        // Test stats
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 0);
        assert_eq!(stats.entries, 1);
    }

    /// Filling a small cache well past its capacity must trigger eviction.
    #[test]
    fn test_cache_eviction() {
        let cache = SecurityCache::new(1); // 1MB cache (small for testing)

        // Each entry carries several KB of evidence so that 1000 inserts add up
        // to multiple MB; with only short strings the total would presumably
        // stay below the eviction threshold and nothing would be evicted.
        let evidence = "evidence with long content to test memory usage ".repeat(100);

        for i in 0..1000 {
            let path = PathBuf::from(format!("/test/file{}.js", i));
            let findings = vec![
                SecurityFinding {
                    id: format!("test-{}", i),
                    title: "Test Finding with very long title to consume memory".to_string(),
                    description: "Test description that is also quite long to use up cache space".to_string(),
                    severity: SecuritySeverity::High,
                    category: SecurityCategory::SecretsExposure,
                    file_path: Some(path.clone()),
                    line_number: Some(10),
                    column_number: Some(5),
                    evidence: Some(evidence.clone()),
                    remediation: vec!["Fix it with a long remediation message".to_string()],
                    references: vec!["https://example.com/very/long/url/path".to_string()],
                    cwe_id: Some("CWE-798".to_string()),
                    compliance_frameworks: vec!["SOC2".to_string(), "GDPR".to_string()],
                }
            ];

            cache.insert(path, findings);
        }

        // Cache should have evicted some entries
        let stats = cache.stats();
        assert!(stats.entries < 1000);
        assert!(stats.utilization() <= 90.0);
    }

    /// Entries older than the given age are dropped by invalidation.
    #[test]
    fn test_cache_invalidation() {
        let cache = SecurityCache::new(10);

        let path = PathBuf::from("/test/file.js");
        let findings = vec![];

        cache.insert(path.clone(), findings);

        // Let the wall clock advance so the entry is strictly older than the
        // cutoff even on platforms with a coarse clock resolution.
        std::thread::sleep(Duration::from_millis(5));

        // Invalidate entries older than 0 seconds (all entries)
        cache.invalidate_older_than(Duration::from_secs(0));

        // Cache should be empty
        assert!(cache.get(&path).is_none());
        assert_eq!(cache.stats().entries, 0);
    }
}