syncable_cli/analyzer/security/turbo/
cache.rs

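//! # Cache Module
//!
//! High-performance, size-bounded caching for security scan results, keyed by
//! file path and stored in a concurrent `DashMap`.
//!
//! NOTE(review): earlier documentation mentioned blake3, but nothing in this
//! module hashes file content — confirm whether content-based keys were intended.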
4
5use std::path::PathBuf;
6use std::sync::Arc;
7use std::time::{Duration, SystemTime};
8
9use dashmap::DashMap;
10
11use log::{debug, trace};
12
13use crate::analyzer::security::SecurityFinding;
14
/// Key identifying a cached scan result.
///
/// The key wraps the path of the scanned file; equality and hashing are
/// derived from that path alone (file content is not part of the key).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CacheKey {
    /// Path of the file the cached result belongs to.
    pub file_path: PathBuf,
}
20
21/// Cached scan result
22#[derive(Debug, Clone)]
23pub struct CachedResult {
24    pub findings: Vec<SecurityFinding>,
25    pub cached_at: SystemTime,
26    pub access_count: u32,
27}
28
29/// High-performance security cache
30pub struct SecurityCache {
31    // Main cache storage
32    cache: Arc<DashMap<PathBuf, CachedEntry, ahash::RandomState>>,
33
34    // Cache configuration
35    max_size_bytes: usize,
36    current_size_bytes: Arc<parking_lot::Mutex<usize>>,
37    eviction_threshold: f64,
38
39    // Statistics
40    hits: Arc<parking_lot::Mutex<u64>>,
41    misses: Arc<parking_lot::Mutex<u64>>,
42}
43
44/// Internal cache entry
45#[derive(Debug, Clone)]
46#[allow(dead_code)]
47struct CachedEntry {
48    key: CacheKey,
49    result: CachedResult,
50    size_bytes: usize,
51    last_accessed: SystemTime,
52}
53
54impl SecurityCache {
55    /// Create a new cache with specified size in MB
56    pub fn new(size_mb: usize) -> Self {
57        let max_size_bytes = size_mb * 1024 * 1024;
58        let hasher = ahash::RandomState::new();
59
60        Self {
61            cache: Arc::new(DashMap::with_hasher(hasher)),
62            max_size_bytes,
63            current_size_bytes: Arc::new(parking_lot::Mutex::new(0)),
64            eviction_threshold: 0.9, // Start eviction at 90% capacity
65            hits: Arc::new(parking_lot::Mutex::new(0)),
66            misses: Arc::new(parking_lot::Mutex::new(0)),
67        }
68    }
69
70    /// Get cached result for a file
71    pub fn get(&self, file_path: &PathBuf) -> Option<Vec<SecurityFinding>> {
72        let entry = self.cache.get_mut(file_path)?;
73
74        // Update access statistics
75        let mut entry = entry;
76        entry.last_accessed = SystemTime::now();
77        entry.result.access_count += 1;
78
79        *self.hits.lock() += 1;
80        trace!("Cache hit for: {}", file_path.display());
81
82        Some(entry.result.findings.clone())
83    }
84
85    /// Insert a scan result into cache
86    pub fn insert(&self, file_path: PathBuf, findings: Vec<SecurityFinding>) {
87        // Calculate entry size
88        let size_bytes = Self::estimate_size(&findings);
89
90        // Check if we need to evict entries
91        let current_size = *self.current_size_bytes.lock();
92        if current_size + size_bytes
93            > (self.max_size_bytes as f64 * self.eviction_threshold) as usize
94        {
95            self.evict_lru();
96        }
97
98        // Create cache key
99        let key = CacheKey {
100            file_path: file_path.clone(),
101        };
102
103        // Create cache entry
104        let entry = CachedEntry {
105            key,
106            result: CachedResult {
107                findings,
108                cached_at: SystemTime::now(),
109                access_count: 1,
110            },
111            size_bytes,
112            last_accessed: SystemTime::now(),
113        };
114
115        // Insert into cache
116        if let Some(old_entry) = self.cache.insert(file_path, entry) {
117            // Subtract old entry size
118            *self.current_size_bytes.lock() -= old_entry.size_bytes;
119        }
120
121        // Add new entry size
122        *self.current_size_bytes.lock() += size_bytes;
123
124        debug!(
125            "Cached result, current size: {} MB",
126            *self.current_size_bytes.lock() / (1024 * 1024)
127        );
128    }
129
130    /// Clear the entire cache
131    pub fn clear(&self) {
132        self.cache.clear();
133        *self.current_size_bytes.lock() = 0;
134        *self.hits.lock() = 0;
135        *self.misses.lock() = 0;
136        debug!("Cache cleared");
137    }
138
139    /// Get cache statistics
140    pub fn stats(&self) -> CacheStats {
141        let hits = *self.hits.lock();
142        let misses = *self.misses.lock();
143        let total = hits + misses;
144
145        CacheStats {
146            hits,
147            misses,
148            hit_rate: if total > 0 {
149                hits as f64 / total as f64
150            } else {
151                0.0
152            },
153            entries: self.cache.len(),
154            size_bytes: *self.current_size_bytes.lock(),
155            capacity_bytes: self.max_size_bytes,
156        }
157    }
158
159    /// Evict least recently used entries
160    fn evict_lru(&self) {
161        let target_size = (self.max_size_bytes as f64 * 0.7) as usize; // Evict to 70% capacity
162        let mut entries_to_remove = Vec::new();
163
164        // Collect entries sorted by last access time
165        let mut entries: Vec<(PathBuf, SystemTime, usize)> = self
166            .cache
167            .iter()
168            .map(|entry| (entry.key().clone(), entry.last_accessed, entry.size_bytes))
169            .collect();
170
171        // Sort by last accessed (oldest first)
172        entries.sort_by_key(|(_, last_accessed, _)| *last_accessed);
173
174        // Determine which entries to remove
175        let mut current_size = *self.current_size_bytes.lock();
176        for (path, _, size) in entries {
177            if current_size <= target_size {
178                break;
179            }
180
181            entries_to_remove.push(path);
182            current_size -= size;
183        }
184
185        // Count entries to remove
186        let entries_removed = entries_to_remove.len();
187
188        // Remove entries
189        for path in entries_to_remove {
190            if let Some((_, entry)) = self.cache.remove(&path) {
191                *self.current_size_bytes.lock() -= entry.size_bytes;
192            }
193        }
194
195        debug!(
196            "Evicted {} entries, new size: {} MB",
197            entries_removed,
198            *self.current_size_bytes.lock() / (1024 * 1024)
199        );
200    }
201
202    /// Estimate memory size of findings
203    fn estimate_size(findings: &[SecurityFinding]) -> usize {
204        // Base size for the vector
205        let mut size = std::mem::size_of::<Vec<SecurityFinding>>();
206
207        // Add size for each finding
208        for finding in findings {
209            size += std::mem::size_of::<SecurityFinding>();
210
211            // Add string sizes
212            size += finding.id.len();
213            size += finding.title.len();
214            size += finding.description.len();
215
216            if let Some(ref path) = finding.file_path {
217                size += path.to_string_lossy().len();
218            }
219
220            if let Some(ref evidence) = finding.evidence {
221                size += evidence.len();
222            }
223
224            // Add vector sizes
225            size += finding.remediation.iter().map(|s| s.len()).sum::<usize>();
226            size += finding.references.iter().map(|s| s.len()).sum::<usize>();
227            size += finding
228                .compliance_frameworks
229                .iter()
230                .map(|s| s.len())
231                .sum::<usize>();
232
233            if let Some(ref cwe) = finding.cwe_id {
234                size += cwe.len();
235            }
236        }
237
238        size
239    }
240
241    /// Invalidate cache entries older than duration
242    pub fn invalidate_older_than(&self, duration: Duration) {
243        let cutoff = SystemTime::now() - duration;
244        let mut removed = 0;
245
246        self.cache.retain(|_, entry| {
247            if entry.result.cached_at < cutoff {
248                *self.current_size_bytes.lock() -= entry.size_bytes;
249                removed += 1;
250                false
251            } else {
252                true
253            }
254        });
255
256        if removed > 0 {
257            debug!("Invalidated {} stale cache entries", removed);
258        }
259    }
260}
261
/// Point-in-time snapshot of the cache's counters and sizes.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Number of lookups that found an entry.
    pub hits: u64,
    /// Number of lookups that found nothing.
    pub misses: u64,
    /// Ratio of hits to total lookups.
    pub hit_rate: f64,
    /// Number of entries currently stored.
    pub entries: usize,
    /// Tracked size of the stored entries in bytes.
    pub size_bytes: usize,
    /// Configured capacity in bytes.
    pub capacity_bytes: usize,
}

impl CacheStats {
    /// Tracked size expressed in mebibytes.
    pub fn size_mb(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;

        let bytes = self.size_bytes as f64;
        bytes / BYTES_PER_MB
    }

    /// Capacity utilization as a percentage; `0.0` when the capacity is zero.
    pub fn utilization(&self) -> f64 {
        match self.capacity_bytes {
            0 => 0.0,
            capacity => {
                let fraction = self.size_bytes as f64 / capacity as f64;
                fraction * 100.0
            }
        }
    }
}
288
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analyzer::security::{SecurityCategory, SecuritySeverity};

    /// Build a finding for `path` with the given id and evidence payload.
    fn make_finding(path: &PathBuf, id: String, evidence: String) -> SecurityFinding {
        SecurityFinding {
            id,
            title: "Test Finding".to_string(),
            description: "Test description".to_string(),
            severity: SecuritySeverity::High,
            category: SecurityCategory::SecretsExposure,
            file_path: Some(path.clone()),
            line_number: Some(10),
            column_number: Some(5),
            evidence: Some(evidence),
            remediation: vec!["Fix it".to_string()],
            references: vec!["https://example.com".to_string()],
            cwe_id: Some("CWE-798".to_string()),
            compliance_frameworks: vec!["SOC2".to_string()],
        }
    }

    #[test]
    fn test_cache_basic_operations() {
        let cache = SecurityCache::new(10); // 10MB cache

        let path = PathBuf::from("/test/file.js");
        let findings = vec![make_finding(
            &path,
            "test-1".to_string(),
            "evidence".to_string(),
        )];

        // Test insert
        cache.insert(path.clone(), findings.clone());

        // Test get
        let cached = cache.get(&path);
        assert!(cached.is_some());
        assert_eq!(cached.unwrap().len(), 1);

        // Test stats
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 0);
        assert_eq!(stats.entries, 1);
    }

    #[test]
    fn test_cache_eviction() {
        let cache = SecurityCache::new(1); // 1MB cache (small for testing)

        // Each entry carries a 4 KiB evidence payload, so 1000 entries total
        // roughly 4 MiB and are guaranteed to cross the eviction threshold
        // regardless of the in-memory size of `SecurityFinding` itself.
        let payload = "x".repeat(4096);

        for i in 0..1000 {
            let path = PathBuf::from(format!("/test/file{}.js", i));
            let findings = vec![make_finding(&path, format!("test-{}", i), payload.clone())];

            cache.insert(path, findings);
        }

        // Cache should have evicted some entries
        let stats = cache.stats();
        assert!(stats.entries < 1000);
        assert!(stats.utilization() <= 90.0);
    }

    #[test]
    fn test_cache_invalidation() {
        let cache = SecurityCache::new(10);

        let path = PathBuf::from("/test/file.js");
        let findings = vec![];

        cache.insert(path.clone(), findings);

        // Ensure the entry's timestamp is strictly in the past even on
        // platforms with a coarse system clock.
        std::thread::sleep(Duration::from_millis(5));

        // Invalidate entries older than 0 seconds (all entries)
        cache.invalidate_older_than(Duration::from_secs(0));

        // Cache should be empty
        assert!(cache.get(&path).is_none());
        assert_eq!(cache.stats().entries, 0);
    }
}