Skip to main content

rma_analyzer/
cache.rs

1//! Analysis Cache for Incremental Scanning
2//!
3//! Caches analysis results based on file content hashes to avoid
4//! re-analyzing unchanged files. This can reduce scan time by 80-90%
5//! for repeated scans of the same codebase.
6//!
7//! # Cache Structure
8//!
9//! ```text
10//! .qryon/cache/
11//!   analysis/
12//!     {content_hash}.json  # Per-file analysis results
13//!   manifest.json          # File path -> hash mapping
14//! ```
15
16use crate::FileAnalysis;
17use anyhow::Result;
18use rma_common::Language;
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use std::fs;
22use std::hash::{DefaultHasher, Hash, Hasher};
23use std::path::{Path, PathBuf};
24use std::time::SystemTime;
25
/// Computes a fast, non-cryptographic 64-bit hash of `content` for use as a cache key.
///
/// The hash is produced by the standard library's [`DefaultHasher`] created with
/// `DefaultHasher::new()`, so equal inputs always produce equal hashes within one
/// build of the program.
///
/// # Stability
///
/// The algorithm behind `DefaultHasher` is deliberately unspecified by the standard
/// library and may change between Rust releases. Because the values returned here are
/// persisted (manifest entries and `{hash}.json` file names), a toolchain upgrade may
/// turn previously written entries into cache misses. That is safe, but costs one full
/// re-analysis.
///
/// # Collisions
///
/// This is not a cryptographic hash; do not rely on it to resist deliberately
/// crafted collisions.
pub fn hash_content(content: &str) -> u64 {
    let mut hasher = DefaultHasher::new();
    content.hash(&mut hasher);
    hasher.finish()
}
33
/// Cache manifest tracking file -> hash mappings
///
/// Serialized to and from JSON via serde. `Default` is derived, so a default
/// manifest has an empty `files` map and `version == 0`.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct CacheManifest {
    /// Map of file path -> cache entry (content hash, modification time, analyzed flag)
    pub files: HashMap<PathBuf, CacheEntry>,
    /// Version of cache format (for invalidation on schema changes)
    pub version: u32,
}
42
/// Entry for a single cached file
///
/// One entry is stored per path in [`CacheManifest::files`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// Hash of file content, as returned by [`hash_content`]
    pub content_hash: u64,
    /// Last modified time in whole seconds since the Unix epoch
    /// (for quick staleness check)
    pub mtime: u64,
    /// Whether file was analyzed (vs just parsed)
    pub analyzed: bool,
}
53
/// Summary of cached file analysis results
///
/// This is a lightweight summary derived from a full `FileAnalysis`; it carries
/// only counts, a severity flag, the language and a few metrics.
/// The full FileAnalysis is stored on disk in the cache directory as `{hash}.json`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedFileAnalysis {
    /// Number of findings in this file
    pub findings_count: usize,
    /// Whether any finding has Critical severity
    pub has_critical: bool,
    /// Programming language of the file
    pub language: Language,
    /// Summary of code metrics
    pub metrics_summary: MetricsSummary,
}
69
/// Lightweight metrics summary for cache entries
///
/// Holds the subset of code metrics that is copied into a [`CachedFileAnalysis`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricsSummary {
    /// Lines of code reported for the file
    pub lines_of_code: usize,
    /// Cyclomatic complexity reported for the file
    pub cyclomatic_complexity: usize,
    /// Number of functions reported for the file
    pub function_count: usize,
}
77
78impl CacheManifest {
79    const CURRENT_VERSION: u32 = 1;
80
81    /// Load manifest from cache directory
82    pub fn load(cache_dir: &Path) -> Result<Self> {
83        let manifest_path = cache_dir.join("manifest.json");
84        if manifest_path.exists() {
85            let content = fs::read_to_string(&manifest_path)?;
86            let manifest: Self = serde_json::from_str(&content)?;
87            if manifest.version == Self::CURRENT_VERSION {
88                return Ok(manifest);
89            }
90        }
91        Ok(Self::default())
92    }
93
94    /// Save manifest to cache directory
95    pub fn save(&self, cache_dir: &Path) -> Result<()> {
96        fs::create_dir_all(cache_dir)?;
97        let manifest_path = cache_dir.join("manifest.json");
98        let content = serde_json::to_string_pretty(self)?;
99        fs::write(manifest_path, content)?;
100        Ok(())
101    }
102
103    /// Check if a file needs re-analysis
104    ///
105    /// Uses a two-level check:
106    /// 1. Fast path: if mtime changed, assume content changed (most common case)
107    /// 2. Slow path: compare content hash (handles edge cases like `touch`)
108    pub fn needs_analysis(&self, path: &Path, content: &str, mtime: SystemTime) -> bool {
109        let mtime_secs = mtime
110            .duration_since(SystemTime::UNIX_EPOCH)
111            .map(|d| d.as_secs())
112            .unwrap_or(0);
113
114        match self.files.get(path) {
115            Some(entry) => {
116                if !entry.analyzed {
117                    return true; // Was parsed but not analyzed
118                }
119                // Fast check: mtime changed -> definitely need to re-analyze
120                if entry.mtime != mtime_secs {
121                    return true;
122                }
123                // Even if mtime is same, check content hash (handles weird edge cases)
124                // In practice, this branch is rarely taken
125                let new_hash = hash_content(content);
126                entry.content_hash != new_hash
127            }
128            None => true, // Never seen this file
129        }
130    }
131
132    /// Update cache entry for a file
133    pub fn update(&mut self, path: PathBuf, content: &str, mtime: SystemTime) {
134        let mtime_secs = mtime
135            .duration_since(SystemTime::UNIX_EPOCH)
136            .map(|d| d.as_secs())
137            .unwrap_or(0);
138
139        self.files.insert(
140            path,
141            CacheEntry {
142                content_hash: hash_content(content),
143                mtime: mtime_secs,
144                analyzed: true,
145            },
146        );
147    }
148
149    /// Get number of cached files
150    pub fn len(&self) -> usize {
151        self.files.len()
152    }
153
154    /// Check if cache is empty
155    pub fn is_empty(&self) -> bool {
156        self.files.is_empty()
157    }
158
159    /// Clear all entries
160    pub fn clear(&mut self) {
161        self.files.clear();
162    }
163}
164
/// Analysis cache manager
///
/// Combines the in-memory [`CacheManifest`] with the directory that holds the
/// serialized per-file analysis results.
pub struct AnalysisCache {
    /// Directory holding `manifest.json` and the `{hash}.json` analysis files
    cache_dir: PathBuf,
    /// File path -> cache entry bookkeeping
    manifest: CacheManifest,
    /// When `false`, lookups always miss and writes are skipped
    enabled: bool,
}
171
172impl AnalysisCache {
173    /// Create a new cache manager
174    pub fn new(project_root: &Path) -> Self {
175        let cache_dir = project_root.join(".rma").join("cache").join("analysis");
176        let manifest = CacheManifest::load(&cache_dir).unwrap_or_default();
177        Self {
178            cache_dir,
179            manifest,
180            enabled: true,
181        }
182    }
183
184    /// Create a disabled cache (for testing or --no-cache flag)
185    pub fn disabled() -> Self {
186        Self {
187            cache_dir: PathBuf::new(),
188            manifest: CacheManifest::default(),
189            enabled: false,
190        }
191    }
192
193    /// Check if file needs re-analysis
194    pub fn needs_analysis(&self, path: &Path, content: &str, mtime: SystemTime) -> bool {
195        if !self.enabled {
196            return true;
197        }
198        self.manifest.needs_analysis(path, content, mtime)
199    }
200
201    /// Mark file as analyzed
202    pub fn mark_analyzed(&mut self, path: PathBuf, content: &str, mtime: SystemTime) {
203        if self.enabled {
204            self.manifest.update(path, content, mtime);
205        }
206    }
207
208    /// Save cache to disk
209    pub fn save(&self) -> Result<()> {
210        if self.enabled {
211            self.manifest.save(&self.cache_dir)?;
212        }
213        Ok(())
214    }
215
216    /// Get cache stats
217    pub fn stats(&self) -> (usize, bool) {
218        (self.manifest.len(), self.enabled)
219    }
220
221    /// Check if cache is enabled
222    pub fn is_enabled(&self) -> bool {
223        self.enabled
224    }
225
226    /// Get the content hash for a file path
227    pub fn get_content_hash(&self, path: &Path) -> Option<u64> {
228        self.manifest.files.get(path).map(|e| e.content_hash)
229    }
230
231    /// Store FileAnalysis results to disk cache
232    pub fn store_analysis(
233        &self,
234        _path: &Path,
235        content: &str,
236        analysis: &FileAnalysis,
237    ) -> Result<()> {
238        if !self.enabled {
239            return Ok(());
240        }
241        let content_hash = hash_content(content);
242        let cache_file = self.cache_dir.join(format!("{}.json", content_hash));
243        fs::create_dir_all(&self.cache_dir)?;
244        let json = serde_json::to_string(analysis)?;
245        fs::write(cache_file, json)?;
246        Ok(())
247    }
248
249    /// Load FileAnalysis results from disk cache
250    pub fn load_analysis(&self, path: &Path, content: &str) -> Option<FileAnalysis> {
251        let content_hash = hash_content(content);
252        self.load_analysis_by_hash(path, content_hash)
253    }
254
255    /// Save analysis results to cache (alias for store_analysis)
256    ///
257    /// Stores the full FileAnalysis to `.qryon/cache/analysis/{hash}.json`
258    /// where hash is the content hash of the source file.
259    pub fn save_analysis(&self, path: &Path, hash: u64, analysis: &FileAnalysis) -> Result<()> {
260        if !self.enabled {
261            return Ok(());
262        }
263        let cache_file = self.cache_dir.join(format!("{}.json", hash));
264        fs::create_dir_all(&self.cache_dir)?;
265        let json = serde_json::to_string(analysis)?;
266        fs::write(cache_file, json)?;
267        let _ = path; // path reserved for future use (e.g., logging)
268        Ok(())
269    }
270
271    /// Load cached analysis results by hash
272    ///
273    /// Returns the cached FileAnalysis if it exists and matches the given hash.
274    /// Returns None if cache miss or cache is disabled.
275    pub fn load_analysis_by_hash(&self, _path: &Path, hash: u64) -> Option<FileAnalysis> {
276        if !self.enabled {
277            return None;
278        }
279        let cache_file = self.cache_dir.join(format!("{}.json", hash));
280        if cache_file.exists()
281            && let Ok(json) = fs::read_to_string(&cache_file)
282            && let Ok(analysis) = serde_json::from_str::<FileAnalysis>(&json)
283        {
284            return Some(analysis);
285        }
286        None
287    }
288
289    /// Get a summary of cached analysis without loading full results
290    ///
291    /// Useful for quick checks without deserializing the full findings list.
292    pub fn get_analysis_summary(&self, path: &Path, hash: u64) -> Option<CachedFileAnalysis> {
293        let analysis = self.load_analysis_by_hash(path, hash)?;
294
295        let has_critical = analysis
296            .findings
297            .iter()
298            .any(|f| f.severity == rma_common::Severity::Critical);
299
300        Some(CachedFileAnalysis {
301            findings_count: analysis.findings.len(),
302            has_critical,
303            language: analysis.language,
304            metrics_summary: MetricsSummary {
305                lines_of_code: analysis.metrics.lines_of_code,
306                cyclomatic_complexity: analysis.metrics.cyclomatic_complexity,
307                function_count: analysis.metrics.function_count,
308            },
309        })
310    }
311}
312
// Unit tests for content hashing, manifest staleness checks, and the on-disk
// analysis cache (enabled and disabled).
#[cfg(test)]
mod tests {
    use super::*;
    use rma_common::{CodeMetrics, Finding, Severity};
    use std::time::Duration;

    /// Equal content hashes equally; different content hashes differently.
    #[test]
    fn test_hash_content() {
        let h1 = hash_content("hello world");
        let h2 = hash_content("hello world");
        let h3 = hash_content("hello world!");

        assert_eq!(h1, h2);
        assert_ne!(h1, h3);
    }

    /// Walks a manifest entry through unknown -> up to date -> content changed.
    #[test]
    fn test_cache_manifest() {
        let mut manifest = CacheManifest::default();
        let path = PathBuf::from("/test/file.rs");
        let content = "fn main() {}";
        let mtime = SystemTime::UNIX_EPOCH + Duration::from_secs(1000);

        // Initially needs analysis
        assert!(manifest.needs_analysis(&path, content, mtime));

        // After update, doesn't need analysis
        manifest.update(path.clone(), content, mtime);
        assert!(!manifest.needs_analysis(&path, content, mtime));

        // Changed content needs analysis (same mtime, so this exercises the hash check)
        assert!(manifest.needs_analysis(&path, "fn main() { panic!() }", mtime));
    }

    /// Round-trips a FileAnalysis without findings through the disk cache.
    #[test]
    fn test_save_and_load_analysis() {
        // Fixed directory under the system temp dir; removed up front in case a
        // previous run left it behind.
        let temp_dir = std::env::temp_dir().join("rma_cache_test");
        let _ = std::fs::remove_dir_all(&temp_dir);

        let cache = AnalysisCache::new(&temp_dir);
        let path = PathBuf::from("/test/file.rs");
        let content = "fn main() {}";
        let hash = hash_content(content);

        // Create a test FileAnalysis
        let analysis = FileAnalysis {
            path: path.to_string_lossy().to_string(),
            language: Language::Rust,
            metrics: CodeMetrics {
                lines_of_code: 10,
                lines_of_comments: 2,
                blank_lines: 1,
                cyclomatic_complexity: 3,
                cognitive_complexity: 2,
                function_count: 1,
                class_count: 0,
                import_count: 0,
            },
            findings: vec![],
        };

        // Save and load
        cache.save_analysis(&path, hash, &analysis).unwrap();
        let loaded = cache.load_analysis_by_hash(&path, hash);

        assert!(loaded.is_some());
        let loaded = loaded.unwrap();
        assert_eq!(loaded.path, analysis.path);
        assert_eq!(loaded.language, Language::Rust);
        assert_eq!(loaded.metrics.lines_of_code, 10);
        assert_eq!(loaded.metrics.cyclomatic_complexity, 3);

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }

    /// The summary reflects the finding count, critical flag, language and metrics
    /// of the cached analysis.
    #[test]
    fn test_get_analysis_summary() {
        // Uses its own directory name, distinct from the other disk-backed test.
        let temp_dir = std::env::temp_dir().join("rma_cache_summary_test");
        let _ = std::fs::remove_dir_all(&temp_dir);

        let cache = AnalysisCache::new(&temp_dir);
        let path = PathBuf::from("/test/critical.rs");
        let content = "unsafe fn dangerous() {}";
        let hash = hash_content(content);

        // Create analysis with a critical finding
        let analysis = FileAnalysis {
            path: path.to_string_lossy().to_string(),
            language: Language::Rust,
            metrics: CodeMetrics {
                lines_of_code: 5,
                lines_of_comments: 0,
                blank_lines: 0,
                cyclomatic_complexity: 1,
                cognitive_complexity: 0,
                function_count: 1,
                class_count: 0,
                import_count: 0,
            },
            findings: vec![Finding {
                id: "test-1".to_string(),
                rule_id: "test-rule".to_string(),
                message: "A test finding".to_string(),
                severity: Severity::Critical,
                language: Language::Rust,
                location: rma_common::SourceLocation {
                    file: path.clone(),
                    start_line: 1,
                    start_column: 0,
                    end_line: 1,
                    end_column: 10,
                },
                snippet: Some("unsafe fn".to_string()),
                suggestion: None,
                fix: None,
                confidence: rma_common::Confidence::default(),
                category: rma_common::FindingCategory::default(),
                subcategory: None,
                technology: None,
                impact: None,
                likelihood: None,
                source: Default::default(),
                fingerprint: None,
                properties: None,
                occurrence_count: None,
                additional_locations: None,
                ai_verdict: None,
                ai_explanation: None,
                ai_confidence: None,
            }],
        };

        cache.save_analysis(&path, hash, &analysis).unwrap();
        let summary = cache.get_analysis_summary(&path, hash);

        assert!(summary.is_some());
        let summary = summary.unwrap();
        assert_eq!(summary.findings_count, 1);
        assert!(summary.has_critical);
        assert_eq!(summary.language, Language::Rust);
        assert_eq!(summary.metrics_summary.lines_of_code, 5);
        assert_eq!(summary.metrics_summary.function_count, 1);

        // Cleanup
        let _ = std::fs::remove_dir_all(&temp_dir);
    }

    /// A disabled cache accepts saves without error and always misses on load.
    #[test]
    fn test_disabled_cache() {
        let cache = AnalysisCache::disabled();
        let path = PathBuf::from("/test/file.rs");
        // Arbitrary hash value; nothing is ever stored under it.
        let hash = 12345u64;

        let analysis = FileAnalysis {
            path: path.to_string_lossy().to_string(),
            language: Language::Rust,
            metrics: CodeMetrics::default(),
            findings: vec![],
        };

        // Save should succeed but not actually save
        cache.save_analysis(&path, hash, &analysis).unwrap();

        // Load should return None
        let loaded = cache.load_analysis_by_hash(&path, hash);
        assert!(loaded.is_none());

        // Summary should return None
        let summary = cache.get_analysis_summary(&path, hash);
        assert!(summary.is_none());
    }
}
485}