syncable_cli/analyzer/security/turbo/
mod.rs

1//! # Turbo Security Analyzer
2//! 
3//! High-performance security analyzer that's 10-100x faster than traditional approaches.
4//! Uses advanced techniques like multi-pattern matching, memory-mapped I/O, and intelligent filtering.
5
6use std::path::Path;
7use std::sync::Arc;
8use std::time::Instant;
9
10use crossbeam::channel::bounded;
11
12use rayon::prelude::*;
13use log::{info, debug, trace};
14
15pub mod file_discovery;
16pub mod pattern_engine;
17pub mod cache;
18pub mod scanner;
19pub mod results;
20
21use file_discovery::{FileDiscovery, FileMetadata, DiscoveryConfig};
22use pattern_engine::PatternEngine;
23use cache::SecurityCache;
24use scanner::{FileScanner, ScanTask, ScanResult};
25use results::{ResultAggregator, SecurityReport};
26
27use crate::analyzer::security::SecurityFinding;
28
29/// Turbo security analyzer configuration
30#[derive(Debug, Clone)]
31pub struct TurboConfig {
32    /// Scanning mode determines speed vs thoroughness tradeoff
33    pub scan_mode: ScanMode,
34    
35    /// Maximum file size to scan (in bytes)
36    pub max_file_size: usize,
37    
38    /// Number of worker threads (0 = auto-detect)
39    pub worker_threads: usize,
40    
41    /// Enable memory mapping for large files
42    pub use_mmap: bool,
43    
44    /// Cache configuration
45    pub enable_cache: bool,
46    pub cache_size_mb: usize,
47    
48    /// Early termination
49    pub max_critical_findings: Option<usize>,
50    pub timeout_seconds: Option<u64>,
51    
52    /// File filtering
53    pub skip_gitignored: bool,
54    pub priority_extensions: Vec<String>,
55    
56    /// Pattern configuration
57    pub pattern_sets: Vec<String>,
58}
59
/// Available scan modes, ordered from fastest to most exhaustive.
///
/// Each mode is a different point on the speed/accuracy tradeoff.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ScanMode {
    /// Ultra-fast mode: critical files only (`.env`, configs) with basic patterns.
    Lightning,

    /// Fast mode: smart sampling, priority patterns, large files skipped.
    Fast,

    /// Balanced mode: good coverage combined with performance optimizations.
    Balanced,

    /// Thorough mode: full scan with all patterns (still optimized).
    Thorough,

    /// Paranoid mode: everything, including experimental patterns.
    Paranoid,
}
78
79impl Default for TurboConfig {
80    fn default() -> Self {
81        Self {
82            scan_mode: ScanMode::Balanced,
83            max_file_size: 10 * 1024 * 1024, // 10MB
84            worker_threads: 0, // Auto-detect
85            use_mmap: true,
86            enable_cache: true,
87            cache_size_mb: 100,
88            max_critical_findings: None,
89            timeout_seconds: None,
90            skip_gitignored: true,
91            priority_extensions: vec![
92                "env".to_string(),
93                "key".to_string(),
94                "pem".to_string(),
95                "json".to_string(),
96                "yml".to_string(),
97                "yaml".to_string(),
98                "toml".to_string(),
99                "ini".to_string(),
100                "conf".to_string(),
101                "config".to_string(),
102            ],
103            pattern_sets: vec!["default".to_string()],
104        }
105    }
106}
107
108/// High-performance security analyzer
109pub struct TurboSecurityAnalyzer {
110    config: TurboConfig,
111    pattern_engine: Arc<PatternEngine>,
112    cache: Arc<SecurityCache>,
113    file_discovery: Arc<FileDiscovery>,
114}
115
116impl TurboSecurityAnalyzer {
117    /// Create a new turbo security analyzer
118    pub fn new(config: TurboConfig) -> Result<Self, SecurityError> {
119        let start = Instant::now();
120        
121        // Initialize pattern engine with compiled patterns
122        let pattern_engine = Arc::new(PatternEngine::new(&config)?);
123        info!("Pattern engine initialized with {} patterns in {:?}", 
124              pattern_engine.pattern_count(), start.elapsed());
125        
126        // Initialize cache
127        let cache = Arc::new(SecurityCache::new(config.cache_size_mb));
128        
129        // Initialize file discovery
130        let discovery_config = DiscoveryConfig {
131            use_git: config.skip_gitignored,
132            max_file_size: config.max_file_size,
133            priority_extensions: config.priority_extensions.clone(),
134            scan_mode: config.scan_mode,
135        };
136        let file_discovery = Arc::new(FileDiscovery::new(discovery_config));
137        
138        Ok(Self {
139            config,
140            pattern_engine,
141            cache,
142            file_discovery,
143        })
144    }
145    
146    /// Analyze a project with turbo performance
147    pub fn analyze_project(&self, project_root: &Path) -> Result<SecurityReport, SecurityError> {
148        let start = Instant::now();
149        info!("🚀 Starting turbo security analysis for: {}", project_root.display());
150        
151        // Phase 1: Ultra-fast file discovery
152        let discovery_start = Instant::now();
153        let files = self.file_discovery.discover_files(project_root)?;
154        info!("📁 Discovered {} files in {:?}", files.len(), discovery_start.elapsed());
155        
156        // Early exit if no files
157        if files.is_empty() {
158            return Ok(SecurityReport::empty());
159        }
160        
161        // Phase 2: Intelligent filtering and prioritization
162        let filtered_files = self.filter_and_prioritize_files(files);
163        info!("🎯 Filtered to {} high-priority files", filtered_files.len());
164        
165        // Phase 3: Parallel scanning with work-stealing
166        let scan_start = Instant::now();
167        let (findings, files_scanned) = self.parallel_scan(filtered_files)?;
168        info!("🔍 Scanned files in {:?}, found {} findings",
169              scan_start.elapsed(), findings.len());
170
171        // Phase 4: Result aggregation and report generation
172        let report = ResultAggregator::aggregate(findings, start.elapsed(), files_scanned);
173        
174        info!("✅ Turbo analysis completed in {:?}", start.elapsed());
175        Ok(report)
176    }
177    
178    /// Filter and prioritize files based on scan mode and heuristics
179    fn filter_and_prioritize_files(&self, files: Vec<FileMetadata>) -> Vec<FileMetadata> {
180        use ScanMode::*;
181        
182        let mut filtered: Vec<FileMetadata> = match self.config.scan_mode {
183            Lightning => {
184                // Ultra-fast: Only critical files
185                files.into_iter()
186                    .filter(|f| f.is_critical())
187                    .take(100) // Hard limit for speed
188                    .collect()
189            }
190            Fast => {
191                // Fast: Priority files + sample of others
192                let (priority, others): (Vec<_>, Vec<_>) = files.into_iter()
193                    .partition(|f| f.is_priority());
194                
195                let mut result = priority;
196                // Sample 20% of other files
197                let sample_size = others.len() / 5;
198                result.extend(others.into_iter().take(sample_size));
199                result
200            }
201            Balanced => {
202                // Balanced: All priority files + 50% of others
203                let (priority, others): (Vec<_>, Vec<_>) = files.into_iter()
204                    .partition(|f| f.is_priority());
205                
206                let mut result = priority;
207                let sample_size = others.len() / 2;
208                result.extend(others.into_iter().take(sample_size));
209                result
210            }
211            Thorough => {
212                // Thorough: All files except huge ones
213                files.into_iter()
214                    .filter(|f| f.size < self.config.max_file_size)
215                    .collect()
216            }
217            Paranoid => {
218                // Paranoid: Everything
219                files
220            }
221        };
222        
223        // Sort by priority score (critical files first)
224        filtered.par_sort_by_key(|f| std::cmp::Reverse(f.priority_score()));
225        filtered
226    }
227    
228    /// Parallel scan with work-stealing and early termination
229    fn parallel_scan(&self, files: Vec<FileMetadata>) -> Result<(Vec<SecurityFinding>, usize), SecurityError> {
230        let thread_count = if self.config.worker_threads == 0 {
231            num_cpus::get()
232        } else {
233            self.config.worker_threads
234        };
235        
236        // Create channels for work distribution
237        let (task_sender, task_receiver) = bounded::<ScanTask>(thread_count * 10);
238        let (result_sender, result_receiver) = bounded::<ScanResult>(thread_count * 10);
239        
240        // Atomic counter for early termination
241        let critical_count = Arc::new(parking_lot::Mutex::new(0));
242        let should_terminate = Arc::new(parking_lot::RwLock::new(false));
243        
244        // Spawn scanner threads
245        let scanner_handles: Vec<_> = (0..thread_count)
246            .map(|thread_id| {
247                let scanner = FileScanner::new(
248                    thread_id,
249                    Arc::clone(&self.pattern_engine),
250                    Arc::clone(&self.cache),
251                    self.config.use_mmap,
252                );
253                
254                let task_receiver = task_receiver.clone();
255                let result_sender = result_sender.clone();
256                let critical_count = Arc::clone(&critical_count);
257                let should_terminate = Arc::clone(&should_terminate);
258                let max_critical = self.config.max_critical_findings;
259                
260                std::thread::spawn(move || {
261                    scanner.run(
262                        task_receiver,
263                        result_sender,
264                        critical_count,
265                        should_terminate,
266                        max_critical,
267                    )
268                })
269            })
270            .collect();
271        
272        // Drop original receiver to signal completion
273        drop(task_receiver);
274        
275        // Send scan tasks
276        let task_sender_thread = {
277            let task_sender = task_sender.clone();
278            let should_terminate = Arc::clone(&should_terminate);
279            
280            std::thread::spawn(move || {
281                for (idx, file) in files.into_iter().enumerate() {
282                    // Check for early termination
283                    if *should_terminate.read() {
284                        debug!("Early termination triggered, stopping task distribution");
285                        break;
286                    }
287                    
288                    let task = ScanTask {
289                        id: idx,
290                        file,
291                        quick_reject: idx > 1000, // Quick reject for files after first 1000
292                    };
293                    
294                    if task_sender.send(task).is_err() {
295                        break; // Channel closed
296                    }
297                }
298            })
299        };
300        
301        // Drop original sender to signal completion
302        drop(task_sender);
303        drop(result_sender);
304        
305        // Collect results
306        let mut all_findings = Vec::new();
307        let mut files_scanned = 0;
308        let mut files_skipped = 0;
309        
310        while let Ok(result) = result_receiver.recv() {
311            match result {
312                ScanResult::Findings(findings) => {
313                    all_findings.extend(findings);
314                    files_scanned += 1;
315                }
316                ScanResult::Skipped => {
317                    files_skipped += 1;
318                }
319                ScanResult::Error(err) => {
320                    debug!("Scan error: {}", err);
321                }
322            }
323            
324            // Progress reporting every 100 files
325            if (files_scanned + files_skipped) % 100 == 0 {
326                trace!("Progress: {} scanned, {} skipped", files_scanned, files_skipped);
327            }
328        }
329        
330        // Wait for threads to complete
331        task_sender_thread.join().unwrap();
332        for handle in scanner_handles {
333            handle.join().unwrap();
334        }
335        
336        info!("Scan complete: {} files scanned, {} skipped, {} findings",
337              files_scanned, files_skipped, all_findings.len());
338
339        Ok((all_findings, files_scanned))
340    }
341}
342
343#[derive(Debug, thiserror::Error)]
344pub enum SecurityError {
345    #[error("Pattern engine error: {0}")]
346    PatternEngine(String),
347    
348    #[error("File discovery error: {0}")]
349    FileDiscovery(String),
350    
351    #[error("IO error: {0}")]
352    Io(#[from] std::io::Error),
353    
354    #[error("Cache error: {0}")]
355    Cache(String),
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// An analyzer can be constructed from the default configuration.
    #[test]
    fn test_turbo_analyzer_creation() {
        let analyzer = TurboSecurityAnalyzer::new(TurboConfig::default());
        assert!(analyzer.is_ok());
    }

    /// Lightning mode reports at least one finding for a project that
    /// contains a `.env` file with a secret.
    #[test]
    fn test_scan_modes() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        // Create test files
        let fixtures = [
            (".env", "API_KEY=secret123"),
            ("config.json", r#"{"key": "value"}"#),
            ("main.rs", "fn main() {}"),
        ];
        for &(name, contents) in &fixtures {
            fs::write(root.join(name), contents).unwrap();
        }

        // Test Lightning mode (should only scan critical files)
        let config = TurboConfig {
            scan_mode: ScanMode::Lightning,
            ..TurboConfig::default()
        };

        let analyzer = TurboSecurityAnalyzer::new(config).unwrap();
        let report = analyzer.analyze_project(root).unwrap();

        // Should find the .env file
        assert!(report.total_findings > 0);
    }
}
390}