rust_guardian/analyzer/
mod.rs

1//! Main analysis orchestrator for Rust Guardian
2//!
3//! Code Quality Principle: Service Orchestration - Analyzer orchestrates complex validation workflows
4//! - Coordinates path filtering, pattern matching, and result aggregation
5//! - Provides clean interface for validating single files or directory trees
6//! - Handles parallel processing and error recovery gracefully
7
8pub mod rust;
9
10use crate::analyzer::rust::RustAnalyzer;
11use crate::config::GuardianConfig;
12use crate::domain::violations::{GuardianError, GuardianResult, ValidationReport, Violation};
13use crate::patterns::{PathFilter, PatternEngine};
14use rayon::prelude::*;
15use std::fs;
16use std::path::{Path, PathBuf};
17use std::sync::{Arc, Mutex};
18use std::time::Instant;
19
20/// Main analyzer that orchestrates the entire validation process
21pub struct Analyzer {
22    /// Configuration for this analysis
23    config: GuardianConfig,
24    /// Pattern engine for detecting violations
25    pattern_engine: PatternEngine,
26    /// Path filter for determining which files to analyze
27    path_filter: PathFilter,
28    /// Rust-specific analyzer
29    rust_analyzer: RustAnalyzer,
30}
31
/// Options for customizing analysis behavior
///
/// `AnalysisOptions::default()` enables parallel processing, places no limit
/// on the number of files, continues past per-file errors, adds no extra
/// exclusions, and honors ignore files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AnalysisOptions {
    /// Whether to use parallel processing
    pub parallel: bool,
    /// Maximum number of files to analyze (`None` for unlimited)
    pub max_files: Option<usize>,
    /// Whether to fail on the first error (`true`) or log it and continue (`false`)
    pub fail_fast: bool,
    /// Additional paths to exclude for this analysis
    pub exclude_patterns: Vec<String>,
    /// Whether to ignore .guardianignore files
    pub ignore_ignore_files: bool,
}

impl Default for AnalysisOptions {
    /// Parallel, unlimited, non-fail-fast analysis with no extra exclusions.
    fn default() -> Self {
        Self {
            parallel: true,
            max_files: None,
            fail_fast: false,
            exclude_patterns: Vec::new(),
            ignore_ignore_files: false,
        }
    }
}
58
59impl Analyzer {
60    /// Create a new analyzer with the given configuration
61    pub fn new(config: GuardianConfig) -> GuardianResult<Self> {
62        let mut pattern_engine = PatternEngine::new();
63
64        // Load all enabled rules into the pattern engine
65        for (category_name, category) in &config.patterns {
66            if !category.enabled {
67                continue;
68            }
69
70            for rule in &category.rules {
71                if !rule.enabled {
72                    continue;
73                }
74
75                let effective_severity = config.effective_severity(category, rule);
76                pattern_engine
77                    .add_rule(rule, effective_severity)
78                    .map_err(|e| {
79                        GuardianError::config(format!(
80                            "Failed to add rule '{}' in category '{}': {}",
81                            rule.id, category_name, e
82                        ))
83                    })?;
84            }
85        }
86
87        // Create path filter
88        let ignore_file = if config.paths.ignore_file.as_deref() == Some("") {
89            None
90        } else {
91            config.paths.ignore_file.clone()
92        };
93
94        let path_filter = PathFilter::new(config.paths.patterns.clone(), ignore_file)
95            .map_err(|e| GuardianError::config(format!("Failed to create path filter: {e}")))?;
96
97        Ok(Self {
98            config,
99            pattern_engine,
100            path_filter,
101            rust_analyzer: RustAnalyzer::new(),
102        })
103    }
104
105    /// Create an analyzer with default configuration
106    pub fn with_defaults() -> GuardianResult<Self> {
107        Self::new(GuardianConfig::default())
108    }
109
110    /// Analyze a single file and return violations
111    pub fn analyze_file<P: AsRef<Path>>(&self, file_path: P) -> GuardianResult<Vec<Violation>> {
112        let file_path = file_path.as_ref();
113
114        // Check if file should be analyzed
115        if !self.path_filter.should_analyze(file_path)? {
116            return Ok(Vec::new());
117        }
118
119        // Read file content
120        let content = fs::read_to_string(file_path).map_err(|e| {
121            GuardianError::analysis(
122                file_path.display().to_string(),
123                format!("Failed to read file: {e}"),
124            )
125        })?;
126
127        let mut all_violations = Vec::new();
128
129        // Apply pattern matching
130        let matches = self
131            .pattern_engine
132            .analyze_file(file_path, &content)
133            .map_err(|e| {
134                GuardianError::analysis(
135                    file_path.display().to_string(),
136                    format!("Pattern analysis failed: {e}"),
137                )
138            })?;
139
140        all_violations.extend(self.pattern_engine.matches_to_violations(matches));
141
142        // Apply Rust-specific analysis for .rs files
143        if self.rust_analyzer.handles_file(file_path) {
144            let rust_violations = self
145                .rust_analyzer
146                .analyze(file_path, &content)
147                .map_err(|e| {
148                    GuardianError::analysis(
149                        file_path.display().to_string(),
150                        format!("Rust analysis failed: {e}"),
151                    )
152                })?;
153            all_violations.extend(rust_violations);
154        }
155
156        Ok(all_violations)
157    }
158
159    /// Analyze multiple files and return a complete validation report
160    pub fn analyze_paths<P: AsRef<Path>>(
161        &self,
162        paths: &[P],
163        options: &AnalysisOptions,
164    ) -> GuardianResult<ValidationReport> {
165        let start_time = Instant::now();
166        let mut report = ValidationReport::new();
167
168        // Collect all files to analyze
169        let mut files_to_analyze = Vec::new();
170
171        for path in paths {
172            let path = path.as_ref();
173
174            if path.is_file() {
175                files_to_analyze.push(path.to_path_buf());
176            } else if path.is_dir() {
177                let discovered_files = self.path_filter.find_files(path)?;
178                files_to_analyze.extend(discovered_files);
179            }
180        }
181
182        // Apply additional exclusions if specified
183        if !options.exclude_patterns.is_empty() {
184            let mut temp_filter = self.path_filter.clone();
185            for pattern in &options.exclude_patterns {
186                temp_filter.add_pattern(pattern.clone())?;
187            }
188            files_to_analyze = temp_filter.filter_paths(&files_to_analyze)?;
189        }
190
191        // Limit number of files if requested
192        if let Some(max_files) = options.max_files {
193            files_to_analyze.truncate(max_files);
194        }
195
196        let total_files = files_to_analyze.len();
197
198        // Analyze files (parallel or sequential)
199        let violations = if options.parallel && files_to_analyze.len() > 1 {
200            self.analyze_files_parallel(&files_to_analyze, options)?
201        } else {
202            self.analyze_files_sequential(&files_to_analyze, options)?
203        };
204
205        // Build final report
206        for violation in violations {
207            report.add_violation(violation);
208        }
209
210        report.set_files_analyzed(total_files);
211        report.set_execution_time(start_time.elapsed().as_millis() as u64);
212        report.set_config_fingerprint(self.config.fingerprint());
213        report.sort_violations();
214
215        Ok(report)
216    }
217
218    /// Analyze files sequentially
219    fn analyze_files_sequential(
220        &self,
221        files: &[PathBuf],
222        options: &AnalysisOptions,
223    ) -> GuardianResult<Vec<Violation>> {
224        let mut all_violations = Vec::new();
225
226        for file_path in files {
227            match self.analyze_file(file_path) {
228                Ok(violations) => {
229                    all_violations.extend(violations);
230                }
231                Err(e) => {
232                    if options.fail_fast {
233                        return Err(e);
234                    } else {
235                        // Log error and continue
236                        tracing::warn!("Failed to analyze {}: {}", file_path.display(), e);
237                    }
238                }
239            }
240        }
241
242        Ok(all_violations)
243    }
244
245    /// Analyze files in parallel
246    fn analyze_files_parallel(
247        &self,
248        files: &[PathBuf],
249        options: &AnalysisOptions,
250    ) -> GuardianResult<Vec<Violation>> {
251        let violations = Arc::new(Mutex::new(Vec::new()));
252        let errors = Arc::new(Mutex::new(Vec::new()));
253
254        files
255            .par_iter()
256            .for_each(|file_path| match self.analyze_file(file_path) {
257                Ok(file_violations) => {
258                    if let Ok(mut v) = violations.lock() {
259                        v.extend(file_violations);
260                    }
261                }
262                Err(e) => {
263                    if let Ok(mut errs) = errors.lock() {
264                        errs.push((file_path.clone(), e));
265                    }
266                }
267            });
268
269        // Handle errors
270        let errors = Arc::try_unwrap(errors)
271            .map_err(|_| {
272                GuardianError::analysis(
273                    "parallel_analysis".to_string(),
274                    "Failed to unwrap errors Arc".to_string(),
275                )
276            })?
277            .into_inner()
278            .map_err(|_| {
279                GuardianError::analysis(
280                    "parallel_analysis".to_string(),
281                    "Failed to lock errors mutex".to_string(),
282                )
283            })?;
284
285        if !errors.is_empty() {
286            if options.fail_fast {
287                if let Some((file_path, error)) = errors.into_iter().next() {
288                    return Err(GuardianError::analysis(
289                        file_path.display().to_string(),
290                        error.to_string(),
291                    ));
292                }
293            } else {
294                // Log all errors
295                for (file_path, error) in errors {
296                    tracing::warn!("Failed to analyze {}: {}", file_path.display(), error);
297                }
298            }
299        }
300
301        let violations = Arc::try_unwrap(violations)
302            .map_err(|_| {
303                GuardianError::analysis(
304                    "parallel_analysis".to_string(),
305                    "Failed to unwrap violations Arc".to_string(),
306                )
307            })?
308            .into_inner()
309            .map_err(|_| {
310                GuardianError::analysis(
311                    "parallel_analysis".to_string(),
312                    "Failed to lock violations mutex".to_string(),
313                )
314            })?;
315        Ok(violations)
316    }
317
318    /// Analyze a directory tree and return a validation report
319    pub fn analyze_directory<P: AsRef<Path>>(
320        &self,
321        root: P,
322        options: &AnalysisOptions,
323    ) -> GuardianResult<ValidationReport> {
324        self.analyze_paths(&[root.as_ref()], options)
325    }
326
327    /// Get configuration fingerprint for cache validation
328    pub fn config_fingerprint(&self) -> String {
329        self.config.fingerprint()
330    }
331
332    /// Get statistics about the configured patterns
333    pub fn pattern_stats(&self) -> PatternStats {
334        let mut stats = PatternStats::default();
335
336        for category in self.config.patterns.values() {
337            if category.enabled {
338                stats.enabled_categories += 1;
339
340                for rule in &category.rules {
341                    if rule.enabled {
342                        stats.enabled_rules += 1;
343                        match rule.rule_type {
344                            crate::config::RuleType::Regex => stats.regex_patterns += 1,
345                            crate::config::RuleType::Ast => stats.ast_patterns += 1,
346                            crate::config::RuleType::Semantic => stats.semantic_patterns += 1,
347                            crate::config::RuleType::ImportAnalysis => stats.import_patterns += 1,
348                        }
349                    } else {
350                        stats.disabled_rules += 1;
351                    }
352                }
353            } else {
354                stats.disabled_categories += 1;
355                stats.disabled_rules += category.rules.len();
356            }
357        }
358
359        stats
360    }
361}
362
/// Statistics about configured patterns
///
/// All counters start at zero via `Default`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PatternStats {
    /// Number of enabled categories
    pub enabled_categories: usize,
    /// Number of disabled categories
    pub disabled_categories: usize,
    /// Number of enabled rules
    pub enabled_rules: usize,
    /// Number of disabled rules
    pub disabled_rules: usize,
    /// Number of regex-type rules counted
    pub regex_patterns: usize,
    /// Number of AST-type rules counted
    pub ast_patterns: usize,
    /// Number of semantic-type rules counted
    pub semantic_patterns: usize,
    /// Number of import-analysis-type rules counted
    pub import_patterns: usize,
}

impl PatternStats {
    /// Total number of categories, enabled plus disabled.
    pub fn total_categories(&self) -> usize {
        self.enabled_categories + self.disabled_categories
    }

    /// Total number of rules, enabled plus disabled.
    pub fn total_rules(&self) -> usize {
        self.enabled_rules + self.disabled_rules
    }
}
385
386/// Trait for custom file analyzers
387pub trait FileAnalyzer {
388    /// Analyze a file and return violations
389    fn analyze(&self, file_path: &Path, content: &str) -> GuardianResult<Vec<Violation>>;
390
391    /// Check if this analyzer handles the given file type
392    fn handles_file(&self, file_path: &Path) -> bool;
393}
394
395/// Self-validation methods for analyzer functionality
396/// Following code quality principle: Components should be self-validating
397#[cfg(test)]
398impl Analyzer {
399    /// Validate analyzer creation and configuration
400    pub fn validate_initialization(&self) -> GuardianResult<()> {
401        let stats = self.pattern_stats();
402
403        if stats.enabled_rules == 0 {
404            return Err(GuardianError::config(
405                "Analyzer must have at least one enabled rule".to_string(),
406            ));
407        }
408
409        if stats.regex_patterns == 0 && stats.ast_patterns == 0 && stats.semantic_patterns == 0 {
410            return Err(GuardianError::config(
411                "Analyzer must have at least one pattern type enabled".to_string(),
412            ));
413        }
414
415        Ok(())
416    }
417
418    /// Validate single file analysis capabilities
419    pub fn validate_file_analysis(&self, test_content: &str) -> GuardianResult<()> {
420        use std::fs;
421        use tempfile::TempDir;
422
423        let temp_dir = TempDir::new().map_err(|e| {
424            GuardianError::analysis(
425                "validation".to_string(),
426                format!("Failed to create temp dir: {e}"),
427            )
428        })?;
429        let file_path = temp_dir.path().join("validation_test.rs");
430
431        fs::write(&file_path, test_content).map_err(|e| {
432            GuardianError::analysis(
433                "validation".to_string(),
434                format!("Failed to write test file: {e}"),
435            )
436        })?;
437
438        let violations = self.analyze_file(&file_path)?;
439
440        // Validate that violations are properly formatted
441        for violation in &violations {
442            if violation.rule_id.is_empty() {
443                return Err(GuardianError::analysis(
444                    "validation".to_string(),
445                    "Violation missing rule_id".to_string(),
446                ));
447            }
448            if violation.message.is_empty() {
449                return Err(GuardianError::analysis(
450                    "validation".to_string(),
451                    "Violation missing message".to_string(),
452                ));
453            }
454        }
455
456        Ok(())
457    }
458
459    /// Validate directory analysis capabilities
460    pub fn validate_directory_analysis(&self) -> GuardianResult<()> {
461        use std::fs;
462        use tempfile::TempDir;
463
464        let temp_dir = TempDir::new().map_err(|e| {
465            GuardianError::analysis(
466                "validation".to_string(),
467                format!("Failed to create temp dir: {e}"),
468            )
469        })?;
470        let root = temp_dir.path();
471
472        // Create realistic directory structure
473        fs::create_dir_all(root.join("src")).map_err(|e| {
474            GuardianError::analysis(
475                "validation".to_string(),
476                format!("Failed to create src dir: {e}"),
477            )
478        })?;
479        fs::create_dir_all(root.join("target/debug")).map_err(|e| {
480            GuardianError::analysis(
481                "validation".to_string(),
482                format!("Failed to create target dir: {e}"),
483            )
484        })?;
485
486        // Create test files with known patterns
487        fs::write(root.join("src/lib.rs"), "//! Test module\n//!\n//! Code Quality Principle: Self-validation\npub fn test() { /* implementation */ }")
488            .map_err(|e| GuardianError::analysis("validation".to_string(), format!("Failed to write lib.rs: {e}")))?;
489        fs::write(root.join("src/main.rs"), "//! Main module\n//!\n//! Code Quality Principle: Entry point\nfn main() { eprintln!(\"Application starting\"); }")
490            .map_err(|e| GuardianError::analysis("validation".to_string(), format!("Failed to write main.rs: {e}")))?;
491        fs::write(root.join("target/debug/app"), "binary content").map_err(|e| {
492            GuardianError::analysis(
493                "validation".to_string(),
494                format!("Failed to write binary: {e}"),
495            )
496        })?;
497
498        let report = self.analyze_directory(root, &AnalysisOptions::default())?;
499
500        // Validate report structure
501        if report.summary.total_files == 0 {
502            return Err(GuardianError::analysis(
503                "validation".to_string(),
504                "Directory analysis should find at least one file".to_string(),
505            ));
506        }
507
508        // Validate that target directory is excluded
509        let target_violations = report
510            .violations
511            .iter()
512            .filter(|v| v.file_path.to_string_lossy().contains("target/"))
513            .count();
514
515        if target_violations > 0 {
516            return Err(GuardianError::analysis(
517                "validation".to_string(),
518                "Target directory should be excluded from analysis".to_string(),
519            ));
520        }
521
522        Ok(())
523    }
524
525    /// Validate analysis options functionality
526    pub fn validate_analysis_options(&self) -> GuardianResult<()> {
527        use std::fs;
528        use tempfile::TempDir;
529
530        let temp_dir = TempDir::new().map_err(|e| {
531            GuardianError::analysis(
532                "validation".to_string(),
533                format!("Failed to create temp dir: {e}"),
534            )
535        })?;
536        let root = temp_dir.path();
537
538        fs::create_dir_all(root.join("src")).map_err(|e| {
539            GuardianError::analysis(
540                "validation".to_string(),
541                format!("Failed to create src dir: {e}"),
542            )
543        })?;
544        fs::write(
545            root.join("src/lib.rs"),
546            "//! Test lib\n//!\n//! Code Quality Principle: Testing\npub fn lib() {}",
547        )
548        .map_err(|e| {
549            GuardianError::analysis(
550                "validation".to_string(),
551                format!("Failed to write lib.rs: {e}"),
552            )
553        })?;
554        fs::write(
555            root.join("src/main.rs"),
556            "//! Test main\n//!\n//! Code Quality Principle: Entry\nfn main() {}",
557        )
558        .map_err(|e| {
559            GuardianError::analysis(
560                "validation".to_string(),
561                format!("Failed to write main.rs: {e}"),
562            )
563        })?;
564
565        // Test max_files limitation
566        let options = AnalysisOptions {
567            max_files: Some(1),
568            ..Default::default()
569        };
570
571        let report = self.analyze_directory(root, &options)?;
572
573        if report.summary.total_files != 1 {
574            return Err(GuardianError::analysis(
575                "validation".to_string(),
576                format!(
577                    "Expected 1 file with max_files=1, got {}",
578                    report.summary.total_files
579                ),
580            ));
581        }
582
583        Ok(())
584    }
585}
586
/// Run every analyzer self-validation against a default-configured analyzer.
///
/// Checks initialization, single-file analysis, directory analysis and
/// analysis options in that order, then confirms that the pattern statistics
/// report at least one rule and at least one category.
///
/// # Errors
///
/// Returns the first validation failure encountered.
#[cfg(test)]
pub fn validate_analyzer_domain() -> GuardianResult<()> {
    const SAMPLE_SOURCE: &str =
        "//! Test\n//!\n//! Code Quality Principle: Validation\nfn test() {}";

    let analyzer = Analyzer::with_defaults()?;

    // Core functionality
    analyzer.validate_initialization()?;
    analyzer.validate_file_analysis(SAMPLE_SOURCE)?;
    analyzer.validate_directory_analysis()?;
    analyzer.validate_analysis_options()?;

    // Pattern statistics: rules are checked before categories
    let stats = analyzer.pattern_stats();
    let complaint = if stats.total_rules() == 0 {
        Some("Pattern statistics validation failed: no rules configured")
    } else if stats.total_categories() == 0 {
        Some("Pattern statistics validation failed: no categories configured")
    } else {
        None
    };

    match complaint {
        Some(message) => Err(GuardianError::config(message.to_string())),
        None => Ok(()),
    }
}
rust_guardian/analyzer/mod.rs

rust_guardian/analyzer/
mod.rs