vsec 0.0.1

Detect secrets in Rust codebases
Documentation
// src/scanner/mod.rs

pub mod analyzer;
pub mod indexer;

use std::path::PathBuf;

use crate::models::Finding;
use crate::registry::SuspectRegistry;
use crate::scoring::{ScoringConfig, ScoringEngine};

pub use analyzer::Analyzer;
pub use indexer::Indexer;

/// Configuration for the scanner
///
/// Derives `Default`, so `ScannerConfig::default()` yields the default
/// `ScoringConfig`, `skip_indexing == false` and `max_file_size == None`.
#[derive(Debug, Clone, Default)]
pub struct ScannerConfig {
    /// Scoring configuration; `Scanner::new` clones it to build the
    /// `ScoringEngine`.
    pub scoring: ScoringConfig,

    /// Whether to skip Phase 1 (indexing). When `true`, `Scanner::scan` runs
    /// Phase 2 against an empty `SuspectRegistry` instead of running the
    /// `Indexer` over the input files.
    pub skip_indexing: bool,

    /// Maximum file size to scan (in bytes)
    // NOTE(review): nothing in the code visible in this module reads this
    // field; presumably the caller filters files by size before handing them
    // to the scanner — confirm, otherwise the limit is never enforced.
    pub max_file_size: Option<usize>,
}

/// Main scanner that orchestrates the two-pass architecture
///
/// Phase 1 indexes constant definitions into a `SuspectRegistry`; Phase 2
/// runs an `Analyzer` over the same files using that registry together with
/// the scoring engine.
pub struct Scanner {
    /// Scoring engine, built once in `Scanner::new` from `config.scoring` and
    /// lent to every `Analyzer` this scanner creates.
    engine: ScoringEngine,
    /// Settings supplied at construction; `scan` consults `skip_indexing`.
    config: ScannerConfig,
}

impl Scanner {
    /// Builds a scanner from `config`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ScoringEngine::new` for
    /// `config.scoring`.
    pub fn new(config: ScannerConfig) -> Result<Self, Box<dyn std::error::Error>> {
        // The engine receives its own copy of the scoring settings so the
        // complete `config` can still be stored alongside it.
        let engine = ScoringEngine::new(config.scoring.clone())?;
        Ok(Self { engine, config })
    }

    /// Scan a codebase using the two-pass architecture
    ///
    /// Phase 1 builds a `SuspectRegistry` by indexing `files` (or uses an
    /// empty registry when `config.skip_indexing` is set). Phase 2 analyzes
    /// the same `files` against that registry.
    ///
    /// In the returned [`ScanResult`], `files_scanned` is `files.len()` (the
    /// number of paths handed in) and `constants_indexed` mirrors
    /// `registry_stats.total`.
    ///
    /// NOTE(review): `config.max_file_size` is not consulted anywhere in this
    /// impl; confirm that callers pre-filter `files` by size.
    pub fn scan(&self, files: Vec<PathBuf>) -> ScanResult {
        let registry = if self.config.skip_indexing {
            SuspectRegistry::new()
        } else {
            // PHASE 1: Index all constant definitions
            tracing::info!("Phase 1: Indexing {} files...", files.len());
            Indexer::new().index_files(&files)
        };

        // Logged even when indexing was skipped, in which case the numbers
        // describe the empty registry.
        let stats = registry.stats();
        tracing::info!(
            "Indexed {} constants ({} public, {} suspicious)",
            stats.total,
            stats.public,
            stats.suspicious
        );

        // PHASE 2: Analyze for secret usage
        tracing::info!("Phase 2: Analyzing {} files...", files.len());
        let findings = Analyzer::new(&registry, &self.engine).scan_files(&files);

        tracing::info!("Found {} potential secrets", findings.len());

        ScanResult {
            findings,
            files_scanned: files.len(),
            // Read `total` before `stats` is moved into `registry_stats`.
            constants_indexed: stats.total,
            registry_stats: stats,
        }
    }

    /// Get a reference to the scoring engine
    pub fn engine(&self) -> &ScoringEngine {
        &self.engine
    }

    /// Scan content from a string (for git history scanning)
    ///
    /// Parses `content` as a Rust source file and analyzes the resulting AST,
    /// attributing findings to `path`. The analysis uses an empty registry, so
    /// there is no cross-file constant resolution.
    ///
    /// Returns the findings for the given content. If `content` cannot be
    /// parsed, the failure is logged at debug level and an empty vector is
    /// returned, so callers can keep going over the remaining inputs.
    pub fn scan_content(&self, path: &std::path::Path, content: &str) -> Vec<Finding> {
        // Parse the content directly with syn
        let ast = match syn::parse_file(content) {
            Ok(ast) => ast,
            Err(err) => {
                // Best-effort: record why the content was skipped instead of
                // dropping the error without a trace.
                tracing::debug!(
                    "Skipping {}: could not parse as Rust source: {}",
                    path.display(),
                    err
                );
                return Vec::new();
            }
        };

        // Create an empty registry (no cross-file constant resolution for historical files)
        let registry = SuspectRegistry::new();

        // Create analyzer and scan this single file's AST
        Analyzer::new(&registry, &self.engine).scan_ast(path, &ast)
    }
}

/// Result of a scan operation
///
/// Produced by `Scanner::scan`; bundles the findings with counters that
/// describe the run.
#[derive(Debug)]
pub struct ScanResult {
    /// All findings from the scan
    pub findings: Vec<Finding>,

    /// Number of files scanned. `Scanner::scan` sets this to the length of
    /// the input path list.
    pub files_scanned: usize,

    /// Number of constants indexed. `Scanner::scan` copies this from the
    /// registry statistics' `total`.
    pub constants_indexed: usize,

    /// Registry statistics
    pub registry_stats: crate::registry::RegistryStats,
}

impl ScanResult {
    /// Lowest total score that counts as a critical finding.
    const CRITICAL_SCORE: i32 = 100;

    /// Lowest total score that counts as a high-severity finding.
    const HIGH_SCORE: i32 = 70;

    /// Returns borrowed findings ordered from highest to lowest total score.
    ///
    /// The sort is stable: findings with equal scores keep the relative order
    /// they have in `self.findings`, which itself is left untouched.
    pub fn sorted_by_severity(&self) -> Vec<&Finding> {
        let mut ranked: Vec<&Finding> = self.findings.iter().collect();
        ranked.sort_by_key(|finding| std::cmp::Reverse(finding.score.total));
        ranked
    }

    /// Returns the findings whose total score is at least `threshold`
    /// (inclusive), in their original order.
    pub fn above_threshold(&self, threshold: i32) -> Vec<&Finding> {
        let meets_threshold = |finding: &&Finding| finding.score.total >= threshold;
        self.findings.iter().filter(meets_threshold).collect()
    }

    /// Reports whether at least one finding reaches the critical score (100).
    pub fn has_critical(&self) -> bool {
        self.any_scoring_at_least(Self::CRITICAL_SCORE)
    }

    /// Reports whether at least one finding reaches the high-severity score
    /// (70). Critical findings also satisfy this check.
    pub fn has_high(&self) -> bool {
        self.any_scoring_at_least(Self::HIGH_SCORE)
    }

    /// True when any finding has a total score of `minimum` or more.
    fn any_scoring_at_least(&self, minimum: i32) -> bool {
        self.findings
            .iter()
            .any(|finding| finding.score.total >= minimum)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `sorted_by_severity` must list the higher-scored finding first even
    /// though it is stored last in `findings`.
    #[test]
    fn test_scan_result_sorting() {
        use crate::models::{AnalysisContext, FindingId, Score, SourceLocation, SuspectValue};
        use std::collections::HashMap;
        use std::path::PathBuf;

        // Low-scoring finding, deliberately placed first in the input.
        let low = Finding {
            id: FindingId::new("a"),
            suspect: SuspectValue::Constant {
                name: "LOW".into(),
                value: "x".into(),
                type_annotation: None,
            },
            location: SourceLocation::new(PathBuf::from("a.rs"), 1, 0),
            usage: None,
            context: AnalysisContext::new(PathBuf::from("a.rs")),
            score: Score::from_total(30),
            explanation: String::new(),
            remediation: None,
            metadata: HashMap::new(),
        };

        // High-scoring finding, placed second so sorting has to move it.
        let high = Finding {
            id: FindingId::new("b"),
            suspect: SuspectValue::Constant {
                name: "HIGH".into(),
                value: "y".into(),
                type_annotation: None,
            },
            location: SourceLocation::new(PathBuf::from("b.rs"), 1, 0),
            usage: None,
            context: AnalysisContext::new(PathBuf::from("b.rs")),
            score: Score::from_total(90),
            explanation: String::new(),
            remediation: None,
            metadata: HashMap::new(),
        };

        let result = ScanResult {
            findings: vec![low, high],
            files_scanned: 2,
            constants_indexed: 2,
            registry_stats: Default::default(),
        };

        let names_in_order: Vec<_> = result
            .sorted_by_severity()
            .into_iter()
            .map(|finding| finding.suspect.name())
            .collect();
        assert_eq!(names_in_order, vec![Some("HIGH"), Some("LOW")]);
    }
}