// raxit-core 0.1.1
//
// Core security scanning engine for AI agent applications
// Documentation
//! Scanner implementation - orchestrates the scanning pipeline

use crate::{
    cache::FileCache,
    config::ScanConfig,
    error::{RaxitError, Result},
    extractors,
    schema::{ScanResult, TrustBoundary},
};
use rayon::prelude::*;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

/// Main scanner orchestrator
///
/// Bundles the scan configuration with the file cache used for incremental
/// scans and the on-disk location of that cache.
pub struct Scanner {
    /// Settings for this scan: root path, include/exclude patterns, mode flags.
    config: ScanConfig,
    /// File cache; loaded from `cache_path` in incremental mode, otherwise fresh.
    cache: FileCache,
    /// `<config.path>/<config.cache_dir>/cache.json`.
    cache_path: PathBuf,
}

impl Scanner {
    /// Build a scanner for `config`.
    ///
    /// # Errors
    ///
    /// Returns [`RaxitError::InvalidPath`] when `config.path` does not exist,
    /// and propagates any error from [`FileCache::load`] in incremental mode.
    pub fn new(config: ScanConfig) -> Result<Self> {
        let root = &config.path;
        if !root.exists() {
            return Err(RaxitError::InvalidPath(root.clone()));
        }

        // The cache file lives at `<root>/<cache_dir>/cache.json`.
        let cache_path = root.join(&config.cache_dir).join("cache.json");

        // A previously saved cache is only consulted for incremental scans;
        // every other scan starts from an empty one.
        let cache = if config.incremental {
            FileCache::load(&cache_path)?
        } else {
            FileCache::new()
        };

        let scanner = Self {
            config,
            cache,
            cache_path,
        };
        Ok(scanner)
    }

    /// Walk the configured root and collect the files that need scanning.
    ///
    /// Entries matching an exclude pattern are filtered out of the walk
    /// (excluded directories are not descended into). Of the remaining
    /// entries, only files matching an include pattern are considered. In
    /// incremental mode a file is scanned only when the cache reports it as
    /// changed; if the cache check itself fails, the file is scanned anyway.
    ///
    /// Returns `(files_to_scan, files_skipped)`, where `files_skipped` counts
    /// the files left out because the cache considers them unchanged.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while walking the directory tree.
    pub fn discover_files(&mut self) -> Result<(Vec<PathBuf>, usize)> {
        let mut files_to_scan = Vec::new();
        let mut files_skipped = 0;

        let walker = WalkDir::new(&self.config.path)
            .follow_links(false)
            .into_iter()
            .filter_entry(|e| !self.should_exclude(e.path()));

        for entry in walker {
            let entry = entry?;
            let path = entry.path();

            if !(path.is_file() && self.should_include(path)) {
                continue;
            }

            // Outside incremental mode every matching file is scanned.
            let needs_scan = if self.config.incremental {
                match self.cache.has_changed(path) {
                    Ok(changed) => changed,
                    Err(e) => {
                        tracing::warn!("Failed to check cache for {}: {}", path.display(), e);
                        // On error, scan the file to be safe
                        true
                    }
                }
            } else {
                true
            };

            if needs_scan {
                files_to_scan.push(path.to_path_buf());
            } else {
                files_skipped += 1;
                tracing::debug!("Skipping unchanged file: {}", path.display());
            }
        }

        // Every discovered file is either queued or skipped, so the total is
        // the sum of both; no separate list of all files is needed.
        tracing::debug!(
            "Discovered {} files ({} to scan, {} skipped)",
            files_to_scan.len() + files_skipped,
            files_to_scan.len(),
            files_skipped
        );

        Ok((files_to_scan, files_skipped))
    }

    /// Detect AI agent frameworks used by `files`.
    ///
    /// Each readable file is searched for Python import statements of the
    /// known frameworks (`pydantic_ai`, `langgraph`, `crewai`), including
    /// imports from their submodules such as
    /// `from langgraph.graph import StateGraph`. Files that cannot be read as
    /// UTF-8 text are ignored.
    ///
    /// The returned names are sorted and free of duplicates, so the result is
    /// identical from run to run. Callers that treat the first entry as the
    /// primary framework therefore get a stable answer.
    pub fn detect_frameworks(&self, files: &[PathBuf]) -> Result<Vec<String>> {
        // (reported framework name, import snippets that identify it)
        const FRAMEWORKS: [(&str, [&str; 3]); 3] = [
            (
                "pydantic-ai",
                [
                    "from pydantic_ai import",
                    "import pydantic_ai",
                    "from pydantic_ai.",
                ],
            ),
            (
                "langgraph",
                [
                    "from langgraph import",
                    "import langgraph",
                    "from langgraph.",
                ],
            ),
            (
                "crewai",
                ["from crewai import", "import crewai", "from crewai."],
            ),
        ];

        // An ordered set keeps the output independent of hash iteration order.
        let mut frameworks: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();

        for file in files {
            let content = match std::fs::read_to_string(file) {
                Ok(content) => content,
                Err(_) => continue,
            };

            for (name, needles) in FRAMEWORKS.iter() {
                // Already found in an earlier file; no need to search again.
                if frameworks.contains(name) {
                    continue;
                }
                if needles.iter().any(|needle| content.contains(*needle)) {
                    frameworks.insert(*name);
                }
            }
        }

        Ok(frameworks.into_iter().map(String::from).collect())
    }

    /// Extract assets from `files` and assemble them into a [`ScanResult`].
    ///
    /// Besides the extracted agents, tools, models and memory, the result's
    /// manifest records the project name/version/source, the file list, the
    /// exclude patterns, the detected `frameworks`, the worker count, the
    /// incremental flag and the scanned/skipped file counts. The first entry
    /// of `frameworks` (or `"unknown"` when it is empty) is handed to the
    /// extractors as the primary framework.
    ///
    /// A file whose extraction fails is logged and contributes nothing to the
    /// result. It is also not written to the cache, so a failed file is never
    /// recorded as up to date.
    ///
    /// In incremental mode the cache is updated for every successfully
    /// extracted file and then saved; failures to update or save are logged
    /// rather than returned.
    pub fn extract_all(
        &mut self,
        files: &[PathBuf],
        frameworks: &[String],
        files_skipped: usize,
    ) -> Result<ScanResult> {
        let mut result = ScanResult::new();

        // Populate manifest with scan metadata
        result.manifest.subject.name = self.detect_project_name();
        result.manifest.subject.version = self.detect_project_version();
        result.manifest.subject.source = self.detect_project_source();

        // Add scanned files to manifest
        result.manifest.files = files
            .iter()
            .map(|p| p.to_string_lossy().to_string())
            .collect();

        // Update scan configuration metadata
        result.manifest.scan_config.exclude_patterns = self.config.exclude.clone();
        result.manifest.scan_config.frameworks_detected = frameworks.to_vec();
        // NOTE(review): this only records the worker count; nothing in this
        // function sizes the rayon pool from `max_threads` — confirm that is
        // done elsewhere.
        result.manifest.scan_config.parallel_workers = if self.config.parallel {
            self.config.max_threads.unwrap_or_else(num_cpus::get)
        } else {
            1
        };
        result.manifest.scan_config.incremental = self.config.incremental;
        result.manifest.scan_config.files_scanned = files.len();
        result.manifest.scan_config.files_skipped = files_skipped;

        // Determine primary framework (use first detected)
        let primary_framework = frameworks.first().map(|s| s.as_str()).unwrap_or("unknown");

        // Report extraction failures instead of dropping them silently. The
        // error is consumed here so only the assets cross thread boundaries.
        let extract = |file: &PathBuf| {
            match extractors::extract_from_file(file, primary_framework) {
                Ok(assets) => Some(assets),
                Err(e) => {
                    tracing::warn!("Failed to extract assets from {}: {}", file.display(), e);
                    None
                }
            }
        };

        // One outcome per input file, in input order, for both execution modes.
        let outcomes: Vec<_> = if self.config.parallel {
            files.par_iter().map(extract).collect()
        } else {
            files.iter().map(extract).collect()
        };

        // Combine all extracted assets, remembering which files succeeded
        let mut extracted_files: Vec<&PathBuf> = Vec::with_capacity(files.len());
        for (file, outcome) in files.iter().zip(outcomes) {
            if let Some(assets) = outcome {
                result.agents.extend(assets.agents);
                result.tools.extend(assets.tools);
                result.models.extend(assets.models);
                result.memory.extend(assets.memory);
                extracted_files.push(file);
            }
        }

        // Update cache, but only for files that were actually extracted
        if self.config.incremental {
            for file in extracted_files {
                if let Err(e) = self.cache.update(file) {
                    tracing::warn!("Failed to update cache for {}: {}", file.display(), e);
                }
            }

            // Save cache
            if let Err(e) = self.cache.save(&self.cache_path) {
                tracing::warn!("Failed to save cache: {}", e);
            }
        }

        tracing::info!(
            "Extracted {} agents, {} tools, {} models from {} files",
            result.agents.len(),
            result.tools.len(),
            result.models.len(),
            files.len()
        );

        Ok(result)
    }

    /// Detect the project name from `pyproject.toml`, falling back to the
    /// name of the scanned directory.
    ///
    /// The file is scanned line by line (this is not a full TOML parse): the
    /// first non-empty value assigned to a key that is exactly `name` wins,
    /// whichever table it appears in. Surrounding double or single quotes are
    /// removed, as is anything after the closing quote such as a comment.
    ///
    /// If no name is found, the final component of the resolved scan path is
    /// used, and `"unknown"` if even that is unavailable.
    fn detect_project_name(&self) -> String {
        // Extract the textual value from the right-hand side of `key = value`.
        fn string_value(raw: &str) -> &str {
            let raw = raw.trim();
            match raw.chars().next() {
                // Quoted: keep what lies between the opening quote and the next one.
                Some(quote) if quote == '"' || quote == '\'' => {
                    raw[1..].split(quote).next().unwrap_or("")
                }
                // Unquoted: drop a trailing `# comment`.
                _ => raw.split('#').next().unwrap_or(raw).trim(),
            }
        }

        // Try to read pyproject.toml
        let pyproject_path = self.config.path.join("pyproject.toml");
        if let Ok(content) = std::fs::read_to_string(&pyproject_path) {
            for line in content.lines() {
                // Split at the first `=` only, so the value is never truncated.
                let mut parts = line.splitn(2, '=');
                // Compare the whole key so `namespace = ...` is not mistaken for `name`.
                if parts.next().map(str::trim) != Some("name") {
                    continue;
                }
                if let Some(raw) = parts.next() {
                    let name = string_value(raw);
                    if !name.is_empty() {
                        return name.to_string();
                    }
                }
            }
        }

        // Fallback to directory name. `file_name()` is `None` for paths such
        // as `.` or `..`, so resolve the path first where possible.
        let root = std::fs::canonicalize(&self.config.path)
            .unwrap_or_else(|_| self.config.path.to_path_buf());
        root.file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown")
            .to_string()
    }

    /// Detect the project version from `pyproject.toml`.
    ///
    /// The file is scanned line by line (this is not a full TOML parse): the
    /// first non-empty value assigned to a key that is exactly `version` wins,
    /// whichever table it appears in. Surrounding double or single quotes are
    /// removed, as is anything after the closing quote such as a comment.
    ///
    /// Returns `None` when the file cannot be read or holds no such value.
    fn detect_project_version(&self) -> Option<String> {
        // Extract the textual value from the right-hand side of `key = value`.
        fn string_value(raw: &str) -> &str {
            let raw = raw.trim();
            match raw.chars().next() {
                // Quoted: keep what lies between the opening quote and the next one.
                Some(quote) if quote == '"' || quote == '\'' => {
                    raw[1..].split(quote).next().unwrap_or("")
                }
                // Unquoted: drop a trailing `# comment`.
                _ => raw.split('#').next().unwrap_or(raw).trim(),
            }
        }

        let pyproject_path = self.config.path.join("pyproject.toml");
        let content = std::fs::read_to_string(&pyproject_path).ok()?;

        content.lines().find_map(|line| {
            // Split at the first `=` only, so the value is never truncated.
            let mut parts = line.splitn(2, '=');
            // Compare the whole key so `version_file = ...` is not mistaken for `version`.
            if parts.next().map(str::trim) != Some("version") {
                return None;
            }
            let version = string_value(parts.next()?);
            if version.is_empty() {
                None
            } else {
                Some(version.to_string())
            }
        })
    }

    /// Detect the project source from the repository's `.git/config`.
    ///
    /// Returns the value of the first `url = ...` line in the file, whichever
    /// section it belongs to, or `None` when the file cannot be read or has
    /// no such line.
    ///
    /// For `http://` and `https://` URLs any `user[:password]@` prefix is
    /// removed, so credentials stored in a remote URL are not copied into the
    /// scan result.
    fn detect_project_source(&self) -> Option<String> {
        // Remove embedded credentials from an http(s) URL; other URL forms
        // (for example `git@host:org/repo.git`) are returned unchanged.
        fn strip_http_credentials(url: &str) -> String {
            for scheme in ["https://", "http://"].iter() {
                if url.starts_with(*scheme) {
                    let rest = &url[scheme.len()..];
                    // Userinfo can only appear before the first `/`, `?` or `#`.
                    let authority_end = rest
                        .find(|c| c == '/' || c == '?' || c == '#')
                        .unwrap_or(rest.len());
                    if let Some(at) = rest[..authority_end].rfind('@') {
                        return format!("{}{}", scheme, &rest[at + 1..]);
                    }
                }
            }
            url.to_string()
        }

        let git_config = self.config.path.join(".git/config");
        let content = std::fs::read_to_string(&git_config).ok()?;

        content.lines().find_map(|line| {
            // Split at the first `=` only: URLs may themselves contain `=`.
            let mut parts = line.splitn(2, '=');
            if parts.next().map(str::trim) != Some("url") {
                return None;
            }
            parts.next().map(|url| strip_http_credentials(url.trim()))
        })
    }

    /// Build call graph from extracted assets
    pub fn build_call_graph(&self, results: &ScanResult) -> Result<CallGraph> {
        crate::analyzers::build_call_graph(results)
    }

    /// Analyze trust boundaries using Meta's Rule of Two
    pub fn analyze_trust_boundaries(&self, results: &ScanResult) -> Result<Vec<TrustBoundary>> {
        crate::analyzers::analyze_trust_boundaries(results)
    }

    /// Generate final schema with all analysis results
    pub fn generate_schema(
        &self,
        results: &ScanResult,
        boundaries: &[TrustBoundary],
    ) -> Result<ScanResult> {
        let mut schema = results.clone();
        schema.trust_boundaries = boundaries.to_vec();
        Ok(schema)
    }

    /// Report whether `path` matches at least one configured include pattern.
    ///
    /// The path is compared in its lossy string form. Patterns that fail to
    /// compile as globs never match.
    fn should_include(&self, path: &Path) -> bool {
        let candidate = path.to_string_lossy();

        for raw_pattern in self.config.include.iter() {
            if let Ok(compiled) = glob::Pattern::new(raw_pattern) {
                if compiled.matches(&candidate) {
                    return true;
                }
            }
        }

        false
    }

    /// Report whether `path` matches at least one configured exclude pattern.
    ///
    /// The path is compared in its lossy string form. Patterns that fail to
    /// compile as globs never match.
    fn should_exclude(&self, path: &Path) -> bool {
        let candidate = path.to_string_lossy();

        for raw_pattern in self.config.exclude.iter() {
            if let Ok(compiled) = glob::Pattern::new(raw_pattern) {
                if compiled.matches(&candidate) {
                    return true;
                }
            }
        }

        false
    }
}

/// Call graph representation (placeholder)
///
/// Currently holds only a list of nodes. `Debug` is derived so that a
/// `Result<CallGraph>` can be used with `unwrap_err` / `expect_err` and
/// printed in diagnostics.
#[derive(Debug, Clone, Default)]
pub struct CallGraph {
    /// Nodes of the graph.
    nodes: Vec<String>,
}

impl CallGraph {
    /// Create an empty call graph.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Borrow the graph's nodes as a slice.
    #[must_use]
    pub fn nodes(&self) -> &[String] {
        &self.nodes
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Building a scanner from the default configuration succeeds.
    #[test]
    fn test_scanner_creation() {
        assert!(Scanner::new(ScanConfig::default()).is_ok());
    }

    /// A root path that does not exist is rejected.
    #[test]
    fn test_invalid_path() {
        let outcome = Scanner::new(ScanConfig::new("/nonexistent/path"));
        assert!(outcome.is_err());
    }
}