use crate::{
cache::FileCache,
config::ScanConfig,
error::{RaxitError, Result},
extractors,
schema::{ScanResult, TrustBoundary},
};
use rayon::prelude::*;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Scans a project directory: discovers candidate files, detects which
/// frameworks they use and extracts assets into a [`ScanResult`].
pub struct Scanner {
/// Scan settings supplied to [`Scanner::new`].
config: ScanConfig,
/// Per-file change cache; loaded from `cache_path` when the scan is
/// incremental, otherwise created empty.
cache: FileCache,
/// Location of the cache file: `<config.path>/<config.cache_dir>/cache.json`.
cache_path: PathBuf,
}
impl Scanner {
/// Builds a scanner for the directory named by `config.path`.
///
/// The cache file lives at `<path>/<cache_dir>/cache.json`. It is only read
/// when `config.incremental` is set; otherwise the scanner starts with an
/// empty cache.
///
/// # Errors
///
/// Returns [`RaxitError::InvalidPath`] when `config.path` does not exist, and
/// propagates any error from [`FileCache::load`] in incremental mode.
pub fn new(config: ScanConfig) -> Result<Self> {
    let root = &config.path;
    if !root.exists() {
        return Err(RaxitError::InvalidPath(root.clone()));
    }

    let mut cache_path = root.join(&config.cache_dir);
    cache_path.push("cache.json");

    let cache = if !config.incremental {
        FileCache::new()
    } else {
        FileCache::load(&cache_path)?
    };

    Ok(Self {
        config,
        cache,
        cache_path,
    })
}
/// Walks the configured root and returns the files that need scanning.
///
/// Entries for which `should_exclude` is true are pruned from the walk;
/// symbolic links are not followed. A regular file is a candidate when
/// `should_include` accepts its path.
///
/// In incremental mode a candidate is skipped when the cache reports it as
/// unchanged. If the cache check itself fails, the file is scanned anyway and
/// a warning is logged.
///
/// Returns `(files_to_scan, files_skipped)`.
///
/// # Errors
///
/// Propagates any error produced while walking the directory tree.
pub fn discover_files(&mut self) -> Result<(Vec<PathBuf>, usize)> {
    let mut files_to_scan = Vec::new();
    // Only the count of candidates is reported, so keep a counter instead of
    // cloning every path into a throwaway vector.
    let mut files_discovered = 0usize;
    let mut files_skipped = 0usize;

    let walker = WalkDir::new(&self.config.path)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| !self.should_exclude(e.path()));

    for entry in walker {
        let entry = entry?;
        let path = entry.path();
        if !(path.is_file() && self.should_include(path)) {
            continue;
        }
        files_discovered += 1;

        let needs_scan = if self.config.incremental {
            match self.cache.has_changed(path) {
                Ok(changed) => changed,
                Err(e) => {
                    // Best effort: an unreadable cache entry must not hide the file.
                    tracing::warn!("Failed to check cache for {}: {}", path.display(), e);
                    true
                }
            }
        } else {
            true
        };

        if needs_scan {
            files_to_scan.push(path.to_path_buf());
        } else {
            files_skipped += 1;
            tracing::debug!("Skipping unchanged file: {}", path.display());
        }
    }

    tracing::debug!(
        "Discovered {} files ({} to scan, {} skipped)",
        files_discovered,
        files_to_scan.len(),
        files_skipped
    );
    Ok((files_to_scan, files_skipped))
}
/// Detects which supported frameworks are imported by any of `files`.
///
/// Detection is a plain substring search over each file's text for Python
/// import statements (`from <pkg> import`, `from <pkg>.<submodule> ...`,
/// `import <pkg>`). Files that cannot be read as UTF-8 text are ignored.
///
/// The returned names are sorted, so callers that treat the first entry as
/// the primary framework get the same answer on every run.
pub fn detect_frameworks(&self, files: &[PathBuf]) -> Result<Vec<String>> {
    // (framework name, import markers that identify it)
    const SIGNATURES: [(&str, [&str; 3]); 3] = [
        (
            "pydantic-ai",
            [
                "from pydantic_ai import",
                "from pydantic_ai.",
                "import pydantic_ai",
            ],
        ),
        (
            "langgraph",
            [
                "from langgraph import",
                "from langgraph.",
                "import langgraph",
            ],
        ),
        (
            "crewai",
            ["from crewai import", "from crewai.", "import crewai"],
        ),
    ];

    // An ordered set keeps the output deterministic.
    let mut frameworks = std::collections::BTreeSet::new();
    for file in files {
        if frameworks.len() == SIGNATURES.len() {
            // Every known framework has been seen; no need to read more files.
            break;
        }
        if let Ok(content) = std::fs::read_to_string(file) {
            for (name, markers) in SIGNATURES.iter() {
                if !frameworks.contains(name)
                    && markers.iter().any(|marker| content.contains(*marker))
                {
                    frameworks.insert(*name);
                }
            }
        }
    }
    Ok(frameworks.into_iter().map(str::to_string).collect())
}
/// Extracts assets from `files` and assembles them into a [`ScanResult`].
///
/// The manifest is filled from the scanner configuration, the detected
/// project metadata, `frameworks` and `files_skipped`. The first entry of
/// `frameworks` (or `"unknown"` when empty) is passed to the extractors as
/// the framework for every file.
///
/// Extraction is best effort: a file whose extraction fails is logged and
/// left out of the result instead of aborting the scan. In incremental mode
/// every file in `files` is then recorded in the cache and the cache is
/// saved; cache failures are logged, not returned.
pub fn extract_all(
    &mut self,
    files: &[PathBuf],
    frameworks: &[String],
    files_skipped: usize,
) -> Result<ScanResult> {
    let mut result = ScanResult::new();

    // Project identity.
    result.manifest.subject.name = self.detect_project_name();
    result.manifest.subject.version = self.detect_project_version();
    result.manifest.subject.source = self.detect_project_source();
    result.manifest.files = files
        .iter()
        .map(|p| p.to_string_lossy().to_string())
        .collect();

    // Record how the scan was run.
    result.manifest.scan_config.exclude_patterns = self.config.exclude.clone();
    result.manifest.scan_config.frameworks_detected = frameworks.to_vec();
    // NOTE(review): this is the reported worker count; `par_iter` below runs on
    // rayon's global pool, so `max_threads` only takes effect if that pool is
    // sized elsewhere. Confirm against the caller.
    result.manifest.scan_config.parallel_workers = if self.config.parallel {
        self.config.max_threads.unwrap_or_else(num_cpus::get)
    } else {
        1
    };
    result.manifest.scan_config.incremental = self.config.incremental;
    result.manifest.scan_config.files_scanned = files.len();
    result.manifest.scan_config.files_skipped = files_skipped;

    let primary_framework = frameworks.first().map(|s| s.as_str()).unwrap_or("unknown");

    // Shared by the parallel and sequential paths. Failures are reported so a
    // file silently missing from the result can be traced back to its cause.
    let extract = |file: &PathBuf| match extractors::extract_from_file(file, primary_framework) {
        Ok(assets) => Some(assets),
        Err(e) => {
            tracing::warn!("Failed to extract assets from {}: {}", file.display(), e);
            None
        }
    };
    let extracted_assets: Vec<_> = if self.config.parallel {
        files.par_iter().filter_map(extract).collect()
    } else {
        files.iter().filter_map(extract).collect()
    };

    for assets in extracted_assets {
        result.agents.extend(assets.agents);
        result.tools.extend(assets.tools);
        result.models.extend(assets.models);
        result.memory.extend(assets.memory);
    }

    if self.config.incremental {
        for file in files {
            if let Err(e) = self.cache.update(file) {
                tracing::warn!("Failed to update cache for {}: {}", file.display(), e);
            }
        }
        if let Err(e) = self.cache.save(&self.cache_path) {
            tracing::warn!("Failed to save cache: {}", e);
        }
    }

    tracing::info!(
        "Extracted {} agents, {} tools, {} models from {} files",
        result.agents.len(),
        result.tools.len(),
        result.models.len(),
        files.len()
    );
    Ok(result)
}
/// Determines a display name for the scanned project.
///
/// Resolution order:
/// 1. the first `name = ...` line in `<root>/pyproject.toml`, with
///    surrounding single or double quotes removed;
/// 2. the final component of the configured root path;
/// 3. the final component of the canonicalized root (covers roots such as
///    `.` or `..`, which have no final component of their own);
/// 4. the literal `"unknown"`.
///
/// The TOML lookup is a line scan, not a real parser: it does not track which
/// table the key belongs to.
fn detect_project_name(&self) -> String {
    let pyproject_path = self.config.path.join("pyproject.toml");
    if let Ok(content) = std::fs::read_to_string(&pyproject_path) {
        let declared = content.lines().find_map(|line| {
            // Split on the first `=` only, so values containing `=` survive.
            let (key, value) = line.split_once('=')?;
            // Exact key match: `name_template = ...` must not be mistaken for `name`.
            if key.trim() != "name" {
                return None;
            }
            let value = value
                .trim()
                .trim_matches(|c: char| c == '"' || c == '\'');
            Some(value.to_string())
        });
        if let Some(name) = declared {
            return name;
        }
    }

    if let Some(name) = self.config.path.file_name().and_then(|n| n.to_str()) {
        return name.to_string();
    }

    self.config
        .path
        .canonicalize()
        .ok()
        .and_then(|abs| abs.file_name().and_then(|n| n.to_str()).map(str::to_string))
        .unwrap_or_else(|| "unknown".to_string())
}
/// Reads the project version from `<root>/pyproject.toml`.
///
/// Returns the value of the first `version = ...` line with surrounding
/// single or double quotes removed, or `None` when the file cannot be read or
/// has no such line. The lookup is a line scan, not a real TOML parser: it
/// does not track which table the key belongs to.
fn detect_project_version(&self) -> Option<String> {
    let pyproject_path = self.config.path.join("pyproject.toml");
    let content = std::fs::read_to_string(&pyproject_path).ok()?;
    content.lines().find_map(|line| {
        // Split on the first `=` only, so values containing `=` survive.
        let (key, value) = line.split_once('=')?;
        // Exact key match: `version_scheme = ...` is not the version.
        if key.trim() != "version" {
            return None;
        }
        let value = value
            .trim()
            .trim_matches(|c: char| c == '"' || c == '\'');
        Some(value.to_string())
    })
}
/// Reads the project's source URL from `<root>/.git/config`.
///
/// Returns the value of the first `url = ...` line in the file, or `None`
/// when the file cannot be read or contains no such line. The section the
/// line belongs to is not inspected.
///
/// NOTE(review): remote URLs can embed credentials
/// (`https://user:token@host/...`); the value is returned verbatim — confirm
/// that downstream consumers do not publish it unredacted.
fn detect_project_source(&self) -> Option<String> {
    let git_config = self.config.path.join(".git/config");
    let content = std::fs::read_to_string(&git_config).ok()?;
    content.lines().find_map(|line| {
        // Split on the first `=` only: URLs may themselves contain `=`
        // (for example in a query string) and must not be truncated.
        let (key, value) = line.split_once('=')?;
        if key.trim() == "url" {
            Some(value.trim().to_string())
        } else {
            None
        }
    })
}
/// Builds a [`CallGraph`] from `results`.
///
/// Thin wrapper: delegates to `crate::analyzers::build_call_graph` and
/// returns its result unchanged. No scanner state is read.
pub fn build_call_graph(&self, results: &ScanResult) -> Result<CallGraph> {
crate::analyzers::build_call_graph(results)
}
/// Computes the trust boundaries for `results`.
///
/// Thin wrapper: delegates to `crate::analyzers::analyze_trust_boundaries`
/// and returns its result unchanged. No scanner state is read.
pub fn analyze_trust_boundaries(&self, results: &ScanResult) -> Result<Vec<TrustBoundary>> {
crate::analyzers::analyze_trust_boundaries(results)
}
pub fn generate_schema(
&self,
results: &ScanResult,
boundaries: &[TrustBoundary],
) -> Result<ScanResult> {
let mut schema = results.clone();
schema.trust_boundaries = boundaries.to_vec();
Ok(schema)
}
/// Reports whether `path` matches at least one of the configured include
/// globs.
///
/// The path is matched in its lossy string form. Patterns that fail to
/// compile are treated as non-matching.
fn should_include(&self, path: &Path) -> bool {
    let candidate = path.to_string_lossy();
    for raw in self.config.include.iter() {
        if let Ok(compiled) = glob::Pattern::new(raw) {
            if compiled.matches(&candidate) {
                return true;
            }
        }
    }
    false
}
/// Reports whether `path` matches at least one of the configured exclude
/// globs.
///
/// The path is matched in its lossy string form. Patterns that fail to
/// compile are treated as non-matching.
fn should_exclude(&self, path: &Path) -> bool {
    let candidate = path.to_string_lossy();
    for raw in self.config.exclude.iter() {
        if let Ok(compiled) = glob::Pattern::new(raw) {
            if compiled.matches(&candidate) {
                return true;
            }
        }
    }
    false
}
}
/// A call graph, stored as a flat list of node names.
///
/// NOTE(review): nothing visible in this file adds nodes to the graph, and
/// the field is private — confirm how the analyzers are expected to populate
/// it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CallGraph {
    /// Names of the nodes in the graph.
    nodes: Vec<String>,
}

impl CallGraph {
    /// Creates an empty call graph (identical to [`CallGraph::default`]).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the graph's nodes as a slice.
    #[must_use]
    pub fn nodes(&self) -> &[String] {
        &self.nodes
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A scanner can be built from the default configuration.
    #[test]
    fn test_scanner_creation() {
        assert!(Scanner::new(ScanConfig::default()).is_ok());
    }

    /// Construction is rejected when the configured root does not exist.
    #[test]
    fn test_invalid_path() {
        let outcome = Scanner::new(ScanConfig::new("/nonexistent/path"));
        assert!(outcome.is_err());
    }
}