1use crate::{
4 cache::FileCache,
5 config::ScanConfig,
6 error::{RaxitError, Result},
7 extractors,
8 schema::{ScanResult, TrustBoundary},
9};
10use rayon::prelude::*;
11use std::path::{Path, PathBuf};
12use walkdir::WalkDir;
13
/// Holds the scan configuration together with the file cache used for
/// incremental scanning.
pub struct Scanner {
    /// Scan settings passed to `Scanner::new`.
    config: ScanConfig,
    /// File cache: loaded from `cache_path` when `config.incremental` is set,
    /// otherwise created empty (see `Scanner::new`).
    cache: FileCache,
    /// Location of the cache file: `<config.path>/<config.cache_dir>/cache.json`.
    cache_path: PathBuf,
}
20
21impl Scanner {
22 pub fn new(config: ScanConfig) -> Result<Self> {
24 if !config.path.exists() {
26 return Err(RaxitError::InvalidPath(config.path.clone()));
27 }
28
29 let cache_path = config.path.join(&config.cache_dir).join("cache.json");
31
32 let cache = if config.incremental {
34 FileCache::load(&cache_path)?
35 } else {
36 FileCache::new()
37 };
38
39 Ok(Self {
40 config,
41 cache,
42 cache_path,
43 })
44 }
45
46 pub fn discover_files(&mut self) -> Result<(Vec<PathBuf>, usize)> {
49 let mut all_files = Vec::new();
50 let mut files_to_scan = Vec::new();
51 let mut files_skipped = 0;
52
53 let walker = WalkDir::new(&self.config.path)
54 .follow_links(false)
55 .into_iter()
56 .filter_entry(|e| !self.should_exclude(e.path()));
57
58 for entry in walker {
59 let entry = entry?;
60 let path = entry.path();
61
62 if path.is_file() && self.should_include(path) {
63 all_files.push(path.to_path_buf());
64
65 if self.config.incremental {
67 match self.cache.has_changed(path) {
68 Ok(true) => {
69 files_to_scan.push(path.to_path_buf());
70 }
71 Ok(false) => {
72 files_skipped += 1;
73 tracing::debug!("Skipping unchanged file: {}", path.display());
74 }
75 Err(e) => {
76 tracing::warn!("Failed to check cache for {}: {}", path.display(), e);
77 files_to_scan.push(path.to_path_buf());
79 }
80 }
81 } else {
82 files_to_scan.push(path.to_path_buf());
83 }
84 }
85 }
86
87 tracing::debug!(
88 "Discovered {} files ({} to scan, {} skipped)",
89 all_files.len(),
90 files_to_scan.len(),
91 files_skipped
92 );
93
94 Ok((files_to_scan, files_skipped))
95 }
96
97 pub fn detect_frameworks(&self, files: &[PathBuf]) -> Result<Vec<String>> {
99 let mut frameworks = std::collections::HashSet::new();
100
101 for file in files {
103 if let Ok(content) = std::fs::read_to_string(file) {
104 if content.contains("from pydantic_ai import")
105 || content.contains("import pydantic_ai")
106 {
107 frameworks.insert("pydantic-ai".to_string());
108 }
109 if content.contains("from langgraph import") || content.contains("import langgraph")
110 {
111 frameworks.insert("langgraph".to_string());
112 }
113 if content.contains("from crewai import") || content.contains("import crewai") {
114 frameworks.insert("crewai".to_string());
115 }
116 }
117 }
118
119 Ok(frameworks.into_iter().collect())
120 }
121
122 pub fn extract_all(
124 &mut self,
125 files: &[PathBuf],
126 frameworks: &[String],
127 files_skipped: usize,
128 ) -> Result<ScanResult> {
129 let mut result = ScanResult::new();
130
131 result.manifest.subject.name = self.detect_project_name();
133 result.manifest.subject.version = self.detect_project_version();
134 result.manifest.subject.source = self.detect_project_source();
135
136 result.manifest.files = files
138 .iter()
139 .map(|p| p.to_string_lossy().to_string())
140 .collect();
141
142 result.manifest.scan_config.exclude_patterns = self.config.exclude.clone();
144 result.manifest.scan_config.frameworks_detected = frameworks.to_vec();
145 result.manifest.scan_config.parallel_workers = if self.config.parallel {
146 self.config.max_threads.unwrap_or_else(num_cpus::get)
147 } else {
148 1
149 };
150 result.manifest.scan_config.incremental = self.config.incremental;
151 result.manifest.scan_config.files_scanned = files.len();
152 result.manifest.scan_config.files_skipped = files_skipped;
153
154 let primary_framework = frameworks.first().map(|s| s.as_str()).unwrap_or("unknown");
156
157 let extracted_assets: Vec<_> = if self.config.parallel {
159 files
160 .par_iter()
161 .filter_map(|file| extractors::extract_from_file(file, primary_framework).ok())
162 .collect()
163 } else {
164 files
165 .iter()
166 .filter_map(|file| extractors::extract_from_file(file, primary_framework).ok())
167 .collect()
168 };
169
170 for assets in extracted_assets {
172 result.agents.extend(assets.agents);
173 result.tools.extend(assets.tools);
174 result.models.extend(assets.models);
175 result.memory.extend(assets.memory);
176 }
177
178 if self.config.incremental {
180 for file in files {
181 if let Err(e) = self.cache.update(file) {
182 tracing::warn!("Failed to update cache for {}: {}", file.display(), e);
183 }
184 }
185
186 if let Err(e) = self.cache.save(&self.cache_path) {
188 tracing::warn!("Failed to save cache: {}", e);
189 }
190 }
191
192 tracing::info!(
193 "Extracted {} agents, {} tools, {} models from {} files",
194 result.agents.len(),
195 result.tools.len(),
196 result.models.len(),
197 files.len()
198 );
199
200 Ok(result)
201 }
202
203 fn detect_project_name(&self) -> String {
205 let pyproject_path = self.config.path.join("pyproject.toml");
207 if let Ok(content) = std::fs::read_to_string(&pyproject_path) {
208 for line in content.lines() {
210 if line.trim().starts_with("name") {
211 if let Some(name) = line.split('=').nth(1) {
212 return name.trim().trim_matches('"').to_string();
213 }
214 }
215 }
216 }
217
218 self.config
220 .path
221 .file_name()
222 .and_then(|n| n.to_str())
223 .unwrap_or("unknown")
224 .to_string()
225 }
226
227 fn detect_project_version(&self) -> Option<String> {
229 let pyproject_path = self.config.path.join("pyproject.toml");
230 if let Ok(content) = std::fs::read_to_string(&pyproject_path) {
231 for line in content.lines() {
232 if line.trim().starts_with("version") {
233 if let Some(version) = line.split('=').nth(1) {
234 return Some(version.trim().trim_matches('"').to_string());
235 }
236 }
237 }
238 }
239 None
240 }
241
242 fn detect_project_source(&self) -> Option<String> {
244 let git_config = self.config.path.join(".git/config");
245 if let Ok(content) = std::fs::read_to_string(&git_config) {
246 for line in content.lines() {
247 if line.trim().starts_with("url") {
248 if let Some(url) = line.split('=').nth(1) {
249 return Some(url.trim().to_string());
250 }
251 }
252 }
253 }
254 None
255 }
256
    /// Builds the call graph for `results`.
    ///
    /// Delegates directly to `crate::analyzers::build_call_graph` and returns
    /// its result unchanged; no scanner state is used.
    pub fn build_call_graph(&self, results: &ScanResult) -> Result<CallGraph> {
        crate::analyzers::build_call_graph(results)
    }
261
    /// Computes the trust boundaries for `results`.
    ///
    /// Delegates directly to `crate::analyzers::analyze_trust_boundaries` and
    /// returns its result unchanged; no scanner state is used.
    pub fn analyze_trust_boundaries(&self, results: &ScanResult) -> Result<Vec<TrustBoundary>> {
        crate::analyzers::analyze_trust_boundaries(results)
    }
266
267 pub fn generate_schema(
269 &self,
270 results: &ScanResult,
271 boundaries: &[TrustBoundary],
272 ) -> Result<ScanResult> {
273 let mut schema = results.clone();
274 schema.trust_boundaries = boundaries.to_vec();
275 Ok(schema)
276 }
277
278 fn should_include(&self, path: &Path) -> bool {
280 let path_str = path.to_string_lossy();
281
282 self.config.include.iter().any(|pattern| {
284 glob::Pattern::new(pattern)
285 .map(|p| p.matches(&path_str))
286 .unwrap_or(false)
287 })
288 }
289
290 fn should_exclude(&self, path: &Path) -> bool {
292 let path_str = path.to_string_lossy();
293
294 self.config.exclude.iter().any(|pattern| {
296 glob::Pattern::new(pattern)
297 .map(|p| p.matches(&path_str))
298 .unwrap_or(false)
299 })
300 }
301}
302
/// Call graph container; currently it only holds a list of nodes stored as
/// strings.
#[derive(Default)]
pub struct CallGraph {
    /// Nodes of the graph, in insertion order.
    nodes: Vec<String>,
}

impl CallGraph {
    /// Creates a graph with no nodes (same as `CallGraph::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the nodes as a borrowed slice.
    pub fn nodes(&self) -> &[String] {
        self.nodes.as_slice()
    }
}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing a scanner from the default configuration succeeds.
    #[test]
    fn test_scanner_creation() {
        let outcome = Scanner::new(ScanConfig::default());
        assert!(outcome.is_ok());
    }

    /// Constructing a scanner for a non-existent root is rejected.
    #[test]
    fn test_invalid_path() {
        let outcome = Scanner::new(ScanConfig::new("/nonexistent/path"));
        assert!(outcome.is_err());
    }
}