1use std::path::{Path, PathBuf};
2
3use crate::engine::{self, AnalysisContext};
4use crate::extract::ScanContext;
5use crate::tokenize::ShellType;
6use crate::verdict::{Finding, Severity};
7
8pub struct ScanConfig {
10 pub path: PathBuf,
12 pub recursive: bool,
14 pub fail_on: Severity,
16 pub ignore_patterns: Vec<String>,
18 pub max_files: Option<usize>,
20}
21
22pub struct ScanResult {
24 pub file_results: Vec<FileScanResult>,
25 pub scanned_count: usize,
26 pub skipped_count: usize,
27 pub truncated: bool,
28 pub truncation_reason: Option<String>,
29}
30
31pub struct FileScanResult {
33 pub path: PathBuf,
34 pub findings: Vec<Finding>,
35 pub is_config_file: bool,
36}
37
/// File names that make a file a priority file no matter which directory it
/// lives in. Matching is exact and case-sensitive on the final path component.
const PRIORITY_BASENAMES: &[&str] = &[
    ".cursorrules",
    ".cursorignore",
    ".clinerules",
    ".windsurfrules",
    "CLAUDE.md",
    "AGENTS.md",
    "copilot-instructions.md",
    "mcp.json",
    ".mcp.json",
    "mcp_settings.json",
    "devcontainer.json",
];
55
/// Directory names whose direct children are all treated as priority files.
/// Only the immediate parent of a file is compared against this list.
const PRIORITY_PARENT_DIRS: &[&str] = &[
    ".claude",
    ".vscode",
    ".cursor",
    ".windsurf",
    ".cline",
    ".continue",
    ".github",
    ".devcontainer",
    ".roo",
];
68
69pub fn scan(config: &ScanConfig) -> ScanResult {
74 let mut files = collect_files(&config.path, config.recursive, &config.ignore_patterns);
75
76 files.sort_by(|a, b| {
78 let a_priority = is_priority_file(a);
79 let b_priority = is_priority_file(b);
80 match (a_priority, b_priority) {
81 (true, false) => std::cmp::Ordering::Less,
82 (false, true) => std::cmp::Ordering::Greater,
83 _ => a.cmp(b),
84 }
85 });
86
87 let mut truncated = false;
88 let mut truncation_reason = None;
89 let mut skipped_count = 0;
90
91 if let Some(max) = config.max_files {
93 if files.len() > max {
94 skipped_count = files.len() - max;
95 files.truncate(max);
96 truncated = true;
97 truncation_reason = Some(format!(
98 "Scan capped at {max} files ({skipped_count} skipped)."
99 ));
100 }
101 }
102
103 let mut file_results = Vec::new();
104 for file_path in &files {
105 if let Some(result) = scan_single_file(file_path) {
106 file_results.push(result);
107 } else {
108 skipped_count += 1;
109 }
110 }
111
112 ScanResult {
113 scanned_count: file_results.len(),
114 skipped_count,
115 truncated,
116 truncation_reason,
117 file_results,
118 }
119}
120
121pub fn scan_single_file(file_path: &Path) -> Option<FileScanResult> {
123 const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
125
126 let metadata = match std::fs::metadata(file_path) {
127 Ok(m) => m,
128 Err(e) => {
129 eprintln!(
130 "tirith: scan: cannot read metadata for {}: {e}",
131 file_path.display()
132 );
133 return None;
134 }
135 };
136 if metadata.len() > MAX_FILE_SIZE {
137 eprintln!(
138 "tirith: scan: skipping {} ({}B exceeds {}B limit)",
139 file_path.display(),
140 metadata.len(),
141 MAX_FILE_SIZE
142 );
143 return None;
144 }
145
146 let raw_bytes = match std::fs::read(file_path) {
147 Ok(b) => b,
148 Err(e) => {
149 eprintln!("tirith: scan: cannot read {}: {e}", file_path.display());
150 return None;
151 }
152 };
153 let content = String::from_utf8_lossy(&raw_bytes).into_owned();
154
155 let is_config = is_priority_file(file_path);
156
157 let cwd = file_path
158 .parent()
159 .map(|p| p.display().to_string())
160 .filter(|s| !s.is_empty());
161 let ctx = AnalysisContext {
162 input: content,
163 shell: ShellType::Posix,
164 scan_context: ScanContext::FileScan,
165 raw_bytes: Some(raw_bytes),
166 interactive: false,
167 cwd: cwd.clone(),
168 file_path: Some(file_path.to_path_buf()),
169 repo_root: None,
170 is_config_override: false,
171 clipboard_html: None,
172 };
173
174 let verdict = engine::analyze(&ctx);
175
176 let policy = crate::policy::Policy::discover(cwd.as_deref());
178 let mut findings = verdict.findings;
179 engine::filter_findings_by_paranoia_vec(&mut findings, policy.paranoia);
180
181 Some(FileScanResult {
182 path: file_path.to_path_buf(),
183 findings,
184 is_config_file: is_config,
185 })
186}
187
188pub fn scan_stdin(content: &str, raw_bytes: &[u8]) -> FileScanResult {
190 let cwd = std::env::current_dir()
191 .ok()
192 .map(|p| p.display().to_string());
193 let ctx = AnalysisContext {
194 input: content.to_string(),
195 shell: ShellType::Posix,
196 scan_context: ScanContext::FileScan,
197 raw_bytes: Some(raw_bytes.to_vec()),
198 interactive: false,
199 cwd: cwd.clone(),
200 file_path: None,
201 repo_root: None,
202 is_config_override: false,
203 clipboard_html: None,
204 };
205
206 let verdict = engine::analyze(&ctx);
207
208 let policy = crate::policy::Policy::discover(cwd.as_deref());
210 let mut findings = verdict.findings;
211 engine::filter_findings_by_paranoia_vec(&mut findings, policy.paranoia);
212
213 FileScanResult {
214 path: PathBuf::from("<stdin>"),
215 findings,
216 is_config_file: false,
217 }
218}
219
220fn is_priority_file(path: &Path) -> bool {
223 let basename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
224
225 if PRIORITY_BASENAMES.contains(&basename) {
227 return true;
228 }
229
230 if let Some(parent) = path.parent() {
232 let parent_name = parent.file_name().and_then(|n| n.to_str()).unwrap_or("");
233 if PRIORITY_PARENT_DIRS.contains(&parent_name) {
234 return true;
235 }
236 }
237
238 false
239}
240
241fn collect_files(path: &Path, recursive: bool, ignore_patterns: &[String]) -> Vec<PathBuf> {
243 if path.is_file() {
244 return vec![path.to_path_buf()];
245 }
246
247 if !path.is_dir() {
248 eprintln!("tirith: scan: path does not exist: {}", path.display());
249 return vec![];
250 }
251
252 let mut files = Vec::new();
253 collect_files_recursive(path, recursive, ignore_patterns, &mut files);
254 files
255}
256
257fn collect_files_recursive(
258 dir: &Path,
259 recursive: bool,
260 ignore_patterns: &[String],
261 files: &mut Vec<PathBuf>,
262) {
263 let entries = match std::fs::read_dir(dir) {
264 Ok(e) => e,
265 Err(e) => {
266 eprintln!("tirith: scan: cannot read directory {}: {e}", dir.display());
267 return;
268 }
269 };
270
271 for entry in entries {
272 let entry = match entry {
273 Ok(e) => e,
274 Err(e) => {
275 eprintln!(
276 "tirith: scan: error reading entry in {}: {e}",
277 dir.display()
278 );
279 continue;
280 }
281 };
282 let path = entry.path();
283 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
284
285 if path.is_dir() {
287 if should_skip_dir(name) && !is_known_config_dir(name) {
288 continue;
289 }
290 if recursive || is_known_config_dir(name) {
291 collect_files_recursive(&path, recursive, ignore_patterns, files);
292 }
293 continue;
294 }
295
296 if is_binary_extension(name) {
298 continue;
299 }
300
301 if ignore_patterns
303 .iter()
304 .any(|pat| name.contains(pat.as_str()))
305 {
306 continue;
307 }
308
309 files.push(path);
310 }
311}
312
/// Reports whether a directory with this exact name is on the skip list used
/// when walking a tree.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        ".git",
        "node_modules",
        "target",
        "__pycache__",
        ".tox",
        "dist",
        "build",
        ".next",
        "vendor",
        ".cache",
    ];

    SKIPPED_DIRS.contains(&name)
}
329
/// Reports whether a directory with this exact name is one of the known
/// configuration directories, which are entered even during a non-recursive
/// walk.
///
/// The names listed here are the same as those in `PRIORITY_PARENT_DIRS`;
/// keep the two in step.
fn is_known_config_dir(name: &str) -> bool {
    const CONFIG_DIRS: &[&str] = &[
        ".claude",
        ".vscode",
        ".cursor",
        ".windsurf",
        ".cline",
        ".continue",
        ".github",
        ".devcontainer",
        ".roo",
    ];

    CONFIG_DIRS.contains(&name)
}
345
/// Reports whether `name` ends with one of the extensions treated as binary.
///
/// The comparison is case-insensitive and looks only at the end of the name,
/// so `archive.tar.gz` matches through its `.gz` suffix.
fn is_binary_extension(name: &str) -> bool {
    const BINARY_SUFFIXES: &[&str] = &[
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".mp3", ".mp4", ".wav",
        ".avi", ".mov", ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar", ".exe", ".dll", ".so",
        ".dylib", ".o", ".a", ".wasm", ".pyc", ".class", ".jar",
    ];

    let lowered = name.to_lowercase();
    for suffix in BINARY_SUFFIXES {
        if lowered.ends_with(*suffix) {
            return true;
        }
    }
    false
}
356
357impl ScanResult {
358 pub fn has_findings_at_or_above(&self, threshold: Severity) -> bool {
360 self.file_results
361 .iter()
362 .flat_map(|r| &r.findings)
363 .any(|f| f.severity >= threshold)
364 }
365
366 pub fn total_findings(&self) -> usize {
368 self.file_results.iter().map(|r| r.findings.len()).sum()
369 }
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Binary-looking names are recognised; text configuration files are not.
    #[test]
    fn test_binary_extension_skip() {
        for name in ["image.png", "archive.tar.gz"] {
            assert!(is_binary_extension(name), "{name} should count as binary");
        }
        for name in ["config.json", "CLAUDE.md"] {
            assert!(!is_binary_extension(name), "{name} should not count as binary");
        }
    }

    /// Priority is granted by basename or by the immediate parent directory.
    #[test]
    fn test_priority_file_detection() {
        let priority = [
            ".cursorrules",
            "CLAUDE.md",
            "mcp.json",
            ".claude/settings.json",
            ".vscode/settings.json",
            ".roo/rules.md",
        ];
        for candidate in priority {
            assert!(
                is_priority_file(Path::new(candidate)),
                "{candidate} should be a priority file"
            );
        }

        // Generic names only become priority files inside a known directory.
        let ordinary = ["README.md", "settings.json", "config.json"];
        for candidate in ordinary {
            assert!(
                !is_priority_file(Path::new(candidate)),
                "{candidate} should not be a priority file"
            );
        }
    }

    /// Build and VCS directories are skipped; source and editor directories are not.
    #[test]
    fn test_skip_dirs() {
        for name in [".git", "node_modules", "target"] {
            assert!(should_skip_dir(name), "{name} should be skipped");
        }
        for name in ["src", ".vscode"] {
            assert!(!should_skip_dir(name), "{name} should not be skipped");
        }
    }

    /// Only the listed tool directories count as known configuration directories.
    #[test]
    fn test_known_config_dirs() {
        for name in [".claude", ".vscode", ".cursor"] {
            assert!(is_known_config_dir(name), "{name} should be a config dir");
        }
        for name in ["src", ".git"] {
            assert!(!is_known_config_dir(name), "{name} should not be a config dir");
        }
    }
}