1use std::path::{Path, PathBuf};
2
3use crate::engine::{self, AnalysisContext};
4use crate::extract::ScanContext;
5use crate::tokenize::ShellType;
6use crate::verdict::{Finding, Severity};
7
/// Configuration for one scan run, consumed by [`scan`].
pub struct ScanConfig {
    /// File or directory to scan. A directory is used as the root that
    /// relative paths are computed against during pattern matching; a single
    /// file is returned as-is without any pattern filtering.
    pub path: PathBuf,
    /// Whether to descend into subdirectories. Known AI/editor config
    /// directories are descended into even when this is `false`.
    pub recursive: bool,
    /// Severity threshold associated with the run.
    /// NOTE(review): not read anywhere in this file — presumably the caller
    /// uses it to decide the exit status; confirm.
    pub fail_on: Severity,
    /// Patterns for files to skip; each is tested against both the file name
    /// and the root-relative path.
    pub ignore_patterns: Vec<String>,
    /// Patterns selecting files to scan. Entries starting with `!` are
    /// negations that drop matching files. An empty list disables include
    /// filtering.
    pub include_patterns: Vec<String>,
    /// Patterns for files to drop; applied after include filtering.
    pub exclude_patterns: Vec<String>,
    /// Optional cap on how many collected files are scanned; `None` means
    /// no cap.
    pub max_files: Option<usize>,
}
25
/// Aggregated outcome of a [`scan`] run.
pub struct ScanResult {
    /// One entry per file that was scanned successfully.
    pub file_results: Vec<FileScanResult>,
    /// Number of entries in `file_results`.
    pub scanned_count: usize,
    /// Files that were collected but not scanned: those cut off by the
    /// `max_files` cap plus those that could not be read, were too large, or
    /// whose scan panicked.
    pub skipped_count: usize,
    /// `true` when the `max_files` cap removed files from the scan.
    pub truncated: bool,
    /// Human-readable explanation of the truncation; `None` when nothing was
    /// truncated.
    pub truncation_reason: Option<String>,
}
34
/// Findings produced for a single scanned input (a file or stdin).
pub struct FileScanResult {
    /// Path of the scanned file; the placeholder `<stdin>` for stdin input.
    pub path: PathBuf,
    /// Findings that remain after filtering by the discovered policy's
    /// paranoia level.
    pub findings: Vec<Finding>,
    /// Whether the file was classified as a priority (AI/editor config) file.
    /// Always `false` for stdin input.
    pub is_config_file: bool,
}
41
/// File names that mark a file as a priority file regardless of which
/// directory it lives in. Compared exactly (case-sensitively) against the
/// final path component.
const PRIORITY_BASENAMES: &[&str] = &[
    ".cursorrules",
    ".cursorignore",
    ".clinerules",
    ".windsurfrules",
    "CLAUDE.md",
    "AGENTS.md",
    "copilot-instructions.md",
    "mcp.json",
    ".mcp.json",
    "mcp_settings.json",
    "devcontainer.json",
];
59
/// Directory names whose direct children are all treated as priority files.
/// Only the immediate parent directory of a file is compared against this
/// list; deeper ancestors are not considered.
const PRIORITY_PARENT_DIRS: &[&str] = &[
    ".claude",
    ".vscode",
    ".cursor",
    ".windsurf",
    ".cline",
    ".continue",
    ".github",
    ".devcontainer",
    ".roo",
];
72
73pub fn scan(config: &ScanConfig) -> ScanResult {
78 let mut files = collect_files(
79 &config.path,
80 config.recursive,
81 &config.ignore_patterns,
82 &config.include_patterns,
83 &config.exclude_patterns,
84 );
85
86 files.sort_by(|a, b| {
88 let a_priority = is_priority_file(a);
89 let b_priority = is_priority_file(b);
90 match (a_priority, b_priority) {
91 (true, false) => std::cmp::Ordering::Less,
92 (false, true) => std::cmp::Ordering::Greater,
93 _ => a.cmp(b),
94 }
95 });
96
97 let mut truncated = false;
98 let mut truncation_reason = None;
99 let mut skipped_count = 0;
100
101 if let Some(max) = config.max_files {
103 if files.len() > max {
104 skipped_count = files.len() - max;
105 files.truncate(max);
106 truncated = true;
107 truncation_reason = Some(format!(
108 "Scan capped at {max} files ({skipped_count} skipped)."
109 ));
110 }
111 }
112
113 let mut file_results = Vec::new();
114 for file_path in &files {
115 match catch_panic_scanning(file_path, || scan_single_file(file_path)) {
119 Some(Some(result)) => file_results.push(result),
120 Some(None) | None => skipped_count += 1,
121 }
122 }
123
124 ScanResult {
125 scanned_count: file_results.len(),
126 skipped_count,
127 truncated,
128 truncation_reason,
129 file_results,
130 }
131}
132
133pub fn scan_single_file(file_path: &Path) -> Option<FileScanResult> {
135 const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
138
139 let metadata = match std::fs::metadata(file_path) {
140 Ok(m) => m,
141 Err(e) => {
142 eprintln!(
143 "tirith: scan: cannot read metadata for {}: {e}",
144 file_path.display()
145 );
146 return None;
147 }
148 };
149 if metadata.len() > MAX_FILE_SIZE {
150 eprintln!(
151 "tirith: scan: skipping {} ({}B exceeds {}B limit)",
152 file_path.display(),
153 metadata.len(),
154 MAX_FILE_SIZE
155 );
156 return None;
157 }
158
159 let raw_bytes = match std::fs::read(file_path) {
160 Ok(b) => b,
161 Err(e) => {
162 eprintln!("tirith: scan: cannot read {}: {e}", file_path.display());
163 return None;
164 }
165 };
166 let content = String::from_utf8_lossy(&raw_bytes).into_owned();
167
168 let is_config = is_priority_file(file_path);
169
170 let cwd = file_path
171 .parent()
172 .map(|p| p.display().to_string())
173 .filter(|s| !s.is_empty());
174 let ctx = AnalysisContext {
175 input: content,
176 shell: ShellType::Posix,
177 scan_context: ScanContext::FileScan,
178 raw_bytes: Some(raw_bytes),
179 interactive: false,
180 cwd: cwd.clone(),
181 file_path: Some(file_path.to_path_buf()),
182 repo_root: None,
183 is_config_override: false,
184 clipboard_html: None,
185 };
186
187 let verdict = engine::analyze(&ctx);
188
189 let policy = crate::policy::Policy::discover(cwd.as_deref());
190 let mut findings = verdict.findings;
191 engine::filter_findings_by_paranoia_vec(&mut findings, policy.paranoia);
192
193 Some(FileScanResult {
194 path: file_path.to_path_buf(),
195 findings,
196 is_config_file: is_config,
197 })
198}
199
/// Runs `f`, converting a panic into `None` so one bad file cannot abort the
/// whole scan.
///
/// Returns `Some(value)` when `f` completes normally. When `f` panics, a
/// message naming `file_path` is written to stderr and `None` is returned.
/// The closure is wrapped in `AssertUnwindSafe`, so no unwind-safety bound is
/// imposed on the caller.
fn catch_panic_scanning<T>(file_path: &Path, f: impl FnOnce() -> T) -> Option<T> {
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f));
    if outcome.is_err() {
        eprintln!(
            "tirith: scan: internal error scanning {} (skipped — see panic message above)",
            file_path.display()
        );
    }
    outcome.ok()
}
228
229pub fn scan_stdin(content: &str, raw_bytes: &[u8]) -> FileScanResult {
231 let cwd = std::env::current_dir()
232 .ok()
233 .map(|p| p.display().to_string());
234 let ctx = AnalysisContext {
235 input: content.to_string(),
236 shell: ShellType::Posix,
237 scan_context: ScanContext::FileScan,
238 raw_bytes: Some(raw_bytes.to_vec()),
239 interactive: false,
240 cwd: cwd.clone(),
241 file_path: None,
242 repo_root: None,
243 is_config_override: false,
244 clipboard_html: None,
245 };
246
247 let verdict = engine::analyze(&ctx);
248
249 let policy = crate::policy::Policy::discover(cwd.as_deref());
250 let mut findings = verdict.findings;
251 engine::filter_findings_by_paranoia_vec(&mut findings, policy.paranoia);
252
253 FileScanResult {
254 path: PathBuf::from("<stdin>"),
255 findings,
256 is_config_file: false,
257 }
258}
259
260fn is_priority_file(path: &Path) -> bool {
263 let basename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
264
265 if PRIORITY_BASENAMES.contains(&basename) {
267 return true;
268 }
269
270 if let Some(parent) = path.parent() {
272 let parent_name = parent.file_name().and_then(|n| n.to_str()).unwrap_or("");
273 if PRIORITY_PARENT_DIRS.contains(&parent_name) {
274 return true;
275 }
276 }
277
278 false
279}
280
281fn collect_files(
283 path: &Path,
284 recursive: bool,
285 ignore_patterns: &[String],
286 include_patterns: &[String],
287 exclude_patterns: &[String],
288) -> Vec<PathBuf> {
289 if path.is_file() {
290 return vec![path.to_path_buf()];
291 }
292
293 if !path.is_dir() {
294 eprintln!("tirith: scan: path does not exist: {}", path.display());
295 return vec![];
296 }
297
298 let mut files = Vec::new();
299 collect_files_recursive(
300 path,
301 path,
302 recursive,
303 ignore_patterns,
304 include_patterns,
305 exclude_patterns,
306 &mut files,
307 );
308 files
309}
310
311fn collect_files_recursive(
312 root: &Path,
313 dir: &Path,
314 recursive: bool,
315 ignore_patterns: &[String],
316 include_patterns: &[String],
317 exclude_patterns: &[String],
318 files: &mut Vec<PathBuf>,
319) {
320 let entries = match std::fs::read_dir(dir) {
321 Ok(e) => e,
322 Err(e) => {
323 eprintln!("tirith: scan: cannot read directory {}: {e}", dir.display());
324 return;
325 }
326 };
327
328 for entry in entries {
329 let entry = match entry {
330 Ok(e) => e,
331 Err(e) => {
332 eprintln!(
333 "tirith: scan: error reading entry in {}: {e}",
334 dir.display()
335 );
336 continue;
337 }
338 };
339 let path = entry.path();
340 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
341
342 if path.is_dir() {
344 if should_skip_dir(name) && !is_known_config_dir(name) {
345 continue;
346 }
347 if recursive || is_known_config_dir(name) {
348 collect_files_recursive(
349 root,
350 &path,
351 recursive,
352 ignore_patterns,
353 include_patterns,
354 exclude_patterns,
355 files,
356 );
357 }
358 continue;
359 }
360
361 if is_binary_extension(name) {
363 continue;
364 }
365
366 let rel_path = path
368 .strip_prefix(root)
369 .ok()
370 .and_then(|p| p.to_str())
371 .unwrap_or(name);
372 if ignore_patterns
373 .iter()
374 .any(|pat| matches_ignore_pattern(name, pat) || matches_ignore_pattern(rel_path, pat))
375 {
376 continue;
377 }
378
379 if !include_patterns.is_empty() {
382 let mut included = false;
383 let mut negated = false;
384 let has_positive = include_patterns.iter().any(|p| !p.starts_with('!'));
385
386 for pat in include_patterns {
387 if let Some(stripped) = pat.strip_prefix('!') {
388 if matches_ignore_pattern(name, stripped)
390 || matches_ignore_pattern(rel_path, stripped)
391 {
392 negated = true;
393 }
394 } else {
395 if matches_ignore_pattern(name, pat) || matches_ignore_pattern(rel_path, pat) {
397 included = true;
398 }
399 }
400 }
401
402 if negated || (has_positive && !included) {
406 continue;
407 }
408 }
409
410 if exclude_patterns
412 .iter()
413 .any(|pat| matches_ignore_pattern(name, pat) || matches_ignore_pattern(rel_path, pat))
414 {
415 continue;
416 }
417
418 files.push(path);
419 }
420}
421
/// Reports whether a directory with this exact name is skipped during the
/// walk (version-control metadata, dependency trees, build output, caches).
/// The comparison is case-sensitive.
fn should_skip_dir(name: &str) -> bool {
    const SKIPPED_DIRS: [&str; 10] = [
        ".git",
        "node_modules",
        "target",
        "__pycache__",
        ".tox",
        "dist",
        "build",
        ".next",
        "vendor",
        ".cache",
    ];
    SKIPPED_DIRS.iter().any(|dir| *dir == name)
}
438
/// Reports whether a directory with this exact name is a known AI/editor
/// configuration directory. The comparison is case-sensitive.
fn is_known_config_dir(name: &str) -> bool {
    const CONFIG_DIRS: [&str; 9] = [
        ".claude",
        ".vscode",
        ".cursor",
        ".windsurf",
        ".cline",
        ".continue",
        ".github",
        ".devcontainer",
        ".roo",
    ];
    CONFIG_DIRS.iter().any(|dir| *dir == name)
}
454
/// Reports whether `name` ends with one of the extensions on the built-in
/// list of image, audio/video, archive and compiled-artifact types. The
/// comparison is case-insensitive (the name is lowercased first).
fn is_binary_extension(name: &str) -> bool {
    const BINARY_SUFFIXES: &[&str] = &[
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg", ".webp", ".mp3", ".mp4", ".wav",
        ".avi", ".mov", ".zip", ".tar", ".gz", ".bz2", ".xz", ".7z", ".rar", ".exe", ".dll", ".so",
        ".dylib", ".o", ".a", ".wasm", ".pyc", ".class", ".jar",
    ];

    let lowered = name.to_lowercase();
    for suffix in BINARY_SUFFIXES {
        if lowered.ends_with(*suffix) {
            return true;
        }
    }
    false
}
465
/// Tests `name` against a simple ignore-style `pattern`.
///
/// * A pattern without `*` matches when it occurs anywhere in `name`
///   (substring match).
/// * A pattern with one or more `*` is matched as an anchored glob in which
///   each `*` stands for any run of characters (including none, and including
///   `/`): the text before the first `*` must start `name`, the text after the
///   last `*` must end it, and the literal pieces in between must occur in
///   order without overlapping.
///
/// No other glob syntax (`?`, character classes, escaping) is recognised.
pub fn matches_ignore_pattern(name: &str, pattern: &str) -> bool {
    if !pattern.contains('*') {
        return name.contains(pattern);
    }

    // The pattern contains `*`, so the split yields at least two pieces; the
    // `unwrap_or("")` fallbacks are never taken.
    let mut literals: Vec<&str> = pattern.split('*').collect();
    let trailing = literals.pop().unwrap_or("");
    let mut middle = literals.into_iter();
    let leading = middle.next().unwrap_or("");

    // Anchor the leading literal at the start of the name.
    let mut remaining = match name.strip_prefix(leading) {
        Some(rest) => rest,
        None => return false,
    };

    // Consume each inner literal at its leftmost occurrence, which leaves the
    // most room for the pieces that follow. An empty literal (from `**`)
    // matches at position 0 and consumes nothing.
    for literal in middle {
        match remaining.find(literal) {
            Some(pos) => remaining = &remaining[pos + literal.len()..],
            None => return false,
        }
    }

    // Anchor the trailing literal at the end of whatever is left, so it can
    // neither overlap earlier pieces nor be followed by extra text. An empty
    // trailing literal (pattern ends in `*`) always matches.
    remaining.ends_with(trailing)
}
512
513impl ScanResult {
514 pub fn has_findings_at_or_above(&self, threshold: Severity) -> bool {
516 self.file_results
517 .iter()
518 .flat_map(|r| &r.findings)
519 .any(|f| f.severity >= threshold)
520 }
521
522 pub fn total_findings(&self) -> usize {
524 self.file_results.iter().map(|r| r.findings.len()).sum()
525 }
526}
527
/// Unit tests for the scan helpers: panic containment, file classification,
/// pattern matching, and include-pattern negation.
#[cfg(test)]
mod tests {
    use super::*;

    /// A closure that completes normally has its value passed through.
    #[test]
    fn catch_panic_scanning_returns_some_on_clean_run() {
        let path = Path::new("dummy");
        let result = catch_panic_scanning(path, || 42_i32);
        assert_eq!(result, Some(42));
    }

    // Held while a test replaces the process-wide panic hook, so that hook
    // swaps made under this lock do not interleave.
    static PANIC_HOOK_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// A panicking closure is contained and reported as `None`.
    #[test]
    fn catch_panic_scanning_returns_none_on_panic() {
        // Recover the guard even if an earlier holder panicked and poisoned
        // the lock.
        let _lock = PANIC_HOOK_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        let path = Path::new("dummy");
        // Install a no-op hook for the duration of the simulated panic, then
        // restore the previous hook before asserting.
        let prev = std::panic::take_hook();
        std::panic::set_hook(Box::new(|_| {}));
        let result: Option<i32> = catch_panic_scanning(path, || {
            panic!("simulated rule panic");
        });
        std::panic::set_hook(prev);
        assert!(result.is_none(), "panic must produce None, got {result:?}");
    }

    /// Binary extensions are recognised by suffix; text files are not.
    #[test]
    fn test_binary_extension_skip() {
        assert!(is_binary_extension("image.png"));
        assert!(is_binary_extension("archive.tar.gz"));
        assert!(!is_binary_extension("config.json"));
        assert!(!is_binary_extension("CLAUDE.md"));
    }

    /// Priority status comes from the file name or the immediate parent
    /// directory.
    #[test]
    fn test_priority_file_detection() {
        // Matched by basename.
        assert!(is_priority_file(Path::new(".cursorrules")));
        assert!(is_priority_file(Path::new("CLAUDE.md")));
        assert!(is_priority_file(Path::new("mcp.json")));
        assert!(!is_priority_file(Path::new("README.md")));

        // Generic names are priority only inside a known config directory.
        assert!(!is_priority_file(Path::new("settings.json")));
        assert!(!is_priority_file(Path::new("config.json")));
        assert!(is_priority_file(Path::new(".claude/settings.json")));
        assert!(is_priority_file(Path::new(".vscode/settings.json")));
        assert!(is_priority_file(Path::new(".roo/rules.md")));
    }

    /// Skip-list directories are skipped; source and config directories are
    /// not.
    #[test]
    fn test_skip_dirs() {
        assert!(should_skip_dir(".git"));
        assert!(should_skip_dir("node_modules"));
        assert!(should_skip_dir("target"));
        assert!(!should_skip_dir("src"));
        assert!(!should_skip_dir(".vscode"));
    }

    /// Known config directories are recognised by exact name.
    #[test]
    fn test_known_config_dirs() {
        assert!(is_known_config_dir(".claude"));
        assert!(is_known_config_dir(".vscode"));
        assert!(is_known_config_dir(".cursor"));
        assert!(!is_known_config_dir("src"));
        assert!(!is_known_config_dir(".git"));
    }

    /// Covers each pattern shape handled by `matches_ignore_pattern`.
    #[test]
    fn test_ignore_pattern_matching() {
        // Leading wildcard: suffix match.
        assert!(matches_ignore_pattern("test.log", "*.log"));
        assert!(!matches_ignore_pattern("test.txt", "*.log"));

        // Trailing wildcard: prefix match.
        assert!(matches_ignore_pattern("test_output.txt", "test_*"));
        assert!(!matches_ignore_pattern("my_test.txt", "test_*"));

        // No wildcard: substring match.
        assert!(matches_ignore_pattern("my_test_file.txt", "test"));
        assert!(!matches_ignore_pattern("readme.md", "test"));

        // Wildcard in the middle: prefix and suffix must both match.
        assert!(matches_ignore_pattern("test_file.log", "test_*.log"));
        assert!(!matches_ignore_pattern("test_file.txt", "test_*.log"));

        // Literal pattern equal to the whole name.
        assert!(matches_ignore_pattern("Cargo.lock", "Cargo.lock"));

        // Patterns containing a path separator, applied to relative paths.
        assert!(matches_ignore_pattern(".claude/settings.json", ".claude/*"));
        assert!(!matches_ignore_pattern("src/main.rs", ".claude/*"));
        assert!(matches_ignore_pattern("docs/CLAUDE.md", "*/CLAUDE.md"));
        assert!(!matches_ignore_pattern("README.md", "*/CLAUDE.md"));
    }

    /// A file containing a variation selector yields a `VariationSelector`
    /// finding under the policy discovered for the temp directory.
    #[test]
    fn test_variation_selector_visible_in_scan() {
        let tmp = tempfile::tempdir().expect("create temp dir");
        // "A" followed by the UTF-8 encoding of U+FE0F.
        let file_path = tmp.path().join("test_vs.txt");
        std::fs::write(&file_path, b"A\xef\xb8\x8f").expect("write temp file");

        let result = scan_single_file(&file_path).expect("scan should succeed");

        // `scan_single_file` already filters by paranoia; the filter is applied
        // again here with the policy discovered from the temp directory.
        let policy = crate::policy::Policy::discover(Some(tmp.path().to_str().unwrap()));
        let mut findings = result.findings;
        crate::engine::filter_findings_by_paranoia_vec(&mut findings, policy.paranoia);

        assert!(
            findings
                .iter()
                .any(|f| f.rule_id == crate::verdict::RuleId::VariationSelector),
            "VariationSelector should be visible in scan at default paranoia: {findings:?}"
        );
    }

    /// With a positive and a negated include pattern, a file must match the
    /// positive pattern and must not match the negation.
    #[test]
    fn test_negated_include_patterns() {
        let tmp = tempfile::tempdir().expect("create temp dir");
        std::fs::write(tmp.path().join("a.md"), "hello").unwrap();
        std::fs::write(tmp.path().join("b.test.md"), "world").unwrap();
        std::fs::write(tmp.path().join("c.rs"), "fn main() {}").unwrap();

        let files = collect_files(
            tmp.path(),
            false,
            &[],
            &["*.md".to_string(), "!*.test.md".to_string()],
            &[],
        );

        let names: Vec<&str> = files
            .iter()
            .filter_map(|p| p.file_name().and_then(|n| n.to_str()))
            .collect();
        assert!(names.contains(&"a.md"), "a.md should be included");
        assert!(
            !names.contains(&"b.test.md"),
            "b.test.md should be excluded by negation"
        );
        assert!(
            !names.contains(&"c.rs"),
            "c.rs should not match *.md include"
        );
    }

    /// With only negated include patterns there is no positive filter, so
    /// every file that does not match a negation is kept.
    #[test]
    fn test_negation_only_include_patterns() {
        let tmp = tempfile::tempdir().expect("create temp dir");
        std::fs::write(tmp.path().join("a.md"), "hello").unwrap();
        std::fs::write(tmp.path().join("b.test.md"), "world").unwrap();
        std::fs::write(tmp.path().join("c.rs"), "fn main() {}").unwrap();

        let files = collect_files(tmp.path(), false, &[], &["!*.test.md".to_string()], &[]);

        let names: Vec<&str> = files
            .iter()
            .filter_map(|p| p.file_name().and_then(|n| n.to_str()))
            .collect();
        assert!(names.contains(&"a.md"), "a.md should be included");
        assert!(
            !names.contains(&"b.test.md"),
            "b.test.md should be excluded by negation"
        );
        assert!(
            names.contains(&"c.rs"),
            "c.rs should be included (no positive filter)"
        );
    }
}