1use anyhow::Result;
23use serde::{Deserialize, Serialize};
24use std::collections::{BTreeMap, BTreeSet};
25use std::fmt;
26use std::fs;
27use std::path::{Path, PathBuf};
28
/// Settings that control a scan.
///
/// Every field except `root` is filled in from the matching `default_*`
/// function when it is missing from deserialized input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoutConfig {
    /// Directory the scan starts from.
    pub root: PathBuf,

    /// Deepest directory level the walker descends to; the root is depth 0.
    #[serde(default = "default_max_depth")]
    pub max_depth: usize,

    /// NOTE(review): no code visible in this file reads this field; presumably
    /// a cap on the bytes sampled per file — confirm against callers.
    #[serde(default = "default_max_sample_bytes")]
    pub max_sample_bytes: usize,

    /// Upper bound on the number of nodes kept in the snapshot tree.
    #[serde(default = "default_max_tree_files")]
    pub max_tree_files: usize,

    /// Entry names skipped during the walk (compared case-insensitively).
    #[serde(default = "default_ignores")]
    pub ignore: Vec<String>,
}
54
/// Serde fallback for `ScoutConfig::max_depth`.
fn default_max_depth() -> usize {
    const MAX_DEPTH: usize = 6;
    MAX_DEPTH
}
/// Serde fallback for `ScoutConfig::max_sample_bytes` (512 KiB).
fn default_max_sample_bytes() -> usize {
    const KIB: usize = 1024;
    512 * KIB
}
/// Serde fallback for `ScoutConfig::max_tree_files`.
fn default_max_tree_files() -> usize {
    const MAX_TREE_FILES: usize = 200;
    MAX_TREE_FILES
}
/// Serde fallback for `ScoutConfig::ignore`: directory names that usually hold
/// VCS data, dependencies, caches or build output.
fn default_ignores() -> Vec<String> {
    const NAMES: [&str; 15] = [
        ".git",
        "node_modules",
        "target",
        "dist",
        "build",
        "__pycache__",
        ".next",
        "vendor",
        "coverage",
        ".cache",
        ".turbo",
        "bazel-bin",
        "bazel-out",
        ".dart_tool",
        ".gradle",
    ];
    NAMES.iter().map(|name| name.to_string()).collect()
}
84
85impl Default for ScoutConfig {
86 fn default() -> Self {
87 Self {
88 root: std::env::current_dir().unwrap_or_default(),
89 max_depth: default_max_depth(),
90 max_sample_bytes: default_max_sample_bytes(),
91 max_tree_files: default_max_tree_files(),
92 ignore: default_ignores(),
93 }
94 }
95}
96
/// One detected characteristic of the scanned codebase.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pattern {
    /// Human-readable name, e.g. "Rust" or "Dockerized".
    pub name: String,
    /// Grouping label; the detector in this file emits "language",
    /// "framework", "architecture", "tooling", "convention" and "anti-pattern".
    pub category: String,
    /// Detector confidence; the markdown renderer prints it as a percentage.
    pub confidence: u8,
    /// Short explanation of what triggered the detection.
    pub evidence: String,
}
109
/// Aggregated file statistics for one language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageStats {
    /// Display name of the language, e.g. "Rust".
    pub language: String,
    /// Number of scanned files attributed to this language.
    pub file_count: usize,
    /// Sum of the sizes (in bytes) of those files.
    pub total_bytes: u64,
    /// Dot-prefixed extensions seen for this language, e.g. ".ts" and ".tsx".
    pub extensions: BTreeSet<String>,
}
122
/// One file or directory in the snapshot's directory listing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TreeNode {
    /// Path relative to the scan root.
    pub path: String,
    /// Dot-prefixed extension; empty for directories and extension-less files.
    pub ext: String,
    /// File size in bytes; the walker records 0 for directories.
    pub size: u64,
    /// `true` when the node is a directory.
    pub is_dir: bool,
    /// NOTE(review): the walker in this file always stores 0 here; presumably
    /// meant to hold the number of direct children — confirm.
    pub child_count: usize,
}
137
/// Complete result of one scan, ready to be rendered or serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodebaseSnapshot {
    /// Scan root as a (lossily converted) string.
    pub root: String,

    /// Directory listing, truncated to the configured `max_tree_files`.
    pub tree: Vec<TreeNode>,

    /// Per-language statistics, most files first.
    pub languages: Vec<LanguageStats>,

    /// Number of files visited by the walk (not limited by the tree cap).
    pub total_files: usize,

    /// Sum of the sizes of all visited files, in bytes.
    pub total_bytes: u64,

    /// Detected patterns, ordered by category and then descending confidence.
    pub patterns: Vec<Pattern>,

    /// Notable files such as manifests, entry points and READMEs.
    pub key_files: Vec<KeyFile>,

    /// Sorted, de-duplicated dependency labels such as "serde (crate)".
    pub dependencies: Vec<String>,

    /// RFC 3339 UTC timestamp taken when the snapshot was assembled.
    pub scanned_at: String,

    /// Wall-clock duration of the scan in milliseconds.
    pub scan_ms: u64,
}
171
/// A file singled out as structurally important.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyFile {
    /// Path relative to the scan root.
    pub path: String,
    /// Role label: "config", "entrypoint", "readme", "license", "docs", "ci"
    /// or "test".
    pub role: String,
    /// First meaningful line of the file, when one could be read.
    pub summary: Option<String>,
}
182
/// Scans a directory tree and produces a `CodebaseSnapshot`.
pub struct Scout {
    /// Scan settings, including the root directory.
    config: ScoutConfig,
}
189
190impl Scout {
191 pub fn new(root: impl Into<PathBuf>) -> Self {
193 Self {
194 config: ScoutConfig {
195 root: root.into(),
196 ..Default::default()
197 },
198 }
199 }
200
201 pub fn with_config(config: ScoutConfig) -> Self {
203 Self { config }
204 }
205
206 pub fn scan(&self) -> Result<CodebaseSnapshot> {
208 let root = &self.config.root;
209 if !root.exists() {
210 anyhow::bail!("Root directory does not exist: {}", root.display());
211 }
212 if !root.is_dir() {
213 anyhow::bail!("Root is not a directory: {}", root.display());
214 }
215
216 let start = std::time::Instant::now();
217
218 let mut files: Vec<FileEntry> = Vec::new();
220 let mut tree: Vec<TreeNode> = Vec::new();
221 self.walk(root, root, 0, &mut files, &mut tree)?;
222
223 tree.truncate(self.config.max_tree_files);
225
226 let languages = self.compute_language_stats(&files);
228
229 let key_files = self.identify_key_files(&files, root);
231
232 let dependencies = self.extract_dependencies(&files, root);
234
235 let patterns = self.detect_patterns(&files, &key_files, &dependencies, root);
237
238 let total_bytes: u64 = files.iter().map(|f| f.size).sum();
239 let scan_ms = start.elapsed().as_millis() as u64;
240
241 Ok(CodebaseSnapshot {
242 root: root.to_string_lossy().to_string(),
243 tree,
244 languages,
245 total_files: files.len(),
246 total_bytes,
247 patterns,
248 key_files,
249 dependencies,
250 scanned_at: chrono::Utc::now().to_rfc3339(),
251 scan_ms,
252 })
253 }
254
255 fn walk(
258 &self,
259 root: &Path,
260 dir: &Path,
261 depth: usize,
262 files: &mut Vec<FileEntry>,
263 tree: &mut Vec<TreeNode>,
264 ) -> Result<()> {
265 if depth > self.config.max_depth {
266 return Ok(());
267 }
268
269 let entries = match fs::read_dir(dir) {
270 Ok(e) => e,
271 Err(_) => return Ok(()), };
273
274 for entry in entries {
275 let entry = match entry {
276 Ok(e) => e,
277 Err(_) => continue,
278 };
279
280 let name = entry.file_name().to_string_lossy().to_string();
281 let path = entry.path();
282
283 if self.should_ignore(&name) {
285 continue;
286 }
287
288 let rel = path
289 .strip_prefix(root)
290 .unwrap_or(&path)
291 .to_string_lossy()
292 .to_string();
293
294 if path.is_dir() {
295 tree.push(TreeNode {
296 path: rel.clone(),
297 ext: String::new(),
298 size: 0,
299 is_dir: true,
300 child_count: 0,
301 });
302 self.walk(root, &path, depth + 1, files, tree)?;
303 } else {
304 let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
305 let ext = Path::new(&name)
306 .extension()
307 .map(|e| format!(".{}", e.to_string_lossy()))
308 .unwrap_or_default();
309
310 files.push(FileEntry {
311 rel_path: rel.clone(),
312 name,
313 ext: ext.clone(),
314 size,
315 });
316
317 if tree.len() < self.config.max_tree_files {
318 tree.push(TreeNode {
319 path: rel,
320 ext,
321 size,
322 is_dir: false,
323 child_count: 0,
324 });
325 }
326 }
327 }
328
329 Ok(())
330 }
331
332 fn should_ignore(&self, name: &str) -> bool {
333 let name_lower = name.to_lowercase();
334 if name_lower.starts_with('.')
336 && name_lower != ".env.example"
337 && name_lower != ".env.local.example"
338 {
339 return true;
340 }
341 for ignore in &self.config.ignore {
343 if name_lower == ignore.to_lowercase() {
344 return true;
345 }
346 }
347 false
348 }
349
350 fn compute_language_stats(&self, files: &[FileEntry]) -> Vec<LanguageStats> {
353 let mut lang_map: BTreeMap<String, LanguageStats> = BTreeMap::new();
354
355 for file in files {
356 if let Some(lang) = self.ext_to_language(&file.ext) {
357 let stats = lang_map.entry(lang.to_string()).or_insert_with(|| {
358 LanguageStats {
359 language: lang.to_string(),
360 file_count: 0,
361 total_bytes: 0,
362 extensions: BTreeSet::new(),
363 }
364 });
365 stats.file_count += 1;
366 stats.total_bytes += file.size;
367 stats.extensions.insert(file.ext.clone());
368 }
369 }
370
371 let mut v: Vec<LanguageStats> = lang_map.into_values().collect();
372 v.sort_by(|a, b| b.file_count.cmp(&a.file_count));
373 v
374 }
375
376 fn ext_to_language(&self, ext: &str) -> Option<&'static str> {
377 match ext {
378 ".rs" => Some("Rust"),
379 ".ts" | ".tsx" => Some("TypeScript"),
380 ".js" | ".jsx" | ".mjs" | ".cjs" => Some("JavaScript"),
381 ".py" | ".pyi" => Some("Python"),
382 ".go" => Some("Go"),
383 ".java" => Some("Java"),
384 ".kt" | ".kts" => Some("Kotlin"),
385 ".rb" => Some("Ruby"),
386 ".php" => Some("PHP"),
387 ".c" | ".h" => Some("C"),
388 ".cpp" | ".cc" | ".cxx" | ".hpp" => Some("C++"),
389 ".cs" => Some("C#"),
390 ".swift" => Some("Swift"),
391 ".scala" => Some("Scala"),
392 ".sh" | ".bash" | ".zsh" => Some("Shell"),
393 ".sql" => Some("SQL"),
394 ".html" | ".htm" => Some("HTML"),
395 ".css" | ".scss" | ".sass" | ".less" => Some("CSS"),
396 ".vue" => Some("Vue"),
397 ".svelte" => Some("Svelte"),
398 ".dart" => Some("Dart"),
399 ".lua" => Some("Lua"),
400 ".r" | ".R" => Some("R"),
401 ".zig" => Some("Zig"),
402 ".nim" => Some("Nim"),
403 ".ex" | ".exs" => Some("Elixir"),
404 ".erl" => Some("Erlang"),
405 ".hs" => Some("Haskell"),
406 ".ml" | ".mli" => Some("OCaml"),
407 ".toml" => Some("TOML"),
408 ".yaml" | ".yml" => Some("YAML"),
409 ".json" => Some("JSON"),
410 ".xml" => Some("XML"),
411 ".md" | ".mdx" => Some("Markdown"),
412 _ => None,
413 }
414 }
415
416 fn identify_key_files(&self, files: &[FileEntry], root: &Path) -> Vec<KeyFile> {
419 let mut key_files: Vec<KeyFile> = Vec::new();
420
421 let key_patterns: &[(&str, &str)] = &[
422 ("Cargo.toml", "config"),
424 ("package.json", "config"),
425 ("pyproject.toml", "config"),
426 ("go.mod", "config"),
427 ("build.gradle", "config"),
428 ("build.gradle.kts", "config"),
429 ("pom.xml", "config"),
430 ("Makefile", "config"),
431 ("CMakeLists.txt", "config"),
432 ("docker-compose.yml", "config"),
433 ("docker-compose.yaml", "config"),
434 ("tsconfig.json", "config"),
435 (".env.example", "config"),
436 ("main.rs", "entrypoint"),
438 ("main.go", "entrypoint"),
439 ("main.py", "entrypoint"),
440 ("main.java", "entrypoint"),
441 ("main.ts", "entrypoint"),
442 ("main.js", "entrypoint"),
443 ("index.ts", "entrypoint"),
444 ("index.js", "entrypoint"),
445 ("index.py", "entrypoint"),
446 ("app.rs", "entrypoint"),
447 ("lib.rs", "entrypoint"),
448 ("mod.rs", "entrypoint"),
449 ("README.md", "readme"),
451 ("README", "readme"),
452 ("README.txt", "readme"),
453 ("README.rst", "readme"),
454 ("LICENSE", "license"),
455 ("LICENSE.md", "license"),
456 ("LICENSE.txt", "license"),
457 ("CHANGELOG.md", "docs"),
458 ("CONTRIBUTING.md", "docs"),
459 (".github/workflows", "ci"),
461 (".gitlab-ci.yml", "ci"),
462 ("Jenkinsfile", "ci"),
463 ("tests", "test"),
465 ("test", "test"),
466 ("spec", "test"),
467 ("__tests__", "test"),
468 ];
469
470 for file in files {
471 let name_lower = file.name.to_lowercase();
472
473 for (pattern, role) in key_patterns {
474 if name_lower == *pattern || file.rel_path.contains(pattern) {
475 let summary = self.read_file_summary(root, &file.rel_path);
476 key_files.push(KeyFile {
477 path: file.rel_path.clone(),
478 role: role.to_string(),
479 summary,
480 });
481 break;
482 }
483 }
484 }
485
486 let ci_dir = root.join(".github").join("workflows");
488 if ci_dir.is_dir() {
489 if let Ok(entries) = fs::read_dir(&ci_dir) {
490 for entry in entries.flatten() {
491 let name = entry.file_name().to_string_lossy().to_string();
492 if name.ends_with(".yml") || name.ends_with(".yaml") {
493 let rel = format!(".github/workflows/{}", name);
494 if !key_files.iter().any(|kf| kf.path == rel) {
495 key_files.push(KeyFile {
496 path: rel,
497 role: "ci".to_string(),
498 summary: None,
499 });
500 }
501 }
502 }
503 }
504 }
505
506 let mut seen = BTreeSet::new();
508 key_files.retain(|kf| seen.insert(kf.path.clone()));
509
510 key_files.sort_by(|a, b| {
512 let rank = |r: &str| -> u8 {
513 match r {
514 "config" => 0,
515 "entrypoint" => 1,
516 "readme" => 2,
517 "license" => 3,
518 "ci" => 4,
519 "test" => 5,
520 "docs" => 6,
521 _ => 7,
522 }
523 };
524 rank(&a.role)
525 .cmp(&rank(&b.role))
526 .then_with(|| a.path.cmp(&b.path))
527 });
528
529 key_files
530 }
531
532 fn read_file_summary(&self, root: &Path, rel_path: &str) -> Option<String> {
534 let path = root.join(rel_path);
535 let content = fs::read_to_string(&path).ok()?;
536
537 for line in content.lines() {
538 let trimmed = line.trim();
539 if trimmed.is_empty()
541 || trimmed.starts_with('#')
542 || trimmed.starts_with("//")
543 || trimmed.starts_with("/*")
544 || trimmed.starts_with("--")
545 || trimmed.starts_with("---")
546 || trimmed.starts_with("!")
547 {
548 continue;
549 }
550 if trimmed.len() > 120 {
552 return Some(format!("{}…", &trimmed[..120]));
553 }
554 return Some(trimmed.to_string());
555 }
556 None
557 }
558
559 fn extract_dependencies(&self, files: &[FileEntry], root: &Path) -> Vec<String> {
562 let mut deps: Vec<String> = Vec::new();
563
564 for file in files {
565 match file.name.as_str() {
566 "Cargo.toml" => {
567 let path = root.join(&file.rel_path);
568 if let Ok(content) = fs::read_to_string(&path) {
569 self.extract_cargo_deps(&content, &mut deps);
570 }
571 }
572 "package.json" => {
573 let path = root.join(&file.rel_path);
574 if let Ok(content) = fs::read_to_string(&path) {
575 self.extract_npm_deps(&content, &mut deps);
576 }
577 }
578 "go.mod" => {
579 let path = root.join(&file.rel_path);
580 if let Ok(content) = fs::read_to_string(&path) {
581 self.extract_go_deps(&content, &mut deps);
582 }
583 }
584 "pyproject.toml" => {
585 let path = root.join(&file.rel_path);
586 if let Ok(content) = fs::read_to_string(&path) {
587 self.extract_python_deps(&content, &mut deps);
588 }
589 }
590 _ => {}
591 }
592 }
593
594 deps.sort();
595 deps.dedup();
596 deps
597 }
598
599 fn extract_cargo_deps(&self, content: &str, deps: &mut Vec<String>) {
600 let mut in_deps = false;
601 for line in content.lines() {
602 let trimmed = line.trim();
603 if trimmed == "[dependencies]" || trimmed == "[dev-dependencies]" {
604 in_deps = true;
605 continue;
606 }
607 if trimmed.starts_with('[') {
608 in_deps = false;
609 continue;
610 }
611 if in_deps {
612 if let Some((name, _)) = trimmed.split_once('=') {
613 let name = name.trim().to_string();
614 if !name.is_empty() {
615 deps.push(format!("{} (crate)", name));
616 }
617 } else if let Some((name, _)) = trimmed.split_once('{') {
618 let name = name.trim().to_string();
619 if !name.is_empty() {
620 deps.push(format!("{} (crate)", name));
621 }
622 }
623 }
624 }
625 }
626
627 fn extract_npm_deps(&self, content: &str, deps: &mut Vec<String>) {
628 if let Ok(json) = serde_json::from_str::<serde_json::Value>(content) {
629 for section in &["dependencies", "devDependencies"] {
630 if let Some(obj) = json.get(section).and_then(|v| v.as_object()) {
631 for name in obj.keys() {
632 deps.push(format!("{} (npm)", name));
633 }
634 }
635 }
636 }
637 }
638
639 fn extract_go_deps(&self, content: &str, deps: &mut Vec<String>) {
640 for line in content.lines() {
641 let trimmed = line.trim();
642 if trimmed.starts_with("require (") {
643 continue;
644 }
645 if trimmed.starts_with("require ") {
646 let parts: Vec<&str> = trimmed.split_whitespace().collect();
648 if parts.len() >= 3 && parts[0] == "require" {
649 deps.push(format!("{} (go)", parts[1]));
650 }
651 } else if !trimmed.starts_with("//")
652 && !trimmed.starts_with(')')
653 && !trimmed.starts_with("module ")
654 && !trimmed.starts_with("go ")
655 && !trimmed.is_empty()
656 {
657 let parts: Vec<&str> = trimmed.split_whitespace().collect();
659 if parts.len() >= 2 && parts[0].contains('/') {
660 deps.push(format!("{} (go)", parts[0]));
661 }
662 }
663 }
664 }
665
666 fn extract_python_deps(&self, content: &str, deps: &mut Vec<String>) {
667 let mut in_deps = false;
668 for line in content.lines() {
669 let trimmed = line.trim();
670 if trimmed == "[project]" || trimmed == "[tool.poetry]" {
671 in_deps = false;
672 }
673 if trimmed.starts_with('[') && trimmed.contains("dependencies") {
675 in_deps = true;
676 continue;
677 }
678 if trimmed.starts_with('[') && !trimmed.contains("dependencies") {
679 in_deps = false;
680 continue;
681 }
682 if in_deps {
683 if let Some((key, value)) = trimmed.split_once('=') {
685 let key = key.trim();
686 if key == "dependencies" {
687 let cleaned = value
688 .trim()
689 .trim_start_matches('[')
690 .trim_end_matches(']');
691 for dep in cleaned.split(',') {
692 let dep = dep.trim().trim_matches('"').trim_matches('\'');
693 if !dep.is_empty() {
694 deps.push(format!("{} (pypi)", dep));
695 }
696 }
697 } else if key != "python"
698 && !key.contains("version")
699 && !key.contains("requires")
700 {
701 let name = key.to_string();
703 if !name.is_empty() {
704 deps.push(format!("{} (pypi)", name));
705 }
706 }
707 }
708 }
709 }
710 }
711
712 fn detect_patterns(
715 &self,
716 files: &[FileEntry],
717 _key_files: &[KeyFile],
718 deps: &[String],
719 root: &Path,
720 ) -> Vec<Pattern> {
721 let mut patterns: Vec<Pattern> = Vec::new();
722
723 let file_names: BTreeSet<&str> = files.iter().map(|f| f.name.as_str()).collect();
725 let has_ext = |ext: &str| -> bool { files.iter().any(|f| f.ext == ext) };
726 let has_dir = |dir_name: &str| -> bool {
727 root.join(dir_name).is_dir()
728 || files
729 .iter()
730 .any(|f| f.rel_path.starts_with(&format!("{}/", dir_name)))
731 };
732 let dep_contains = |substr: &str| -> bool {
733 deps.iter()
734 .any(|d| d.to_lowercase().contains(&substr.to_lowercase()))
735 };
736
737 if has_ext(".rs") {
740 patterns.push(Pattern {
741 name: "Rust".to_string(),
742 category: "language".to_string(),
743 confidence: 98,
744 evidence: "Found .rs files".to_string(),
745 });
746 }
747
748 if has_ext(".ts") || has_ext(".tsx") {
749 patterns.push(Pattern {
750 name: "TypeScript".to_string(),
751 category: "language".to_string(),
752 confidence: 97,
753 evidence: "Found .ts/.tsx files".to_string(),
754 });
755 } else if has_ext(".js") || has_ext(".jsx") {
756 patterns.push(Pattern {
757 name: "JavaScript".to_string(),
758 category: "language".to_string(),
759 confidence: 95,
760 evidence: "Found .js/.jsx files (no .ts)".to_string(),
761 });
762 }
763
764 if has_ext(".py") {
765 patterns.push(Pattern {
766 name: "Python".to_string(),
767 category: "language".to_string(),
768 confidence: 97,
769 evidence: "Found .py files".to_string(),
770 });
771 }
772
773 if has_ext(".go") {
774 patterns.push(Pattern {
775 name: "Go".to_string(),
776 category: "language".to_string(),
777 confidence: 98,
778 evidence: "Found .go files".to_string(),
779 });
780 }
781
782 if has_ext(".java") {
783 patterns.push(Pattern {
784 name: "Java".to_string(),
785 category: "language".to_string(),
786 confidence: 98,
787 evidence: "Found .java files".to_string(),
788 });
789 }
790
791 if has_ext(".swift") {
792 patterns.push(Pattern {
793 name: "Swift".to_string(),
794 category: "language".to_string(),
795 confidence: 98,
796 evidence: "Found .swift files".to_string(),
797 });
798 }
799
800 if file_names.contains("Cargo.toml") && has_ext(".rs") {
803 if dep_contains("tokio") {
804 patterns.push(Pattern {
805 name: "Async Rust (Tokio)".to_string(),
806 category: "framework".to_string(),
807 confidence: 90,
808 evidence: "tokio dependency in Cargo.toml".to_string(),
809 });
810 }
811 if dep_contains("actix") {
812 patterns.push(Pattern {
813 name: "Actix Web".to_string(),
814 category: "framework".to_string(),
815 confidence: 92,
816 evidence: "actix dependency".to_string(),
817 });
818 }
819 if dep_contains("axum") {
820 patterns.push(Pattern {
821 name: "Axum".to_string(),
822 category: "framework".to_string(),
823 confidence: 92,
824 evidence: "axum dependency".to_string(),
825 });
826 }
827 if dep_contains("wasm") || dep_contains("leptos") {
828 patterns.push(Pattern {
829 name: "WASM/Leptos".to_string(),
830 category: "framework".to_string(),
831 confidence: 85,
832 evidence: "wasm-related dependency".to_string(),
833 });
834 }
835 }
836
837 if file_names.contains("package.json") {
838 if dep_contains("react") {
839 patterns.push(Pattern {
840 name: "React".to_string(),
841 category: "framework".to_string(),
842 confidence: 95,
843 evidence: "react dependency".to_string(),
844 });
845 }
846 if dep_contains("vue") {
847 patterns.push(Pattern {
848 name: "Vue".to_string(),
849 category: "framework".to_string(),
850 confidence: 95,
851 evidence: "vue dependency".to_string(),
852 });
853 }
854 if dep_contains("svelte") {
855 patterns.push(Pattern {
856 name: "Svelte".to_string(),
857 category: "framework".to_string(),
858 confidence: 95,
859 evidence: "svelte dependency".to_string(),
860 });
861 }
862 if dep_contains("next") {
863 patterns.push(Pattern {
864 name: "Next.js".to_string(),
865 category: "framework".to_string(),
866 confidence: 95,
867 evidence: "next dependency".to_string(),
868 });
869 }
870 if dep_contains("express") {
871 patterns.push(Pattern {
872 name: "Express".to_string(),
873 category: "framework".to_string(),
874 confidence: 92,
875 evidence: "express dependency".to_string(),
876 });
877 }
878 if dep_contains("fastify") {
879 patterns.push(Pattern {
880 name: "Fastify".to_string(),
881 category: "framework".to_string(),
882 confidence: 92,
883 evidence: "fastify dependency".to_string(),
884 });
885 }
886 }
887
888 if has_ext(".py") {
889 if dep_contains("django") {
890 patterns.push(Pattern {
891 name: "Django".to_string(),
892 category: "framework".to_string(),
893 confidence: 93,
894 evidence: "django dependency".to_string(),
895 });
896 }
897 if dep_contains("flask") {
898 patterns.push(Pattern {
899 name: "Flask".to_string(),
900 category: "framework".to_string(),
901 confidence: 93,
902 evidence: "flask dependency".to_string(),
903 });
904 }
905 if dep_contains("fastapi") {
906 patterns.push(Pattern {
907 name: "FastAPI".to_string(),
908 category: "framework".to_string(),
909 confidence: 93,
910 evidence: "fastapi dependency".to_string(),
911 });
912 }
913 }
914
915 if file_names.contains("Cargo.toml") {
919 let cargo_content =
920 fs::read_to_string(root.join("Cargo.toml")).unwrap_or_default();
921 if cargo_content.contains("[workspace]") {
922 patterns.push(Pattern {
923 name: "Rust workspace (monorepo)".to_string(),
924 category: "architecture".to_string(),
925 confidence: 95,
926 evidence: "[workspace] in Cargo.toml".to_string(),
927 });
928 }
929 }
930
931 if has_dir("src") {
933 patterns.push(Pattern {
934 name: "Standard src/ layout".to_string(),
935 category: "architecture".to_string(),
936 confidence: 90,
937 evidence: "src/ directory present".to_string(),
938 });
939 }
940
941 if root.join("src/lib.rs").exists() && root.join("src/main.rs").exists() {
943 patterns.push(Pattern {
944 name: "Lib+Binary Rust crate".to_string(),
945 category: "architecture".to_string(),
946 confidence: 90,
947 evidence: "Both lib.rs and main.rs in src/".to_string(),
948 });
949 }
950
951 let mod_dirs: Vec<&FileEntry> = files
953 .iter()
954 .filter(|f| {
955 f.ext == ".rs"
956 && f.rel_path.starts_with("src/")
957 && f.rel_path.ends_with("/mod.rs")
958 })
959 .collect();
960 if mod_dirs.len() >= 3 {
961 patterns.push(Pattern {
962 name: "Multi-module Rust project".to_string(),
963 category: "architecture".to_string(),
964 confidence: 85,
965 evidence: format!("{} mod.rs modules found", mod_dirs.len()),
966 });
967 }
968
969 if has_dir("controllers") && has_dir("models") && has_dir("views") {
971 patterns.push(Pattern {
972 name: "MVC architecture".to_string(),
973 category: "architecture".to_string(),
974 confidence: 88,
975 evidence: "Has controllers/, models/, views/ directories".to_string(),
976 });
977 }
978
979 if has_dir(".github") {
982 patterns.push(Pattern {
983 name: "GitHub Actions CI".to_string(),
984 category: "tooling".to_string(),
985 confidence: 95,
986 evidence: ".github/ directory present".to_string(),
987 });
988 }
989
990 if root.join("Dockerfile").exists() {
991 patterns.push(Pattern {
992 name: "Dockerized".to_string(),
993 category: "tooling".to_string(),
994 confidence: 95,
995 evidence: "Dockerfile found".to_string(),
996 });
997 }
998
999 if file_names.contains("Makefile") {
1000 patterns.push(Pattern {
1001 name: "Make-based build".to_string(),
1002 category: "tooling".to_string(),
1003 confidence: 90,
1004 evidence: "Makefile found".to_string(),
1005 });
1006 }
1007
1008 if has_dir("tests") || has_dir("test") {
1011 patterns.push(Pattern {
1012 name: "Has dedicated test directory".to_string(),
1013 category: "convention".to_string(),
1014 confidence: 95,
1015 evidence: "tests/ or test/ directory present".to_string(),
1016 });
1017 }
1018
1019 if has_dir("docs") {
1020 patterns.push(Pattern {
1021 name: "Has docs/ directory".to_string(),
1022 category: "convention".to_string(),
1023 confidence: 90,
1024 evidence: "docs/ directory present".to_string(),
1025 });
1026 }
1027
1028 if file_names.contains("CLIP.md")
1029 || file_names.contains("AGENTS.md")
1030 || file_names.contains("CLAUDE.md")
1031 {
1032 patterns.push(Pattern {
1033 name: "AI agent conventions".to_string(),
1034 category: "convention".to_string(),
1035 confidence: 92,
1036 evidence: "Agent config file (AGENTS.md/CLAUDE.md/CLIP.md)".to_string(),
1037 });
1038 }
1039
1040 let large_files: Vec<&FileEntry> = files.iter().filter(|f| f.size > 100_000).collect();
1044 if large_files.len() > 5 {
1045 patterns.push(Pattern {
1046 name: "Large files (>100KB)".to_string(),
1047 category: "anti-pattern".to_string(),
1048 confidence: 80,
1049 evidence: format!(
1050 "{} files exceed 100KB — largest: {}",
1051 large_files.len(),
1052 large_files
1053 .iter()
1054 .max_by_key(|f| f.size)
1055 .map(|f| format!("{} ({}KB)", f.rel_path, f.size / 1024))
1056 .unwrap_or_default()
1057 ),
1058 });
1059 }
1060
1061 let mixed_indent = self.detect_mixed_indentation(root, files);
1063 if mixed_indent > 0 {
1064 patterns.push(Pattern {
1065 name: "Mixed indentation".to_string(),
1066 category: "anti-pattern".to_string(),
1067 confidence: 70,
1068 evidence: format!(
1069 "{} file(s) mix tabs and spaces for indentation",
1070 mixed_indent
1071 ),
1072 });
1073 }
1074
1075 patterns.sort_by(|a, b| {
1077 a.category
1078 .cmp(&b.category)
1079 .then_with(|| b.confidence.cmp(&a.confidence))
1080 });
1081
1082 patterns
1083 }
1084
1085 fn detect_mixed_indentation(&self, root: &Path, files: &[FileEntry]) -> usize {
1087 let source_exts = [".rs", ".ts", ".js", ".py", ".go", ".java", ".tsx", ".jsx"];
1088 let mut count = 0usize;
1089 let mut sampled = 0usize;
1090 let max_sample = 20;
1091
1092 for file in files {
1093 if sampled >= max_sample {
1094 break;
1095 }
1096 if !source_exts.contains(&file.ext.as_str()) {
1097 continue;
1098 }
1099
1100 let path = root.join(&file.rel_path);
1101 if let Ok(content) = fs::read_to_string(&path) {
1102 sampled += 1;
1103 let has_tabs = content.lines().any(|l| l.starts_with('\t'));
1104 let has_spaces = content
1105 .lines()
1106 .any(|l| l.starts_with(" ") || l.starts_with(" "));
1107 if has_tabs && has_spaces {
1108 count += 1;
1109 }
1110 }
1111 }
1112
1113 count
1114 }
1115
1116 pub fn render_compact(snapshot: &CodebaseSnapshot) -> String {
1121 let mut out = String::with_capacity(4096);
1122
1123 out.push_str("╔══ Codebase Snapshot ══════════════════════╗\n");
1124 out.push_str(&format!("║ Root: {}\n", snapshot.root));
1125 out.push_str(&format!(
1126 "║ Files: {} | Size: {} | Scan: {}ms\n",
1127 snapshot.total_files,
1128 format_bytes(snapshot.total_bytes),
1129 snapshot.scan_ms
1130 ));
1131 out.push_str("╚═══════════════════════════════════════════╝\n\n");
1132
1133 if !snapshot.languages.is_empty() {
1135 out.push_str("## Languages\n\n");
1136 out.push_str("| Language | Files | Size |\n");
1137 out.push_str("|----------|-------|------|\n");
1138 for lang in &snapshot.languages {
1139 out.push_str(&format!(
1140 "| {} | {} | {} |\n",
1141 lang.language,
1142 lang.file_count,
1143 format_bytes(lang.total_bytes)
1144 ));
1145 }
1146 out.push('\n');
1147 }
1148
1149 if !snapshot.patterns.is_empty() {
1151 out.push_str("## Detected Patterns\n\n");
1152 for pattern in &snapshot.patterns {
1153 let conf = if pattern.confidence >= 90 {
1154 "●"
1155 } else if pattern.confidence >= 70 {
1156 "◐"
1157 } else {
1158 "○"
1159 };
1160 out.push_str(&format!(
1161 "- {} **{}** [{}] — {}\n",
1162 conf, pattern.name, pattern.category, pattern.evidence
1163 ));
1164 }
1165 out.push('\n');
1166 }
1167
1168 if !snapshot.key_files.is_empty() {
1170 out.push_str("## Key Files\n\n");
1171 for kf in &snapshot.key_files {
1172 if let Some(ref summary) = kf.summary {
1173 out.push_str(&format!(
1174 "- `{}` [{}] — {}\n",
1175 kf.path, kf.role, summary
1176 ));
1177 } else {
1178 out.push_str(&format!("- `{}` [{}]\n", kf.path, kf.role));
1179 }
1180 }
1181 out.push('\n');
1182 }
1183
1184 if !snapshot.dependencies.is_empty() {
1186 let display_count = 15;
1187 out.push_str(&format!(
1188 "## Dependencies ({} total)\n\n",
1189 snapshot.dependencies.len()
1190 ));
1191 for dep in snapshot.dependencies.iter().take(display_count) {
1192 out.push_str(&format!("- {}\n", dep));
1193 }
1194 if snapshot.dependencies.len() > display_count {
1195 out.push_str(&format!(
1196 "- … and {} more\n",
1197 snapshot.dependencies.len() - display_count
1198 ));
1199 }
1200 out.push('\n');
1201 }
1202
1203 if !snapshot.tree.is_empty() {
1205 out.push_str("## Directory Tree (top)\n\n");
1206 out.push_str("```\n");
1207 for node in &snapshot.tree {
1208 let indent = node.path.matches('/').count();
1209 let prefix = " ".repeat(indent);
1210 let name = node.path.rsplit('/').next().unwrap_or(&node.path);
1211 if node.is_dir {
1212 out.push_str(&format!("{}{}/\n", prefix, name));
1213 } else {
1214 out.push_str(&format!(
1215 "{}{} {}\n",
1216 prefix,
1217 name,
1218 if node.size > 0 {
1219 format!("({})", format_bytes(node.size))
1220 } else {
1221 String::new()
1222 }
1223 ));
1224 }
1225 }
1226 out.push_str("```\n");
1227 }
1228
1229 out
1230 }
1231
1232 pub fn render_json(snapshot: &CodebaseSnapshot) -> Result<String> {
1234 Ok(serde_json::to_string_pretty(snapshot)?)
1235 }
1236
1237 pub fn render_markdown(snapshot: &CodebaseSnapshot) -> String {
1239 let mut md = String::with_capacity(4096);
1240
1241 md.push_str("# Codebase Scout Report\n\n");
1242 md.push_str(&format!(
1243 "> Scanned: {} | Files: {} | Size: {} | Duration: {}ms\n\n",
1244 snapshot.scanned_at,
1245 snapshot.total_files,
1246 format_bytes(snapshot.total_bytes),
1247 snapshot.scan_ms,
1248 ));
1249
1250 md.push_str("## Overview\n\n");
1252 md.push_str(&format!("- **Root:** `{}`\n", snapshot.root));
1253 md.push_str(&format!("- **Total files:** {}\n", snapshot.total_files));
1254 md.push_str(&format!(
1255 "- **Total size:** {}\n",
1256 format_bytes(snapshot.total_bytes)
1257 ));
1258
1259 if let Some(primary_lang) = snapshot.languages.first() {
1260 md.push_str(&format!(
1261 "- **Primary language:** {} ({} files)\n",
1262 primary_lang.language, primary_lang.file_count
1263 ));
1264 }
1265 md.push('\n');
1266
1267 if !snapshot.languages.is_empty() {
1269 md.push_str("## Language Breakdown\n\n");
1270 md.push_str("| Language | Files | Size | Extensions |\n");
1271 md.push_str("|----------|-------|------|------------|\n");
1272 for lang in &snapshot.languages {
1273 let exts = lang
1274 .extensions
1275 .iter()
1276 .cloned()
1277 .collect::<Vec<_>>()
1278 .join(", ");
1279 md.push_str(&format!(
1280 "| {} | {} | {} | {} |\n",
1281 lang.language,
1282 lang.file_count,
1283 format_bytes(lang.total_bytes),
1284 exts
1285 ));
1286 }
1287 md.push('\n');
1288 }
1289
1290 if !snapshot.patterns.is_empty() {
1292 md.push_str("## Detected Patterns\n\n");
1293 let mut current_category = String::new();
1294 for pattern in &snapshot.patterns {
1295 if pattern.category != current_category {
1296 current_category = pattern.category.clone();
1297 md.push_str(&format!("### {}s\n\n", capitalize(¤t_category)));
1298 }
1299 md.push_str(&format!(
1300 "- **{}** ({}% confidence) — {}\n",
1301 pattern.name, pattern.confidence, pattern.evidence
1302 ));
1303 }
1304 md.push('\n');
1305 }
1306
1307 if !snapshot.key_files.is_empty() {
1309 md.push_str("## Key Files\n\n");
1310 md.push_str("| Path | Role | Summary |\n");
1311 md.push_str("|------|------|--------|\n");
1312 for kf in &snapshot.key_files {
1313 let summary = kf.summary.as_deref().unwrap_or("—");
1314 md.push_str(&format!(
1315 "| `{}` | {} | {} |\n",
1316 kf.path, kf.role, summary
1317 ));
1318 }
1319 md.push('\n');
1320 }
1321
1322 if !snapshot.dependencies.is_empty() {
1324 md.push_str(&format!(
1325 "## Dependencies ({})\n\n",
1326 snapshot.dependencies.len()
1327 ));
1328 for dep in &snapshot.dependencies {
1329 md.push_str(&format!("- {}\n", dep));
1330 }
1331 md.push('\n');
1332 }
1333
1334 if !snapshot.tree.is_empty() {
1336 md.push_str("## Directory Structure\n\n");
1337 md.push_str("```\n");
1338 for node in &snapshot.tree {
1339 let depth = node.path.matches('/').count();
1340 let indent = " ".repeat(depth);
1341 let name = node.path.rsplit('/').next().unwrap_or(&node.path);
1342 if node.is_dir {
1343 md.push_str(&format!("{}{}/\n", indent, name));
1344 } else {
1345 md.push_str(&format!(
1346 "{}{} {}\n",
1347 indent,
1348 name,
1349 if node.size > 0 {
1350 format!("({})", format_bytes(node.size))
1351 } else {
1352 String::new()
1353 }
1354 ));
1355 }
1356 }
1357 md.push_str("```\n");
1358 }
1359
1360 md
1361 }
1362}
1363
1364impl Default for Scout {
1365 fn default() -> Self {
1366 Self::new(std::env::current_dir().unwrap_or_default())
1367 }
1368}
1369
1370impl fmt::Debug for Scout {
1371 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1372 f.debug_struct("Scout")
1373 .field("root", &self.config.root)
1374 .finish()
1375 }
1376}
1377
#[derive(Debug)]
/// Path, name, extension and size recorded together as one entry.
///
/// NOTE(review): the code that builds and consumes these entries lies outside
/// this part of the file. The field notes below are inferred from the field
/// names and types only and should be confirmed against that code.
struct FileEntry {
    /// Presumably the file's path relative to the scan root — TODO confirm.
    rel_path: String,
    /// Presumably the file name (final path component) — TODO confirm.
    name: String,
    /// Presumably the file extension; whether it carries the leading dot or
    /// is case-normalized is not visible here — TODO confirm.
    ext: String,
    /// Presumably the file size in bytes — TODO confirm.
    size: u64,
}
1387
/// Formats a byte count as a short human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`). Larger values are
/// scaled by powers of 1024 and printed with one decimal place using the
/// largest fitting unit among KB, MB and GB (`"1.5 KB"`, `"1.0 GB"`).
///
/// A value that would round up to `1024.0` of its unit is shown in the next
/// unit instead, so 1 048 575 bytes renders as `"1.0 MB"` rather than
/// `"1024.0 KB"`. GB is the largest unit, so very large values stay in GB.
fn format_bytes(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = 1024 * KB;
    const GB: u64 = 1024 * MB;
    // Ascending order, so "the next larger unit" is simply the next index.
    const UNITS: [(u64, &str); 3] = [(KB, "KB"), (MB, "MB"), (GB, "GB")];

    // Largest unit that does not exceed `bytes`; none means plain bytes.
    let mut idx = match UNITS.iter().rposition(|&(size, _)| bytes >= size) {
        Some(i) => i,
        None => return format!("{} B", bytes),
    };
    let scaled = |i: usize| bytes as f64 / UNITS[i].0 as f64;

    // `{:.1}` rounds, so anything from 1023.95 upwards would print as
    // "1024.0". Compare in tenths to detect that and move up one unit.
    if idx + 1 < UNITS.len() && (scaled(idx) * 10.0).round() >= 10240.0 {
        idx += 1;
    }

    format!("{:.1} {}", scaled(idx), UNITS[idx].1)
}
1405
/// Returns `s` with its first character upper-cased and the rest untouched.
///
/// An empty input yields an empty string. Upper-casing uses
/// `char::to_uppercase`, so one character may expand into several.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        let mut out = String::with_capacity(s.len());
        out.extend(first.to_uppercase());
        // Everything after the first character is copied verbatim.
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
1413
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // ------------------------------------------------------------------
    // Shared helpers: every scan test builds a throw-away project on disk,
    // scans it with a default-configured `Scout`, and inspects the snapshot.
    // ------------------------------------------------------------------

    /// Creates a fresh temporary directory to act as a project root.
    #[track_caller]
    fn fixture() -> tempfile::TempDir {
        tempfile::tempdir().unwrap()
    }

    /// Scans `root` with `Scout::new(root)`, panicking if the scan fails.
    #[track_caller]
    fn scan_dir(root: &Path) -> CodebaseSnapshot {
        Scout::new(root).scan().unwrap()
    }

    /// True when a detected pattern is named exactly `name`.
    fn has_pattern(snapshot: &CodebaseSnapshot, name: &str) -> bool {
        snapshot.patterns.iter().any(|p| p.name == name)
    }

    /// True when a detected language is named exactly `language`.
    fn has_language(snapshot: &CodebaseSnapshot, language: &str) -> bool {
        snapshot.languages.iter().any(|l| l.language == language)
    }

    /// True when some dependency entry contains `needle` as a substring.
    fn has_dependency(snapshot: &CodebaseSnapshot, needle: &str) -> bool {
        snapshot.dependencies.iter().any(|d| d.contains(needle))
    }

    // ------------------------------------------------------------------
    // Scanning
    // ------------------------------------------------------------------

    /// An empty directory yields an empty snapshot.
    #[test]
    fn test_scan_empty_dir() {
        let project = fixture();
        let report = scan_dir(project.path());

        assert_eq!(report.total_files, 0);
        assert_eq!(report.total_bytes, 0);
        assert!(report.languages.is_empty());
        assert!(report.patterns.is_empty());
    }

    /// A small Cargo project is recognised as Rust, with its dependencies
    /// and the Tokio / `src/` layout patterns.
    #[test]
    fn test_scan_rust_project() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();

        let manifest = r#"[package]
name = "test-project"
version = "0.1.0"

[dependencies]
serde = { version = "1", features = ["derive"] }
tokio = "1"
anyhow = "1"
"#;
        fs::write(root.join("Cargo.toml"), manifest).unwrap();
        fs::write(
            src_dir.join("main.rs"),
            "fn main() { println!(\"hello\"); }",
        )
        .unwrap();
        fs::write(
            src_dir.join("lib.rs"),
            "pub fn add(a: i32, b: i32) -> i32 { a + b }",
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(report.total_files >= 3);
        assert!(has_language(&report, "Rust"));
        assert!(has_dependency(&report, "serde"));
        assert!(has_pattern(&report, "Rust"));
        assert!(has_pattern(&report, "Async Rust (Tokio)"));
        assert!(has_pattern(&report, "Standard src/ layout"));
    }

    /// A `package.json` with React and Next plus a `.tsx` file is detected
    /// as TypeScript with both framework patterns.
    #[test]
    fn test_scan_ts_project() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();

        let package_json = r#"{"dependencies": {"react": "^18.0.0", "next": "^14.0.0"}}"#;
        fs::write(root.join("package.json"), package_json).unwrap();
        fs::write(
            src_dir.join("index.tsx"),
            "export default function App() { return <div/> }",
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(has_language(&report, "TypeScript"));
        assert!(has_pattern(&report, "React"));
        assert!(has_pattern(&report, "Next.js"));
    }

    /// A Poetry project with Flask is detected as Python with the Flask
    /// pattern and a flask dependency.
    #[test]
    fn test_scan_python_project() {
        let project = fixture();
        let root = project.path();

        let pyproject = r#"[tool.poetry]
name = "test-project"

[tool.poetry.dependencies]
python = "^3.11"
flask = "^3.0"
requests = "^2.31"
"#;
        fs::write(root.join("pyproject.toml"), pyproject).unwrap();
        fs::write(
            root.join("main.py"),
            "from flask import Flask\napp = Flask(__name__)\n",
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(has_language(&report, "Python"));
        assert!(has_pattern(&report, "Flask"));
        assert!(has_dependency(&report, "flask"));
    }

    /// A `go.mod` with a `require` block is detected as Go with its dependency.
    #[test]
    fn test_scan_go_project() {
        let project = fixture();
        let root = project.path();

        fs::write(
            root.join("go.mod"),
            "module example.com/test\n\ngo 1.22\n\nrequire (\n\tgithub.com/gin-gonic/gin v1.9.1\n)\n",
        )
        .unwrap();
        fs::write(root.join("main.go"), "package main\n\nfunc main() {}\n").unwrap();

        let report = scan_dir(root);

        assert!(has_language(&report, "Go"));
        assert!(has_dependency(&report, "gin"));
    }

    /// Nothing under `.git/`, `target/` or `node_modules/` appears in the tree.
    #[test]
    fn test_scan_ignores_dirs() {
        let project = fixture();
        let root = project.path();

        fs::create_dir_all(root.join(".git/objects")).unwrap();
        fs::create_dir_all(root.join("target/debug")).unwrap();
        fs::create_dir_all(root.join("node_modules/react")).unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::create_dir_all(root.join("src")).unwrap();
        fs::write(root.join("src/main.rs"), "fn main() {}").unwrap();

        let report = scan_dir(root);

        for node in &report.tree {
            for ignored in [".git", "target", "node_modules"].iter() {
                let prefix = format!("{}/", ignored);
                assert!(
                    !node.path.starts_with(prefix.as_str()),
                    "Should skip {}: {}",
                    ignored,
                    node.path
                );
            }
        }
    }

    /// With `max_depth: 3`, a file six directories deep is left out of the
    /// tree while a top-level file is kept.
    #[test]
    fn test_scan_respects_depth_limit() {
        let project = fixture();
        let root = project.path();
        let nested = root.join("a/b/c/d/e/f");
        fs::create_dir_all(&nested).unwrap();
        fs::write(nested.join("deep.txt"), "content").unwrap();
        fs::write(root.join("shallow.txt"), "content").unwrap();

        let limited = ScoutConfig {
            root: root.to_path_buf(),
            max_depth: 3,
            ..Default::default()
        };
        let report = Scout::with_config(limited).scan().unwrap();

        assert!(report.tree.iter().any(|n| n.path == "shallow.txt"));
        assert!(report.tree.iter().all(|n| !n.path.contains("deep.txt")));
    }

    /// Scanning a path that does not exist is an error.
    #[test]
    fn test_scan_nonexistent_dir() {
        let outcome = Scout::new("/nonexistent/path/that/does/not/exist").scan();
        assert!(outcome.is_err());
    }

    /// Scanning a regular file instead of a directory is an error.
    #[test]
    fn test_scan_file_as_root() {
        let project = fixture();
        let file_path = project.path().join("not_a_dir.txt");
        fs::write(&file_path, "content").unwrap();

        let outcome = Scout::new(&file_path).scan();
        assert!(outcome.is_err());
    }

    // ------------------------------------------------------------------
    // Rendering
    // ------------------------------------------------------------------

    /// The compact rendering mentions the header, the language and the manifest.
    #[test]
    fn test_render_compact_not_empty() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

        let report = scan_dir(root);
        let rendered = Scout::render_compact(&report);

        assert!(rendered.contains("Codebase Snapshot"));
        assert!(rendered.contains("Rust"));
        assert!(rendered.contains("Cargo.toml"));
    }

    /// The markdown rendering contains its title, the language section and "Rust".
    #[test]
    fn test_render_markdown_not_empty() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

        let report = scan_dir(root);
        let rendered = Scout::render_markdown(&report);

        assert!(rendered.contains("# Codebase Scout Report"));
        assert!(rendered.contains("## Language Breakdown"));
        assert!(rendered.contains("Rust"));
    }

    /// JSON produced by `render_json` parses back into an equivalent snapshot.
    #[test]
    fn test_render_json_roundtrip() {
        let project = fixture();
        let root = project.path();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();

        let report = scan_dir(root);
        let json = Scout::render_json(&report).unwrap();

        let decoded: CodebaseSnapshot = serde_json::from_str(&json).unwrap();
        assert_eq!(decoded.root, report.root);
        assert_eq!(decoded.total_files, report.total_files);
    }

    // ------------------------------------------------------------------
    // Key files and pattern detection
    // ------------------------------------------------------------------

    /// `Cargo.toml` is listed as a `config` key file with a summary.
    #[test]
    fn test_key_file_summary_extraction() {
        let project = fixture();
        let root = project.path();
        fs::write(
            root.join("Cargo.toml"),
            "[package]\nname = \"my-cool-project\"\nversion = \"1.0.0\"\n",
        )
        .unwrap();

        let report = scan_dir(root);

        let manifest_entry = report
            .key_files
            .iter()
            .find(|kf| kf.path == "Cargo.toml")
            .unwrap();
        assert_eq!(manifest_entry.role, "config");
        assert!(manifest_entry.summary.is_some());
    }

    /// A `[workspace]` manifest triggers the workspace pattern.
    #[test]
    fn test_workspace_detection() {
        let project = fixture();
        let root = project.path();
        fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/*\"]\n",
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "Rust workspace (monorepo)"));
    }

    /// Having both `src/lib.rs` and `src/main.rs` triggers the lib+binary pattern.
    #[test]
    fn test_lib_binary_detection() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::write(src_dir.join("lib.rs"), "pub fn foo() {}").unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "Lib+Binary Rust crate"));
    }

    /// A workflow file under `.github/workflows` is reported both as a
    /// pattern and as a `ci` key file.
    #[test]
    fn test_ci_detection() {
        let project = fixture();
        let root = project.path();
        let workflow_dir = root.join(".github/workflows");
        fs::create_dir_all(&workflow_dir).unwrap();
        fs::write(workflow_dir.join("ci.yml"), "name: CI\non: push\n").unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "GitHub Actions CI"));
        assert!(report
            .key_files
            .iter()
            .any(|kf| kf.role == "ci" && kf.path.contains("workflows")));
    }

    /// A top-level `Dockerfile` triggers the Dockerized pattern.
    #[test]
    fn test_docker_detection() {
        let project = fixture();
        let root = project.path();
        fs::write(root.join("Dockerfile"), "FROM rust:1.75\n").unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "Dockerized"));
    }

    /// `controllers/`, `models/` and `views/` directories trigger the MVC pattern.
    #[test]
    fn test_mvc_detection() {
        let project = fixture();
        let root = project.path();
        ["controllers", "models", "views", "src"]
            .iter()
            .for_each(|sub| fs::create_dir_all(root.join(sub)).unwrap());
        fs::write(
            root.join("package.json"),
            r#"{"dependencies": {"express": "^4.0.0"}}"#,
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "MVC architecture"));
    }

    // ------------------------------------------------------------------
    // Free helpers
    // ------------------------------------------------------------------

    /// Spot-checks `format_bytes` at and around each unit boundary.
    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512 B"),
            (1024, "1.0 KB"),
            (1536, "1.5 KB"),
            (1048576, "1.0 MB"),
            (1073741824, "1.0 GB"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(format_bytes(*input), *expected);
        }
    }

    /// `capitalize` upper-cases the first letter and tolerates empty input.
    #[test]
    fn test_capitalize() {
        let cases = [
            ("language", "Language"),
            ("framework", "Framework"),
            ("", ""),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(capitalize(input), *expected);
        }
    }

    // ------------------------------------------------------------------
    // Configuration and serialization
    // ------------------------------------------------------------------

    /// Defaults: depth 6 and the usual ignore entries.
    #[test]
    fn test_config_default() {
        let defaults = ScoutConfig::default();
        assert_eq!(defaults.max_depth, 6);
        assert!(defaults.ignore.iter().any(|entry| entry == ".git"));
        assert!(defaults.ignore.iter().any(|entry| entry == "node_modules"));
    }

    /// A config survives a JSON round-trip.
    #[test]
    fn test_config_serde_roundtrip() {
        let original = ScoutConfig {
            root: PathBuf::from("/tmp/project"),
            max_depth: 4,
            max_sample_bytes: 256 * 1024,
            max_tree_files: 100,
            ignore: vec![String::from(".git")],
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: ScoutConfig = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.root, original.root);
        assert_eq!(decoded.max_depth, 4);
        assert_eq!(decoded.max_tree_files, 100);
    }

    /// A scanned snapshot survives a pretty-printed JSON round-trip.
    #[test]
    fn test_snapshot_serde_roundtrip() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

        let report = scan_dir(root);

        let encoded = serde_json::to_string_pretty(&report).unwrap();
        let decoded: CodebaseSnapshot = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.root, report.root);
        assert_eq!(decoded.total_files, report.total_files);
        assert_eq!(decoded.languages.len(), report.languages.len());
        assert_eq!(decoded.patterns.len(), report.patterns.len());
    }

    // ------------------------------------------------------------------
    // Larger layouts
    // ------------------------------------------------------------------

    /// Dependencies declared by workspace member crates are collected, and
    /// both the workspace and Tokio patterns are detected.
    #[test]
    fn test_scan_cargo_workspace_with_members() {
        let project = fixture();
        let root = project.path();
        let members_dir = root.join("crates");
        let member_a = members_dir.join("crate-a");
        let member_b = members_dir.join("crate-b");
        fs::create_dir_all(member_a.join("src")).unwrap();
        fs::create_dir_all(member_b.join("src")).unwrap();

        fs::write(
            root.join("Cargo.toml"),
            "[workspace]\nmembers = [\"crates/*\"]\n",
        )
        .unwrap();
        fs::write(
            member_a.join("Cargo.toml"),
            "[package]\nname = \"crate-a\"\nversion = \"0.1.0\"\n\n[dependencies]\nserde = \"1\"\n",
        )
        .unwrap();
        fs::write(member_a.join("src/lib.rs"), "pub fn a() {}").unwrap();
        fs::write(
            member_b.join("Cargo.toml"),
            "[package]\nname = \"crate-b\"\nversion = \"0.1.0\"\n\n[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        fs::write(member_b.join("src/lib.rs"), "pub fn b() {}").unwrap();

        let report = scan_dir(root);

        assert!(report.total_files >= 5);
        assert!(has_dependency(&report, "serde"));
        assert!(has_dependency(&report, "tokio"));
        assert!(has_pattern(&report, "Rust workspace (monorepo)"));
        assert!(has_pattern(&report, "Async Rust (Tokio)"));
    }

    /// An `AGENTS.md` file triggers the agent-conventions pattern.
    #[test]
    fn test_agent_conventions_detection() {
        let project = fixture();
        let root = project.path();
        fs::write(
            root.join("AGENTS.md"),
            "# Agent Conventions\nUse Rust.\n",
        )
        .unwrap();
        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();

        let report = scan_dir(root);

        assert!(has_pattern(&report, "AI agent conventions"));
    }

    /// Rust, TypeScript and Python sources in one tree are all reported.
    #[test]
    fn test_multi_language_project() {
        let project = fixture();
        let root = project.path();
        let src_dir = root.join("src");
        let frontend_dir = root.join("frontend/src");
        let scripts_dir = root.join("scripts");
        fs::create_dir_all(&src_dir).unwrap();
        fs::create_dir_all(&frontend_dir).unwrap();
        fs::create_dir_all(&scripts_dir).unwrap();

        fs::write(root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
        fs::write(
            frontend_dir.join("App.tsx"),
            "export default function App() {}",
        )
        .unwrap();
        fs::write(
            scripts_dir.join("build.py"),
            "#!/usr/bin/env python3\nprint('hi')\n",
        )
        .unwrap();

        let report = scan_dir(root);

        assert!(has_language(&report, "Rust"));
        assert!(has_language(&report, "TypeScript"));
        assert!(has_language(&report, "Python"));
    }
}