// oxi/skills/scout.rs

1//! Scout skill for oxi — fast codebase reconnaissance
2//!
3//! Produces a compressed snapshot of a codebase optimized for handoff to another
4//! agent or for quick orientation. Three core capabilities:
5//!
6//! 1. **Fast codebase mapping** — directory tree, file counts, size metrics
7//! 2. **Compressed context for handoff** — a [`CodebaseSnapshot`] that captures
8//!    structure, key files, dependency graph, and detected patterns in a compact
9//!    form designed to fit within context windows.
10//! 3. **Pattern detection** — identifies architectural patterns, frameworks,
11//!    languages, conventions, and anti-patterns.
12//!
13//! Usage:
14//! ```ignore
15//! use oxi::skills::scout::Scout;
16//!
17//! let scout = Scout::new("/path/to/project");
18//! let snapshot = scout.scan()?;
19//! println!("{}", Scout::render_compact(&snapshot));
20//! ```
21
22use anyhow::Result;
23use serde::{Deserialize, Serialize};
24use std::collections::{BTreeMap, BTreeSet};
25use std::fmt;
26use std::fs;
27use std::path::{Path, PathBuf};
28
29// ── Public types ─────────────────────────────────────────────────────
30
31/// Configuration for a scout scan.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct ScoutConfig {
34    /// Root directory to scan.
35    pub root: PathBuf,
36
37    /// Maximum directory depth to traverse (default: 6).
38    #[serde(default = "default_max_depth")]
39    pub max_depth: usize,
40
41    /// Maximum total bytes of file content to sample for pattern detection
42    /// (default: 512 KiB).
43    #[serde(default = "default_max_sample_bytes")]
44    pub max_sample_bytes: usize,
45
46    /// Maximum number of files to include in the tree summary (default: 200).
47    #[serde(default = "default_max_tree_files")]
48    pub max_tree_files: usize,
49
50    /// Directory names to ignore (case-insensitive match).
51    #[serde(default = "default_ignores")]
52    pub ignore: Vec<String>,
53}
54
/// Default traversal depth used when `max_depth` is not supplied.
fn default_max_depth() -> usize {
    const DEFAULT_MAX_DEPTH: usize = 6;
    DEFAULT_MAX_DEPTH
}
/// Default content-sampling budget (512 KiB) used when `max_sample_bytes` is
/// not supplied.
fn default_max_sample_bytes() -> usize {
    const KIB: usize = 1024;
    512 * KIB
}
/// Default size of the tree summary used when `max_tree_files` is not supplied.
fn default_max_tree_files() -> usize {
    const DEFAULT_MAX_TREE_FILES: usize = 200;
    DEFAULT_MAX_TREE_FILES
}
/// Default ignore list: version-control metadata, dependency caches and
/// build-output directories of common toolchains.
fn default_ignores() -> Vec<String> {
    const DEFAULT_IGNORES: [&str; 15] = [
        ".git",
        "node_modules",
        "target",
        "dist",
        "build",
        "__pycache__",
        ".next",
        "vendor",
        "coverage",
        ".cache",
        ".turbo",
        "bazel-bin",
        "bazel-out",
        ".dart_tool",
        ".gradle",
    ];

    DEFAULT_IGNORES
        .iter()
        .map(|name| (*name).to_string())
        .collect()
}
84
85impl Default for ScoutConfig {
86    fn default() -> Self {
87        Self {
88            root: std::env::current_dir().unwrap_or_default(),
89            max_depth: default_max_depth(),
90            max_sample_bytes: default_max_sample_bytes(),
91            max_tree_files: default_max_tree_files(),
92            ignore: default_ignores(),
93        }
94    }
95}
96
97/// A single detected pattern.
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct Pattern {
100    /// Human-readable name (e.g. "Rust project (Cargo)").
101    pub name: String,
102    /// Category: "language", "framework", "tooling", "architecture", "convention", "anti-pattern".
103    pub category: String,
104    /// Confidence: 0-100.
105    pub confidence: u8,
106    /// Short evidence for why this pattern was detected.
107    pub evidence: String,
108}
109
110/// Per-language stats.
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct LanguageStats {
113    /// Language name (e.g. "Rust").
114    pub language: String,
115    /// Number of files.
116    pub file_count: usize,
117    /// Total bytes across files.
118    pub total_bytes: u64,
119    /// File extensions that contributed.
120    pub extensions: BTreeSet<String>,
121}
122
123/// A node in the directory tree summary.
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct TreeNode {
126    /// Relative path from root.
127    pub path: String,
128    /// File extension (empty for dirs).
129    pub ext: String,
130    /// Size in bytes (0 for dirs).
131    pub size: u64,
132    /// True if this is a directory.
133    pub is_dir: bool,
134    /// Number of direct children (files + dirs).
135    pub child_count: usize,
136}
137
138/// The compressed result of a scout scan — everything an agent needs for orientation.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct CodebaseSnapshot {
141    /// Absolute path of the scanned root.
142    pub root: String,
143
144    /// Top-level directory tree (bounded by `max_tree_files`).
145    pub tree: Vec<TreeNode>,
146
147    /// Per-language statistics, sorted by file count descending.
148    pub languages: Vec<LanguageStats>,
149
150    /// Total file count (excluding ignored dirs).
151    pub total_files: usize,
152
153    /// Total bytes across all scanned files.
154    pub total_bytes: u64,
155
156    /// Detected patterns.
157    pub patterns: Vec<Pattern>,
158
159    /// Key files identified (config, entry points, important docs).
160    pub key_files: Vec<KeyFile>,
161
162    /// Dependency names extracted from config files.
163    pub dependencies: Vec<String>,
164
165    /// Generated at timestamp (ISO 8601).
166    pub scanned_at: String,
167
168    /// Scan duration in milliseconds.
169    pub scan_ms: u64,
170}
171
172/// A key file worth highlighting.
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct KeyFile {
175    /// Relative path from root.
176    pub path: String,
177    /// Role: "config", "entrypoint", "readme", "license", "ci", "test", "docs", "schema".
178    pub role: String,
179    /// First meaningful line (for quick orientation).
180    pub summary: Option<String>,
181}
182
183// ── Scout engine ─────────────────────────────────────────────────────
184
185/// The scout engine — performs a fast, read-only scan of a codebase.
186pub struct Scout {
187    config: ScoutConfig,
188}
189
190impl Scout {
191    /// Create a new scout for the given root directory.
192    pub fn new(root: impl Into<PathBuf>) -> Self {
193        Self {
194            config: ScoutConfig {
195                root: root.into(),
196                ..Default::default()
197            },
198        }
199    }
200
201    /// Create a scout with full configuration.
202    pub fn with_config(config: ScoutConfig) -> Self {
203        Self { config }
204    }
205
206    /// Run the scan and produce a [`CodebaseSnapshot`].
207    pub fn scan(&self) -> Result<CodebaseSnapshot> {
208        let root = &self.config.root;
209        if !root.exists() {
210            anyhow::bail!("Root directory does not exist: {}", root.display());
211        }
212        if !root.is_dir() {
213            anyhow::bail!("Root is not a directory: {}", root.display());
214        }
215
216        let start = std::time::Instant::now();
217
218        // Phase 1: Walk the tree and collect raw file info
219        let mut files: Vec<FileEntry> = Vec::new();
220        let mut tree: Vec<TreeNode> = Vec::new();
221        self.walk(root, root, 0, &mut files, &mut tree)?;
222
223        // Truncate tree to budget
224        tree.truncate(self.config.max_tree_files);
225
226        // Phase 2: Compute language stats
227        let languages = self.compute_language_stats(&files);
228
229        // Phase 3: Identify key files
230        let key_files = self.identify_key_files(&files, root);
231
232        // Phase 4: Extract dependencies
233        let dependencies = self.extract_dependencies(&files, root);
234
235        // Phase 5: Detect patterns
236        let patterns = self.detect_patterns(&files, &key_files, &dependencies, root);
237
238        let total_bytes: u64 = files.iter().map(|f| f.size).sum();
239        let scan_ms = start.elapsed().as_millis() as u64;
240
241        Ok(CodebaseSnapshot {
242            root: root.to_string_lossy().to_string(),
243            tree,
244            languages,
245            total_files: files.len(),
246            total_bytes,
247            patterns,
248            key_files,
249            dependencies,
250            scanned_at: chrono::Utc::now().to_rfc3339(),
251            scan_ms,
252        })
253    }
254
255    // ── Internal: walking ────────────────────────────────────────
256
257    fn walk(
258        &self,
259        root: &Path,
260        dir: &Path,
261        depth: usize,
262        files: &mut Vec<FileEntry>,
263        tree: &mut Vec<TreeNode>,
264    ) -> Result<()> {
265        if depth > self.config.max_depth {
266            return Ok(());
267        }
268
269        let entries = match fs::read_dir(dir) {
270            Ok(e) => e,
271            Err(_) => return Ok(()), // skip unreadable dirs
272        };
273
274        for entry in entries {
275            let entry = match entry {
276                Ok(e) => e,
277                Err(_) => continue,
278            };
279
280            let name = entry.file_name().to_string_lossy().to_string();
281            let path = entry.path();
282
283            // Skip ignored names
284            if self.should_ignore(&name) {
285                continue;
286            }
287
288            let rel = path
289                .strip_prefix(root)
290                .unwrap_or(&path)
291                .to_string_lossy()
292                .to_string();
293
294            if path.is_dir() {
295                tree.push(TreeNode {
296                    path: rel.clone(),
297                    ext: String::new(),
298                    size: 0,
299                    is_dir: true,
300                    child_count: 0,
301                });
302                self.walk(root, &path, depth + 1, files, tree)?;
303            } else {
304                let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
305                let ext = Path::new(&name)
306                    .extension()
307                    .map(|e| format!(".{}", e.to_string_lossy()))
308                    .unwrap_or_default();
309
310                files.push(FileEntry {
311                    rel_path: rel.clone(),
312                    name,
313                    ext: ext.clone(),
314                    size,
315                });
316
317                if tree.len() < self.config.max_tree_files {
318                    tree.push(TreeNode {
319                        path: rel,
320                        ext,
321                        size,
322                        is_dir: false,
323                        child_count: 0,
324                    });
325                }
326            }
327        }
328
329        Ok(())
330    }
331
332    fn should_ignore(&self, name: &str) -> bool {
333        let name_lower = name.to_lowercase();
334        // Hidden files/dirs (but allow .env.example etc.)
335        if name_lower.starts_with('.')
336            && name_lower != ".env.example"
337            && name_lower != ".env.local.example"
338        {
339            return true;
340        }
341        // Configured ignores
342        for ignore in &self.config.ignore {
343            if name_lower == ignore.to_lowercase() {
344                return true;
345            }
346        }
347        false
348    }
349
350    // ── Internal: language stats ─────────────────────────────────
351
352    fn compute_language_stats(&self, files: &[FileEntry]) -> Vec<LanguageStats> {
353        let mut lang_map: BTreeMap<String, LanguageStats> = BTreeMap::new();
354
355        for file in files {
356            if let Some(lang) = self.ext_to_language(&file.ext) {
357                let stats = lang_map.entry(lang.to_string()).or_insert_with(|| {
358                    LanguageStats {
359                        language: lang.to_string(),
360                        file_count: 0,
361                        total_bytes: 0,
362                        extensions: BTreeSet::new(),
363                    }
364                });
365                stats.file_count += 1;
366                stats.total_bytes += file.size;
367                stats.extensions.insert(file.ext.clone());
368            }
369        }
370
371        let mut v: Vec<LanguageStats> = lang_map.into_values().collect();
372        v.sort_by(|a, b| b.file_count.cmp(&a.file_count));
373        v
374    }
375
376    fn ext_to_language(&self, ext: &str) -> Option<&'static str> {
377        match ext {
378            ".rs" => Some("Rust"),
379            ".ts" | ".tsx" => Some("TypeScript"),
380            ".js" | ".jsx" | ".mjs" | ".cjs" => Some("JavaScript"),
381            ".py" | ".pyi" => Some("Python"),
382            ".go" => Some("Go"),
383            ".java" => Some("Java"),
384            ".kt" | ".kts" => Some("Kotlin"),
385            ".rb" => Some("Ruby"),
386            ".php" => Some("PHP"),
387            ".c" | ".h" => Some("C"),
388            ".cpp" | ".cc" | ".cxx" | ".hpp" => Some("C++"),
389            ".cs" => Some("C#"),
390            ".swift" => Some("Swift"),
391            ".scala" => Some("Scala"),
392            ".sh" | ".bash" | ".zsh" => Some("Shell"),
393            ".sql" => Some("SQL"),
394            ".html" | ".htm" => Some("HTML"),
395            ".css" | ".scss" | ".sass" | ".less" => Some("CSS"),
396            ".vue" => Some("Vue"),
397            ".svelte" => Some("Svelte"),
398            ".dart" => Some("Dart"),
399            ".lua" => Some("Lua"),
400            ".r" | ".R" => Some("R"),
401            ".zig" => Some("Zig"),
402            ".nim" => Some("Nim"),
403            ".ex" | ".exs" => Some("Elixir"),
404            ".erl" => Some("Erlang"),
405            ".hs" => Some("Haskell"),
406            ".ml" | ".mli" => Some("OCaml"),
407            ".toml" => Some("TOML"),
408            ".yaml" | ".yml" => Some("YAML"),
409            ".json" => Some("JSON"),
410            ".xml" => Some("XML"),
411            ".md" | ".mdx" => Some("Markdown"),
412            _ => None,
413        }
414    }
415
416    // ── Internal: key files ──────────────────────────────────────
417
418    fn identify_key_files(&self, files: &[FileEntry], root: &Path) -> Vec<KeyFile> {
419        let mut key_files: Vec<KeyFile> = Vec::new();
420
421        let key_patterns: &[(&str, &str)] = &[
422            // Config
423            ("Cargo.toml", "config"),
424            ("package.json", "config"),
425            ("pyproject.toml", "config"),
426            ("go.mod", "config"),
427            ("build.gradle", "config"),
428            ("build.gradle.kts", "config"),
429            ("pom.xml", "config"),
430            ("Makefile", "config"),
431            ("CMakeLists.txt", "config"),
432            ("docker-compose.yml", "config"),
433            ("docker-compose.yaml", "config"),
434            ("tsconfig.json", "config"),
435            (".env.example", "config"),
436            // Entrypoints
437            ("main.rs", "entrypoint"),
438            ("main.go", "entrypoint"),
439            ("main.py", "entrypoint"),
440            ("main.java", "entrypoint"),
441            ("main.ts", "entrypoint"),
442            ("main.js", "entrypoint"),
443            ("index.ts", "entrypoint"),
444            ("index.js", "entrypoint"),
445            ("index.py", "entrypoint"),
446            ("app.rs", "entrypoint"),
447            ("lib.rs", "entrypoint"),
448            ("mod.rs", "entrypoint"),
449            // Docs
450            ("README.md", "readme"),
451            ("README", "readme"),
452            ("README.txt", "readme"),
453            ("README.rst", "readme"),
454            ("LICENSE", "license"),
455            ("LICENSE.md", "license"),
456            ("LICENSE.txt", "license"),
457            ("CHANGELOG.md", "docs"),
458            ("CONTRIBUTING.md", "docs"),
459            // CI
460            (".github/workflows", "ci"),
461            (".gitlab-ci.yml", "ci"),
462            ("Jenkinsfile", "ci"),
463            // Test dirs
464            ("tests", "test"),
465            ("test", "test"),
466            ("spec", "test"),
467            ("__tests__", "test"),
468        ];
469
470        for file in files {
471            let name_lower = file.name.to_lowercase();
472
473            for (pattern, role) in key_patterns {
474                if name_lower == *pattern || file.rel_path.contains(pattern) {
475                    let summary = self.read_file_summary(root, &file.rel_path);
476                    key_files.push(KeyFile {
477                        path: file.rel_path.clone(),
478                        role: role.to_string(),
479                        summary,
480                    });
481                    break;
482                }
483            }
484        }
485
486        // Also check for CI workflow files
487        let ci_dir = root.join(".github").join("workflows");
488        if ci_dir.is_dir() {
489            if let Ok(entries) = fs::read_dir(&ci_dir) {
490                for entry in entries.flatten() {
491                    let name = entry.file_name().to_string_lossy().to_string();
492                    if name.ends_with(".yml") || name.ends_with(".yaml") {
493                        let rel = format!(".github/workflows/{}", name);
494                        if !key_files.iter().any(|kf| kf.path == rel) {
495                            key_files.push(KeyFile {
496                                path: rel,
497                                role: "ci".to_string(),
498                                summary: None,
499                            });
500                        }
501                    }
502                }
503            }
504        }
505
506        // Deduplicate by path
507        let mut seen = BTreeSet::new();
508        key_files.retain(|kf| seen.insert(kf.path.clone()));
509
510        // Sort: config first, then entrypoint, then readme, then rest
511        key_files.sort_by(|a, b| {
512            let rank = |r: &str| -> u8 {
513                match r {
514                    "config" => 0,
515                    "entrypoint" => 1,
516                    "readme" => 2,
517                    "license" => 3,
518                    "ci" => 4,
519                    "test" => 5,
520                    "docs" => 6,
521                    _ => 7,
522                }
523            };
524            rank(&a.role)
525                .cmp(&rank(&b.role))
526                .then_with(|| a.path.cmp(&b.path))
527        });
528
529        key_files
530    }
531
532    /// Read the first meaningful line of a file for a summary.
533    fn read_file_summary(&self, root: &Path, rel_path: &str) -> Option<String> {
534        let path = root.join(rel_path);
535        let content = fs::read_to_string(&path).ok()?;
536
537        for line in content.lines() {
538            let trimmed = line.trim();
539            // Skip empty lines, comments, frontmatter, shebangs
540            if trimmed.is_empty()
541                || trimmed.starts_with('#')
542                || trimmed.starts_with("//")
543                || trimmed.starts_with("/*")
544                || trimmed.starts_with("--")
545                || trimmed.starts_with("---")
546                || trimmed.starts_with("!")
547            {
548                continue;
549            }
550            // Truncate long lines
551            if trimmed.len() > 120 {
552                return Some(format!("{}…", &trimmed[..120]));
553            }
554            return Some(trimmed.to_string());
555        }
556        None
557    }
558
559    // ── Internal: dependencies ───────────────────────────────────
560
561    fn extract_dependencies(&self, files: &[FileEntry], root: &Path) -> Vec<String> {
562        let mut deps: Vec<String> = Vec::new();
563
564        for file in files {
565            match file.name.as_str() {
566                "Cargo.toml" => {
567                    let path = root.join(&file.rel_path);
568                    if let Ok(content) = fs::read_to_string(&path) {
569                        self.extract_cargo_deps(&content, &mut deps);
570                    }
571                }
572                "package.json" => {
573                    let path = root.join(&file.rel_path);
574                    if let Ok(content) = fs::read_to_string(&path) {
575                        self.extract_npm_deps(&content, &mut deps);
576                    }
577                }
578                "go.mod" => {
579                    let path = root.join(&file.rel_path);
580                    if let Ok(content) = fs::read_to_string(&path) {
581                        self.extract_go_deps(&content, &mut deps);
582                    }
583                }
584                "pyproject.toml" => {
585                    let path = root.join(&file.rel_path);
586                    if let Ok(content) = fs::read_to_string(&path) {
587                        self.extract_python_deps(&content, &mut deps);
588                    }
589                }
590                _ => {}
591            }
592        }
593
594        deps.sort();
595        deps.dedup();
596        deps
597    }
598
599    fn extract_cargo_deps(&self, content: &str, deps: &mut Vec<String>) {
600        let mut in_deps = false;
601        for line in content.lines() {
602            let trimmed = line.trim();
603            if trimmed == "[dependencies]" || trimmed == "[dev-dependencies]" {
604                in_deps = true;
605                continue;
606            }
607            if trimmed.starts_with('[') {
608                in_deps = false;
609                continue;
610            }
611            if in_deps {
612                if let Some((name, _)) = trimmed.split_once('=') {
613                    let name = name.trim().to_string();
614                    if !name.is_empty() {
615                        deps.push(format!("{} (crate)", name));
616                    }
617                } else if let Some((name, _)) = trimmed.split_once('{') {
618                    let name = name.trim().to_string();
619                    if !name.is_empty() {
620                        deps.push(format!("{} (crate)", name));
621                    }
622                }
623            }
624        }
625    }
626
627    fn extract_npm_deps(&self, content: &str, deps: &mut Vec<String>) {
628        if let Ok(json) = serde_json::from_str::<serde_json::Value>(content) {
629            for section in &["dependencies", "devDependencies"] {
630                if let Some(obj) = json.get(section).and_then(|v| v.as_object()) {
631                    for name in obj.keys() {
632                        deps.push(format!("{} (npm)", name));
633                    }
634                }
635            }
636        }
637    }
638
639    fn extract_go_deps(&self, content: &str, deps: &mut Vec<String>) {
640        for line in content.lines() {
641            let trimmed = line.trim();
642            if trimmed.starts_with("require (") {
643                continue;
644            }
645            if trimmed.starts_with("require ") {
646                // Single-line require
647                let parts: Vec<&str> = trimmed.split_whitespace().collect();
648                if parts.len() >= 3 && parts[0] == "require" {
649                    deps.push(format!("{} (go)", parts[1]));
650                }
651            } else if !trimmed.starts_with("//")
652                && !trimmed.starts_with(')')
653                && !trimmed.starts_with("module ")
654                && !trimmed.starts_with("go ")
655                && !trimmed.is_empty()
656            {
657                // Inside require block: "github.com/foo/bar v1.2.3"
658                let parts: Vec<&str> = trimmed.split_whitespace().collect();
659                if parts.len() >= 2 && parts[0].contains('/') {
660                    deps.push(format!("{} (go)", parts[0]));
661                }
662            }
663        }
664    }
665
666    fn extract_python_deps(&self, content: &str, deps: &mut Vec<String>) {
667        let mut in_deps = false;
668        for line in content.lines() {
669            let trimmed = line.trim();
670            if trimmed == "[project]" || trimmed == "[tool.poetry]" {
671                in_deps = false;
672            }
673            // Poetry-style: [tool.poetry.dependencies]
674            if trimmed.starts_with('[') && trimmed.contains("dependencies") {
675                in_deps = true;
676                continue;
677            }
678            if trimmed.starts_with('[') && !trimmed.contains("dependencies") {
679                in_deps = false;
680                continue;
681            }
682            if in_deps {
683                // PEP 621: dependencies = ["requests", "flask"]
684                if let Some((key, value)) = trimmed.split_once('=') {
685                    let key = key.trim();
686                    if key == "dependencies" {
687                        let cleaned = value
688                            .trim()
689                            .trim_start_matches('[')
690                            .trim_end_matches(']');
691                        for dep in cleaned.split(',') {
692                            let dep = dep.trim().trim_matches('"').trim_matches('\'');
693                            if !dep.is_empty() {
694                                deps.push(format!("{} (pypi)", dep));
695                            }
696                        }
697                    } else if key != "python"
698                        && !key.contains("version")
699                        && !key.contains("requires")
700                    {
701                        // Poetry-style: key = "version"
702                        let name = key.to_string();
703                        if !name.is_empty() {
704                            deps.push(format!("{} (pypi)", name));
705                        }
706                    }
707                }
708            }
709        }
710    }
711
712    // ── Internal: pattern detection ──────────────────────────────
713
714    fn detect_patterns(
715        &self,
716        files: &[FileEntry],
717        _key_files: &[KeyFile],
718        deps: &[String],
719        root: &Path,
720    ) -> Vec<Pattern> {
721        let mut patterns: Vec<Pattern> = Vec::new();
722
723        // Build quick lookup maps
724        let file_names: BTreeSet<&str> = files.iter().map(|f| f.name.as_str()).collect();
725        let has_ext = |ext: &str| -> bool { files.iter().any(|f| f.ext == ext) };
726        let has_dir = |dir_name: &str| -> bool {
727            root.join(dir_name).is_dir()
728                || files
729                    .iter()
730                    .any(|f| f.rel_path.starts_with(&format!("{}/", dir_name)))
731        };
732        let dep_contains = |substr: &str| -> bool {
733            deps.iter()
734                .any(|d| d.to_lowercase().contains(&substr.to_lowercase()))
735        };
736
737        // ── Language detection ───────────────────────────────
738
739        if has_ext(".rs") {
740            patterns.push(Pattern {
741                name: "Rust".to_string(),
742                category: "language".to_string(),
743                confidence: 98,
744                evidence: "Found .rs files".to_string(),
745            });
746        }
747
748        if has_ext(".ts") || has_ext(".tsx") {
749            patterns.push(Pattern {
750                name: "TypeScript".to_string(),
751                category: "language".to_string(),
752                confidence: 97,
753                evidence: "Found .ts/.tsx files".to_string(),
754            });
755        } else if has_ext(".js") || has_ext(".jsx") {
756            patterns.push(Pattern {
757                name: "JavaScript".to_string(),
758                category: "language".to_string(),
759                confidence: 95,
760                evidence: "Found .js/.jsx files (no .ts)".to_string(),
761            });
762        }
763
764        if has_ext(".py") {
765            patterns.push(Pattern {
766                name: "Python".to_string(),
767                category: "language".to_string(),
768                confidence: 97,
769                evidence: "Found .py files".to_string(),
770            });
771        }
772
773        if has_ext(".go") {
774            patterns.push(Pattern {
775                name: "Go".to_string(),
776                category: "language".to_string(),
777                confidence: 98,
778                evidence: "Found .go files".to_string(),
779            });
780        }
781
782        if has_ext(".java") {
783            patterns.push(Pattern {
784                name: "Java".to_string(),
785                category: "language".to_string(),
786                confidence: 98,
787                evidence: "Found .java files".to_string(),
788            });
789        }
790
791        if has_ext(".swift") {
792            patterns.push(Pattern {
793                name: "Swift".to_string(),
794                category: "language".to_string(),
795                confidence: 98,
796                evidence: "Found .swift files".to_string(),
797            });
798        }
799
800        // ── Framework detection ──────────────────────────────
801
802        if file_names.contains("Cargo.toml") && has_ext(".rs") {
803            if dep_contains("tokio") {
804                patterns.push(Pattern {
805                    name: "Async Rust (Tokio)".to_string(),
806                    category: "framework".to_string(),
807                    confidence: 90,
808                    evidence: "tokio dependency in Cargo.toml".to_string(),
809                });
810            }
811            if dep_contains("actix") {
812                patterns.push(Pattern {
813                    name: "Actix Web".to_string(),
814                    category: "framework".to_string(),
815                    confidence: 92,
816                    evidence: "actix dependency".to_string(),
817                });
818            }
819            if dep_contains("axum") {
820                patterns.push(Pattern {
821                    name: "Axum".to_string(),
822                    category: "framework".to_string(),
823                    confidence: 92,
824                    evidence: "axum dependency".to_string(),
825                });
826            }
827            if dep_contains("wasm") || dep_contains("leptos") {
828                patterns.push(Pattern {
829                    name: "WASM/Leptos".to_string(),
830                    category: "framework".to_string(),
831                    confidence: 85,
832                    evidence: "wasm-related dependency".to_string(),
833                });
834            }
835        }
836
837        if file_names.contains("package.json") {
838            if dep_contains("react") {
839                patterns.push(Pattern {
840                    name: "React".to_string(),
841                    category: "framework".to_string(),
842                    confidence: 95,
843                    evidence: "react dependency".to_string(),
844                });
845            }
846            if dep_contains("vue") {
847                patterns.push(Pattern {
848                    name: "Vue".to_string(),
849                    category: "framework".to_string(),
850                    confidence: 95,
851                    evidence: "vue dependency".to_string(),
852                });
853            }
854            if dep_contains("svelte") {
855                patterns.push(Pattern {
856                    name: "Svelte".to_string(),
857                    category: "framework".to_string(),
858                    confidence: 95,
859                    evidence: "svelte dependency".to_string(),
860                });
861            }
862            if dep_contains("next") {
863                patterns.push(Pattern {
864                    name: "Next.js".to_string(),
865                    category: "framework".to_string(),
866                    confidence: 95,
867                    evidence: "next dependency".to_string(),
868                });
869            }
870            if dep_contains("express") {
871                patterns.push(Pattern {
872                    name: "Express".to_string(),
873                    category: "framework".to_string(),
874                    confidence: 92,
875                    evidence: "express dependency".to_string(),
876                });
877            }
878            if dep_contains("fastify") {
879                patterns.push(Pattern {
880                    name: "Fastify".to_string(),
881                    category: "framework".to_string(),
882                    confidence: 92,
883                    evidence: "fastify dependency".to_string(),
884                });
885            }
886        }
887
888        if has_ext(".py") {
889            if dep_contains("django") {
890                patterns.push(Pattern {
891                    name: "Django".to_string(),
892                    category: "framework".to_string(),
893                    confidence: 93,
894                    evidence: "django dependency".to_string(),
895                });
896            }
897            if dep_contains("flask") {
898                patterns.push(Pattern {
899                    name: "Flask".to_string(),
900                    category: "framework".to_string(),
901                    confidence: 93,
902                    evidence: "flask dependency".to_string(),
903                });
904            }
905            if dep_contains("fastapi") {
906                patterns.push(Pattern {
907                    name: "FastAPI".to_string(),
908                    category: "framework".to_string(),
909                    confidence: 93,
910                    evidence: "fastapi dependency".to_string(),
911                });
912            }
913        }
914
915        // ── Architecture patterns ────────────────────────────
916
917        // Monorepo / workspace
918        if file_names.contains("Cargo.toml") {
919            let cargo_content =
920                fs::read_to_string(root.join("Cargo.toml")).unwrap_or_default();
921            if cargo_content.contains("[workspace]") {
922                patterns.push(Pattern {
923                    name: "Rust workspace (monorepo)".to_string(),
924                    category: "architecture".to_string(),
925                    confidence: 95,
926                    evidence: "[workspace] in Cargo.toml".to_string(),
927                });
928            }
929        }
930
931        // src/ layout
932        if has_dir("src") {
933            patterns.push(Pattern {
934                name: "Standard src/ layout".to_string(),
935                category: "architecture".to_string(),
936                confidence: 90,
937                evidence: "src/ directory present".to_string(),
938            });
939        }
940
941        // lib + binary separation
942        if root.join("src/lib.rs").exists() && root.join("src/main.rs").exists() {
943            patterns.push(Pattern {
944                name: "Lib+Binary Rust crate".to_string(),
945                category: "architecture".to_string(),
946                confidence: 90,
947                evidence: "Both lib.rs and main.rs in src/".to_string(),
948            });
949        }
950
951        // Feature modules
952        let mod_dirs: Vec<&FileEntry> = files
953            .iter()
954            .filter(|f| {
955                f.ext == ".rs"
956                    && f.rel_path.starts_with("src/")
957                    && f.rel_path.ends_with("/mod.rs")
958            })
959            .collect();
960        if mod_dirs.len() >= 3 {
961            patterns.push(Pattern {
962                name: "Multi-module Rust project".to_string(),
963                category: "architecture".to_string(),
964                confidence: 85,
965                evidence: format!("{} mod.rs modules found", mod_dirs.len()),
966            });
967        }
968
969        // MVC / layered
970        if has_dir("controllers") && has_dir("models") && has_dir("views") {
971            patterns.push(Pattern {
972                name: "MVC architecture".to_string(),
973                category: "architecture".to_string(),
974                confidence: 88,
975                evidence: "Has controllers/, models/, views/ directories".to_string(),
976            });
977        }
978
979        // ── Tooling patterns ─────────────────────────────────
980
981        if has_dir(".github") {
982            patterns.push(Pattern {
983                name: "GitHub Actions CI".to_string(),
984                category: "tooling".to_string(),
985                confidence: 95,
986                evidence: ".github/ directory present".to_string(),
987            });
988        }
989
990        if root.join("Dockerfile").exists() {
991            patterns.push(Pattern {
992                name: "Dockerized".to_string(),
993                category: "tooling".to_string(),
994                confidence: 95,
995                evidence: "Dockerfile found".to_string(),
996            });
997        }
998
999        if file_names.contains("Makefile") {
1000            patterns.push(Pattern {
1001                name: "Make-based build".to_string(),
1002                category: "tooling".to_string(),
1003                confidence: 90,
1004                evidence: "Makefile found".to_string(),
1005            });
1006        }
1007
1008        // ── Convention patterns ──────────────────────────────
1009
1010        if has_dir("tests") || has_dir("test") {
1011            patterns.push(Pattern {
1012                name: "Has dedicated test directory".to_string(),
1013                category: "convention".to_string(),
1014                confidence: 95,
1015                evidence: "tests/ or test/ directory present".to_string(),
1016            });
1017        }
1018
1019        if has_dir("docs") {
1020            patterns.push(Pattern {
1021                name: "Has docs/ directory".to_string(),
1022                category: "convention".to_string(),
1023                confidence: 90,
1024                evidence: "docs/ directory present".to_string(),
1025            });
1026        }
1027
1028        if file_names.contains("CLIP.md")
1029            || file_names.contains("AGENTS.md")
1030            || file_names.contains("CLAUDE.md")
1031        {
1032            patterns.push(Pattern {
1033                name: "AI agent conventions".to_string(),
1034                category: "convention".to_string(),
1035                confidence: 92,
1036                evidence: "Agent config file (AGENTS.md/CLAUDE.md/CLIP.md)".to_string(),
1037            });
1038        }
1039
1040        // ── Anti-pattern detection ───────────────────────────
1041
1042        // Very large files
1043        let large_files: Vec<&FileEntry> = files.iter().filter(|f| f.size > 100_000).collect();
1044        if large_files.len() > 5 {
1045            patterns.push(Pattern {
1046                name: "Large files (>100KB)".to_string(),
1047                category: "anti-pattern".to_string(),
1048                confidence: 80,
1049                evidence: format!(
1050                    "{} files exceed 100KB — largest: {}",
1051                    large_files.len(),
1052                    large_files
1053                        .iter()
1054                        .max_by_key(|f| f.size)
1055                        .map(|f| format!("{} ({}KB)", f.rel_path, f.size / 1024))
1056                        .unwrap_or_default()
1057                ),
1058            });
1059        }
1060
1061        // Mixed tab/space indentation (sample a few files)
1062        let mixed_indent = self.detect_mixed_indentation(root, files);
1063        if mixed_indent > 0 {
1064            patterns.push(Pattern {
1065                name: "Mixed indentation".to_string(),
1066                category: "anti-pattern".to_string(),
1067                confidence: 70,
1068                evidence: format!(
1069                    "{} file(s) mix tabs and spaces for indentation",
1070                    mixed_indent
1071                ),
1072            });
1073        }
1074
1075        // Sort by category, then confidence descending
1076        patterns.sort_by(|a, b| {
1077            a.category
1078                .cmp(&b.category)
1079                .then_with(|| b.confidence.cmp(&a.confidence))
1080        });
1081
1082        patterns
1083    }
1084
1085    /// Check a sample of source files for mixed tabs/spaces.
1086    fn detect_mixed_indentation(&self, root: &Path, files: &[FileEntry]) -> usize {
1087        let source_exts = [".rs", ".ts", ".js", ".py", ".go", ".java", ".tsx", ".jsx"];
1088        let mut count = 0usize;
1089        let mut sampled = 0usize;
1090        let max_sample = 20;
1091
1092        for file in files {
1093            if sampled >= max_sample {
1094                break;
1095            }
1096            if !source_exts.contains(&file.ext.as_str()) {
1097                continue;
1098            }
1099
1100            let path = root.join(&file.rel_path);
1101            if let Ok(content) = fs::read_to_string(&path) {
1102                sampled += 1;
1103                let has_tabs = content.lines().any(|l| l.starts_with('\t'));
1104                let has_spaces = content
1105                    .lines()
1106                    .any(|l| l.starts_with("    ") || l.starts_with("  "));
1107                if has_tabs && has_spaces {
1108                    count += 1;
1109                }
1110            }
1111        }
1112
1113        count
1114    }
1115
1116    // ── Rendering ───────────────────────────────────────────────
1117
1118    /// Render a compact, human-readable summary of the codebase snapshot.
1119    /// Designed to be used as handoff context between agents.
1120    pub fn render_compact(snapshot: &CodebaseSnapshot) -> String {
1121        let mut out = String::with_capacity(4096);
1122
1123        out.push_str("╔══ Codebase Snapshot ══════════════════════╗\n");
1124        out.push_str(&format!("║ Root: {}\n", snapshot.root));
1125        out.push_str(&format!(
1126            "║ Files: {} | Size: {} | Scan: {}ms\n",
1127            snapshot.total_files,
1128            format_bytes(snapshot.total_bytes),
1129            snapshot.scan_ms
1130        ));
1131        out.push_str("╚═══════════════════════════════════════════╝\n\n");
1132
1133        // Languages
1134        if !snapshot.languages.is_empty() {
1135            out.push_str("## Languages\n\n");
1136            out.push_str("| Language | Files | Size |\n");
1137            out.push_str("|----------|-------|------|\n");
1138            for lang in &snapshot.languages {
1139                out.push_str(&format!(
1140                    "| {} | {} | {} |\n",
1141                    lang.language,
1142                    lang.file_count,
1143                    format_bytes(lang.total_bytes)
1144                ));
1145            }
1146            out.push('\n');
1147        }
1148
1149        // Patterns
1150        if !snapshot.patterns.is_empty() {
1151            out.push_str("## Detected Patterns\n\n");
1152            for pattern in &snapshot.patterns {
1153                let conf = if pattern.confidence >= 90 {
1154                    "●"
1155                } else if pattern.confidence >= 70 {
1156                    "◐"
1157                } else {
1158                    "○"
1159                };
1160                out.push_str(&format!(
1161                    "- {} **{}** [{}] — {}\n",
1162                    conf, pattern.name, pattern.category, pattern.evidence
1163                ));
1164            }
1165            out.push('\n');
1166        }
1167
1168        // Key files
1169        if !snapshot.key_files.is_empty() {
1170            out.push_str("## Key Files\n\n");
1171            for kf in &snapshot.key_files {
1172                if let Some(ref summary) = kf.summary {
1173                    out.push_str(&format!(
1174                        "- `{}` [{}] — {}\n",
1175                        kf.path, kf.role, summary
1176                    ));
1177                } else {
1178                    out.push_str(&format!("- `{}` [{}]\n", kf.path, kf.role));
1179                }
1180            }
1181            out.push('\n');
1182        }
1183
1184        // Dependencies (abbreviated)
1185        if !snapshot.dependencies.is_empty() {
1186            let display_count = 15;
1187            out.push_str(&format!(
1188                "## Dependencies ({} total)\n\n",
1189                snapshot.dependencies.len()
1190            ));
1191            for dep in snapshot.dependencies.iter().take(display_count) {
1192                out.push_str(&format!("- {}\n", dep));
1193            }
1194            if snapshot.dependencies.len() > display_count {
1195                out.push_str(&format!(
1196                    "- … and {} more\n",
1197                    snapshot.dependencies.len() - display_count
1198                ));
1199            }
1200            out.push('\n');
1201        }
1202
1203        // Directory tree (compact)
1204        if !snapshot.tree.is_empty() {
1205            out.push_str("## Directory Tree (top)\n\n");
1206            out.push_str("```\n");
1207            for node in &snapshot.tree {
1208                let indent = node.path.matches('/').count();
1209                let prefix = "  ".repeat(indent);
1210                let name = node.path.rsplit('/').next().unwrap_or(&node.path);
1211                if node.is_dir {
1212                    out.push_str(&format!("{}{}/\n", prefix, name));
1213                } else {
1214                    out.push_str(&format!(
1215                        "{}{} {}\n",
1216                        prefix,
1217                        name,
1218                        if node.size > 0 {
1219                            format!("({})", format_bytes(node.size))
1220                        } else {
1221                            String::new()
1222                        }
1223                    ));
1224                }
1225            }
1226            out.push_str("```\n");
1227        }
1228
1229        out
1230    }
1231
1232    /// Render a JSON string of the snapshot (for programmatic handoff).
1233    pub fn render_json(snapshot: &CodebaseSnapshot) -> Result<String> {
1234        Ok(serde_json::to_string_pretty(snapshot)?)
1235    }
1236
1237    /// Render a markdown report of the snapshot.
1238    pub fn render_markdown(snapshot: &CodebaseSnapshot) -> String {
1239        let mut md = String::with_capacity(4096);
1240
1241        md.push_str("# Codebase Scout Report\n\n");
1242        md.push_str(&format!(
1243            "> Scanned: {} | Files: {} | Size: {} | Duration: {}ms\n\n",
1244            snapshot.scanned_at,
1245            snapshot.total_files,
1246            format_bytes(snapshot.total_bytes),
1247            snapshot.scan_ms,
1248        ));
1249
1250        // Overview
1251        md.push_str("## Overview\n\n");
1252        md.push_str(&format!("- **Root:** `{}`\n", snapshot.root));
1253        md.push_str(&format!("- **Total files:** {}\n", snapshot.total_files));
1254        md.push_str(&format!(
1255            "- **Total size:** {}\n",
1256            format_bytes(snapshot.total_bytes)
1257        ));
1258
1259        if let Some(primary_lang) = snapshot.languages.first() {
1260            md.push_str(&format!(
1261                "- **Primary language:** {} ({} files)\n",
1262                primary_lang.language, primary_lang.file_count
1263            ));
1264        }
1265        md.push('\n');
1266
1267        // Language breakdown
1268        if !snapshot.languages.is_empty() {
1269            md.push_str("## Language Breakdown\n\n");
1270            md.push_str("| Language | Files | Size | Extensions |\n");
1271            md.push_str("|----------|-------|------|------------|\n");
1272            for lang in &snapshot.languages {
1273                let exts = lang
1274                    .extensions
1275                    .iter()
1276                    .cloned()
1277                    .collect::<Vec<_>>()
1278                    .join(", ");
1279                md.push_str(&format!(
1280                    "| {} | {} | {} | {} |\n",
1281                    lang.language,
1282                    lang.file_count,
1283                    format_bytes(lang.total_bytes),
1284                    exts
1285                ));
1286            }
1287            md.push('\n');
1288        }
1289
1290        // Patterns
1291        if !snapshot.patterns.is_empty() {
1292            md.push_str("## Detected Patterns\n\n");
1293            let mut current_category = String::new();
1294            for pattern in &snapshot.patterns {
1295                if pattern.category != current_category {
1296                    current_category = pattern.category.clone();
1297                    md.push_str(&format!("### {}s\n\n", capitalize(&current_category)));
1298                }
1299                md.push_str(&format!(
1300                    "- **{}** ({}% confidence) — {}\n",
1301                    pattern.name, pattern.confidence, pattern.evidence
1302                ));
1303            }
1304            md.push('\n');
1305        }
1306
1307        // Key files
1308        if !snapshot.key_files.is_empty() {
1309            md.push_str("## Key Files\n\n");
1310            md.push_str("| Path | Role | Summary |\n");
1311            md.push_str("|------|------|--------|\n");
1312            for kf in &snapshot.key_files {
1313                let summary = kf.summary.as_deref().unwrap_or("—");
1314                md.push_str(&format!(
1315                    "| `{}` | {} | {} |\n",
1316                    kf.path, kf.role, summary
1317                ));
1318            }
1319            md.push('\n');
1320        }
1321
1322        // Dependencies
1323        if !snapshot.dependencies.is_empty() {
1324            md.push_str(&format!(
1325                "## Dependencies ({})\n\n",
1326                snapshot.dependencies.len()
1327            ));
1328            for dep in &snapshot.dependencies {
1329                md.push_str(&format!("- {}\n", dep));
1330            }
1331            md.push('\n');
1332        }
1333
1334        // Directory tree
1335        if !snapshot.tree.is_empty() {
1336            md.push_str("## Directory Structure\n\n");
1337            md.push_str("```\n");
1338            for node in &snapshot.tree {
1339                let depth = node.path.matches('/').count();
1340                let indent = "  ".repeat(depth);
1341                let name = node.path.rsplit('/').next().unwrap_or(&node.path);
1342                if node.is_dir {
1343                    md.push_str(&format!("{}{}/\n", indent, name));
1344                } else {
1345                    md.push_str(&format!(
1346                        "{}{} {}\n",
1347                        indent,
1348                        name,
1349                        if node.size > 0 {
1350                            format!("({})", format_bytes(node.size))
1351                        } else {
1352                            String::new()
1353                        }
1354                    ));
1355                }
1356            }
1357            md.push_str("```\n");
1358        }
1359
1360        md
1361    }
1362}
1363
1364impl Default for Scout {
1365    fn default() -> Self {
1366        Self::new(std::env::current_dir().unwrap_or_default())
1367    }
1368}
1369
1370impl fmt::Debug for Scout {
1371    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1372        f.debug_struct("Scout")
1373            .field("root", &self.config.root)
1374            .finish()
1375    }
1376}
1377
1378// ── Internal types ───────────────────────────────────────────────────
1379
/// One file seen during a scan, as consumed by the pattern detectors.
#[derive(Debug)]
struct FileEntry {
    /// Path that is joined onto the scan root to open the file; matched with
    /// `/`-separated prefixes such as `src/`.
    rel_path: String,
    /// Presumably the file name without its directory — confirm against the
    /// code that builds these entries.
    name: String,
    /// Extension including the leading dot (compared against values like `.rs`).
    ext: String,
    /// File size in bytes.
    size: u64,
}
1387
1388// ── Helpers ──────────────────────────────────────────────────────────
1389
/// Formats a byte count with binary (1024-based) units.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values use
/// the largest of KB, MB or GB that fits, with one decimal place (`"1.5 KB"`).
fn format_bytes(bytes: u64) -> String {
    // Largest unit first so the first match is the one to use.
    const UNITS: [(u64, &str); 3] = [(1 << 30, "GB"), (1 << 20, "MB"), (1 << 10, "KB")];

    UNITS
        .iter()
        .find(|&&(limit, _)| bytes >= limit)
        .map(|&(limit, unit)| format!("{:.1} {}", bytes as f64 / limit as f64, unit))
        .unwrap_or_else(|| format!("{} B", bytes))
}
1405
/// Returns `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. The first character may expand to
/// several characters when upper-cased (for example `ß` becomes `SS`).
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        let mut out: String = first.to_uppercase().collect();
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
1413
1414// ── Tests ────────────────────────────────────────────────────────────
1415
1416#[cfg(test)]
1417mod tests {
1418    use super::*;
1419    use std::fs;
1420
1421    #[test]
1422    fn test_scan_empty_dir() {
1423        let tmp = tempfile::tempdir().unwrap();
1424        let scout = Scout::new(tmp.path());
1425        let snapshot = scout.scan().unwrap();
1426
1427        assert_eq!(snapshot.total_files, 0);
1428        assert_eq!(snapshot.total_bytes, 0);
1429        assert!(snapshot.languages.is_empty());
1430        assert!(snapshot.patterns.is_empty());
1431    }
1432
1433    #[test]
1434    fn test_scan_rust_project() {
1435        let tmp = tempfile::tempdir().unwrap();
1436        let src = tmp.path().join("src");
1437        fs::create_dir_all(&src).unwrap();
1438
1439        fs::write(
1440            tmp.path().join("Cargo.toml"),
1441            r#"[package]
1442name = "test-project"
1443version = "0.1.0"
1444
1445[dependencies]
1446serde = { version = "1", features = ["derive"] }
1447tokio = "1"
1448anyhow = "1"
1449"#,
1450        )
1451        .unwrap();
1452        fs::write(src.join("main.rs"), "fn main() { println!(\"hello\"); }").unwrap();
1453        fs::write(
1454            src.join("lib.rs"),
1455            "pub fn add(a: i32, b: i32) -> i32 { a + b }",
1456        )
1457        .unwrap();
1458
1459        let scout = Scout::new(tmp.path());
1460        let snapshot = scout.scan().unwrap();
1461
1462        assert!(snapshot.total_files >= 3);
1463        assert!(snapshot.languages.iter().any(|l| l.language == "Rust"));
1464        assert!(snapshot
1465            .dependencies
1466            .iter()
1467            .any(|d| d.contains("serde")));
1468        assert!(snapshot.patterns.iter().any(|p| p.name == "Rust"));
1469        assert!(snapshot
1470            .patterns
1471            .iter()
1472            .any(|p| p.name == "Async Rust (Tokio)"));
1473        assert!(snapshot
1474            .patterns
1475            .iter()
1476            .any(|p| p.name == "Standard src/ layout"));
1477    }
1478
1479    #[test]
1480    fn test_scan_ts_project() {
1481        let tmp = tempfile::tempdir().unwrap();
1482        let src = tmp.path().join("src");
1483        fs::create_dir_all(&src).unwrap();
1484
1485        fs::write(
1486            tmp.path().join("package.json"),
1487            r#"{"dependencies": {"react": "^18.0.0", "next": "^14.0.0"}}"#,
1488        )
1489        .unwrap();
1490        fs::write(
1491            src.join("index.tsx"),
1492            "export default function App() { return <div/> }",
1493        )
1494        .unwrap();
1495
1496        let scout = Scout::new(tmp.path());
1497        let snapshot = scout.scan().unwrap();
1498
1499        assert!(snapshot
1500            .languages
1501            .iter()
1502            .any(|l| l.language == "TypeScript"));
1503        assert!(snapshot.patterns.iter().any(|p| p.name == "React"));
1504        assert!(snapshot.patterns.iter().any(|p| p.name == "Next.js"));
1505    }
1506
1507    #[test]
1508    fn test_scan_python_project() {
1509        let tmp = tempfile::tempdir().unwrap();
1510        fs::write(
1511            tmp.path().join("pyproject.toml"),
1512            r#"[tool.poetry]
1513name = "test-project"
1514
1515[tool.poetry.dependencies]
1516python = "^3.11"
1517flask = "^3.0"
1518requests = "^2.31"
1519"#,
1520        )
1521        .unwrap();
1522        fs::write(
1523            tmp.path().join("main.py"),
1524            "from flask import Flask\napp = Flask(__name__)\n",
1525        )
1526        .unwrap();
1527
1528        let scout = Scout::new(tmp.path());
1529        let snapshot = scout.scan().unwrap();
1530
1531        assert!(snapshot.languages.iter().any(|l| l.language == "Python"));
1532        assert!(snapshot.patterns.iter().any(|p| p.name == "Flask"));
1533        assert!(snapshot
1534            .dependencies
1535            .iter()
1536            .any(|d| d.contains("flask")));
1537    }
1538
1539    #[test]
1540    fn test_scan_go_project() {
1541        let tmp = tempfile::tempdir().unwrap();
1542        fs::write(
1543            tmp.path().join("go.mod"),
1544            "module example.com/test\n\ngo 1.22\n\nrequire (\n\tgithub.com/gin-gonic/gin v1.9.1\n)\n",
1545        )
1546        .unwrap();
1547        fs::write(tmp.path().join("main.go"), "package main\n\nfunc main() {}\n").unwrap();
1548
1549        let scout = Scout::new(tmp.path());
1550        let snapshot = scout.scan().unwrap();
1551
1552        assert!(snapshot.languages.iter().any(|l| l.language == "Go"));
1553        assert!(snapshot.dependencies.iter().any(|d| d.contains("gin")));
1554    }
1555
1556    #[test]
1557    fn test_scan_ignores_dirs() {
1558        let tmp = tempfile::tempdir().unwrap();
1559        fs::create_dir_all(tmp.path().join(".git/objects")).unwrap();
1560        fs::create_dir_all(tmp.path().join("target/debug")).unwrap();
1561        fs::create_dir_all(tmp.path().join("node_modules/react")).unwrap();
1562        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1563        fs::create_dir_all(tmp.path().join("src")).unwrap();
1564        fs::write(tmp.path().join("src/main.rs"), "fn main() {}").unwrap();
1565
1566        let scout = Scout::new(tmp.path());
1567        let snapshot = scout.scan().unwrap();
1568
1569        for node in &snapshot.tree {
1570            assert!(
1571                !node.path.starts_with(".git/"),
1572                "Should skip .git: {}",
1573                node.path
1574            );
1575            assert!(
1576                !node.path.starts_with("target/"),
1577                "Should skip target: {}",
1578                node.path
1579            );
1580            assert!(
1581                !node.path.starts_with("node_modules/"),
1582                "Should skip node_modules: {}",
1583                node.path
1584            );
1585        }
1586    }
1587
1588    #[test]
1589    fn test_scan_respects_depth_limit() {
1590        let tmp = tempfile::tempdir().unwrap();
1591        let deep = tmp.path().join("a/b/c/d/e/f");
1592        fs::create_dir_all(&deep).unwrap();
1593        fs::write(deep.join("deep.txt"), "content").unwrap();
1594        fs::write(tmp.path().join("shallow.txt"), "content").unwrap();
1595
1596        let config = ScoutConfig {
1597            root: tmp.path().to_path_buf(),
1598            max_depth: 3,
1599            ..Default::default()
1600        };
1601        let scout = Scout::with_config(config);
1602        let snapshot = scout.scan().unwrap();
1603
1604        assert!(snapshot.tree.iter().any(|n| n.path == "shallow.txt"));
1605        assert!(!snapshot.tree.iter().any(|n| n.path.contains("deep.txt")));
1606    }
1607
1608    #[test]
1609    fn test_scan_nonexistent_dir() {
1610        let scout = Scout::new("/nonexistent/path/that/does/not/exist");
1611        assert!(scout.scan().is_err());
1612    }
1613
1614    #[test]
1615    fn test_scan_file_as_root() {
1616        let tmp = tempfile::tempdir().unwrap();
1617        let file_path = tmp.path().join("not_a_dir.txt");
1618        fs::write(&file_path, "content").unwrap();
1619
1620        let scout = Scout::new(&file_path);
1621        assert!(scout.scan().is_err());
1622    }
1623
1624    #[test]
1625    fn test_render_compact_not_empty() {
1626        let tmp = tempfile::tempdir().unwrap();
1627        let src = tmp.path().join("src");
1628        fs::create_dir_all(&src).unwrap();
1629        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1630        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
1631
1632        let scout = Scout::new(tmp.path());
1633        let snapshot = scout.scan().unwrap();
1634        let compact = Scout::render_compact(&snapshot);
1635
1636        assert!(compact.contains("Codebase Snapshot"));
1637        assert!(compact.contains("Rust"));
1638        assert!(compact.contains("Cargo.toml"));
1639    }
1640
1641    #[test]
1642    fn test_render_markdown_not_empty() {
1643        let tmp = tempfile::tempdir().unwrap();
1644        let src = tmp.path().join("src");
1645        fs::create_dir_all(&src).unwrap();
1646        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1647        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
1648
1649        let scout = Scout::new(tmp.path());
1650        let snapshot = scout.scan().unwrap();
1651        let md = Scout::render_markdown(&snapshot);
1652
1653        assert!(md.contains("# Codebase Scout Report"));
1654        assert!(md.contains("## Language Breakdown"));
1655        assert!(md.contains("Rust"));
1656    }
1657
1658    #[test]
1659    fn test_render_json_roundtrip() {
1660        let tmp = tempfile::tempdir().unwrap();
1661        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1662
1663        let scout = Scout::new(tmp.path());
1664        let snapshot = scout.scan().unwrap();
1665        let json = Scout::render_json(&snapshot).unwrap();
1666
1667        let parsed: CodebaseSnapshot = serde_json::from_str(&json).unwrap();
1668        assert_eq!(parsed.root, snapshot.root);
1669        assert_eq!(parsed.total_files, snapshot.total_files);
1670    }
1671
1672    #[test]
1673    fn test_key_file_summary_extraction() {
1674        let tmp = tempfile::tempdir().unwrap();
1675        fs::write(
1676            tmp.path().join("Cargo.toml"),
1677            "[package]\nname = \"my-cool-project\"\nversion = \"1.0.0\"\n",
1678        )
1679        .unwrap();
1680
1681        let scout = Scout::new(tmp.path());
1682        let snapshot = scout.scan().unwrap();
1683
1684        let cargo = snapshot
1685            .key_files
1686            .iter()
1687            .find(|kf| kf.path == "Cargo.toml")
1688            .unwrap();
1689        assert_eq!(cargo.role, "config");
1690        assert!(cargo.summary.is_some());
1691    }
1692
1693    #[test]
1694    fn test_workspace_detection() {
1695        let tmp = tempfile::tempdir().unwrap();
1696        fs::write(
1697            tmp.path().join("Cargo.toml"),
1698            "[workspace]\nmembers = [\"crates/*\"]\n",
1699        )
1700        .unwrap();
1701
1702        let scout = Scout::new(tmp.path());
1703        let snapshot = scout.scan().unwrap();
1704
1705        assert!(snapshot
1706            .patterns
1707            .iter()
1708            .any(|p| p.name == "Rust workspace (monorepo)"));
1709    }
1710
1711    #[test]
1712    fn test_lib_binary_detection() {
1713        let tmp = tempfile::tempdir().unwrap();
1714        let src = tmp.path().join("src");
1715        fs::create_dir_all(&src).unwrap();
1716        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1717        fs::write(src.join("lib.rs"), "pub fn foo() {}").unwrap();
1718        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
1719
1720        let scout = Scout::new(tmp.path());
1721        let snapshot = scout.scan().unwrap();
1722
1723        assert!(snapshot
1724            .patterns
1725            .iter()
1726            .any(|p| p.name == "Lib+Binary Rust crate"));
1727    }
1728
1729    #[test]
1730    fn test_ci_detection() {
1731        let tmp = tempfile::tempdir().unwrap();
1732        let workflows = tmp.path().join(".github/workflows");
1733        fs::create_dir_all(&workflows).unwrap();
1734        fs::write(workflows.join("ci.yml"), "name: CI\non: push\n").unwrap();
1735        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1736
1737        let scout = Scout::new(tmp.path());
1738        let snapshot = scout.scan().unwrap();
1739
1740        assert!(snapshot
1741            .patterns
1742            .iter()
1743            .any(|p| p.name == "GitHub Actions CI"));
1744        assert!(snapshot
1745            .key_files
1746            .iter()
1747            .any(|kf| kf.role == "ci" && kf.path.contains("workflows")));
1748    }
1749
1750    #[test]
1751    fn test_docker_detection() {
1752        let tmp = tempfile::tempdir().unwrap();
1753        fs::write(tmp.path().join("Dockerfile"), "FROM rust:1.75\n").unwrap();
1754        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1755
1756        let scout = Scout::new(tmp.path());
1757        let snapshot = scout.scan().unwrap();
1758
1759        assert!(snapshot.patterns.iter().any(|p| p.name == "Dockerized"));
1760    }
1761
1762    #[test]
1763    fn test_mvc_detection() {
1764        let tmp = tempfile::tempdir().unwrap();
1765        for dir in &["controllers", "models", "views", "src"] {
1766            fs::create_dir_all(tmp.path().join(dir)).unwrap();
1767        }
1768        fs::write(
1769            tmp.path().join("package.json"),
1770            r#"{"dependencies": {"express": "^4.0.0"}}"#,
1771        )
1772        .unwrap();
1773
1774        let scout = Scout::new(tmp.path());
1775        let snapshot = scout.scan().unwrap();
1776
1777        assert!(snapshot
1778            .patterns
1779            .iter()
1780            .any(|p| p.name == "MVC architecture"));
1781    }
1782
1783    #[test]
1784    fn test_format_bytes() {
1785        assert_eq!(format_bytes(0), "0 B");
1786        assert_eq!(format_bytes(512), "512 B");
1787        assert_eq!(format_bytes(1024), "1.0 KB");
1788        assert_eq!(format_bytes(1536), "1.5 KB");
1789        assert_eq!(format_bytes(1048576), "1.0 MB");
1790        assert_eq!(format_bytes(1073741824), "1.0 GB");
1791    }
1792
1793    #[test]
1794    fn test_capitalize() {
1795        assert_eq!(capitalize("language"), "Language");
1796        assert_eq!(capitalize("framework"), "Framework");
1797        assert_eq!(capitalize(""), "");
1798    }
1799
1800    #[test]
1801    fn test_config_default() {
1802        let config = ScoutConfig::default();
1803        assert_eq!(config.max_depth, 6);
1804        assert!(config.ignore.contains(&".git".to_string()));
1805        assert!(config.ignore.contains(&"node_modules".to_string()));
1806    }
1807
1808    #[test]
1809    fn test_config_serde_roundtrip() {
1810        let config = ScoutConfig {
1811            root: PathBuf::from("/tmp/project"),
1812            max_depth: 4,
1813            max_sample_bytes: 256 * 1024,
1814            max_tree_files: 100,
1815            ignore: vec![".git".into()],
1816        };
1817
1818        let json = serde_json::to_string(&config).unwrap();
1819        let parsed: ScoutConfig = serde_json::from_str(&json).unwrap();
1820        assert_eq!(parsed.root, config.root);
1821        assert_eq!(parsed.max_depth, 4);
1822        assert_eq!(parsed.max_tree_files, 100);
1823    }
1824
1825    #[test]
1826    fn test_snapshot_serde_roundtrip() {
1827        let tmp = tempfile::tempdir().unwrap();
1828        let src = tmp.path().join("src");
1829        fs::create_dir_all(&src).unwrap();
1830        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1831        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
1832
1833        let scout = Scout::new(tmp.path());
1834        let snapshot = scout.scan().unwrap();
1835
1836        let json = serde_json::to_string_pretty(&snapshot).unwrap();
1837        let parsed: CodebaseSnapshot = serde_json::from_str(&json).unwrap();
1838        assert_eq!(parsed.root, snapshot.root);
1839        assert_eq!(parsed.total_files, snapshot.total_files);
1840        assert_eq!(parsed.languages.len(), snapshot.languages.len());
1841        assert_eq!(parsed.patterns.len(), snapshot.patterns.len());
1842    }
1843
1844    #[test]
1845    fn test_scan_cargo_workspace_with_members() {
1846        let tmp = tempfile::tempdir().unwrap();
1847        let crates_dir = tmp.path().join("crates");
1848        let crate_a = crates_dir.join("crate-a");
1849        let crate_b = crates_dir.join("crate-b");
1850        fs::create_dir_all(crate_a.join("src")).unwrap();
1851        fs::create_dir_all(crate_b.join("src")).unwrap();
1852
1853        fs::write(
1854            tmp.path().join("Cargo.toml"),
1855            "[workspace]\nmembers = [\"crates/*\"]\n",
1856        )
1857        .unwrap();
1858        fs::write(
1859            crate_a.join("Cargo.toml"),
1860            "[package]\nname = \"crate-a\"\nversion = \"0.1.0\"\n\n[dependencies]\nserde = \"1\"\n",
1861        )
1862        .unwrap();
1863        fs::write(crate_a.join("src/lib.rs"), "pub fn a() {}").unwrap();
1864        fs::write(
1865            crate_b.join("Cargo.toml"),
1866            "[package]\nname = \"crate-b\"\nversion = \"0.1.0\"\n\n[dependencies]\ntokio = \"1\"\n",
1867        )
1868        .unwrap();
1869        fs::write(crate_b.join("src/lib.rs"), "pub fn b() {}").unwrap();
1870
1871        let scout = Scout::new(tmp.path());
1872        let snapshot = scout.scan().unwrap();
1873
1874        assert!(snapshot.total_files >= 5);
1875        assert!(snapshot
1876            .dependencies
1877            .iter()
1878            .any(|d| d.contains("serde")));
1879        assert!(snapshot
1880            .dependencies
1881            .iter()
1882            .any(|d| d.contains("tokio")));
1883        assert!(snapshot
1884            .patterns
1885            .iter()
1886            .any(|p| p.name == "Rust workspace (monorepo)"));
1887        assert!(snapshot
1888            .patterns
1889            .iter()
1890            .any(|p| p.name == "Async Rust (Tokio)"));
1891    }
1892
1893    #[test]
1894    fn test_agent_conventions_detection() {
1895        let tmp = tempfile::tempdir().unwrap();
1896        fs::write(
1897            tmp.path().join("AGENTS.md"),
1898            "# Agent Conventions\nUse Rust.\n",
1899        )
1900        .unwrap();
1901        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1902
1903        let scout = Scout::new(tmp.path());
1904        let snapshot = scout.scan().unwrap();
1905
1906        assert!(snapshot
1907            .patterns
1908            .iter()
1909            .any(|p| p.name == "AI agent conventions"));
1910    }
1911
1912    #[test]
1913    fn test_multi_language_project() {
1914        let tmp = tempfile::tempdir().unwrap();
1915        let src = tmp.path().join("src");
1916        let frontend = tmp.path().join("frontend/src");
1917        let scripts = tmp.path().join("scripts");
1918        fs::create_dir_all(&src).unwrap();
1919        fs::create_dir_all(&frontend).unwrap();
1920        fs::create_dir_all(&scripts).unwrap();
1921
1922        fs::write(tmp.path().join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap();
1923        fs::write(src.join("main.rs"), "fn main() {}").unwrap();
1924        fs::write(
1925            frontend.join("App.tsx"),
1926            "export default function App() {}",
1927        )
1928        .unwrap();
1929        fs::write(
1930            scripts.join("build.py"),
1931            "#!/usr/bin/env python3\nprint('hi')\n",
1932        )
1933        .unwrap();
1934
1935        let scout = Scout::new(tmp.path());
1936        let snapshot = scout.scan().unwrap();
1937
1938        let lang_names: Vec<&str> = snapshot
1939            .languages
1940            .iter()
1941            .map(|l| l.language.as_str())
1942            .collect();
1943        assert!(lang_names.contains(&"Rust"));
1944        assert!(lang_names.contains(&"TypeScript"));
1945        assert!(lang_names.contains(&"Python"));
1946    }
1947}