Skip to main content

gobby_code/index/
walker.rs

1//! Git-aware file discovery using the `ignore` crate.
2//! Respects .gitignore and exclude patterns.
3
4use std::collections::BTreeSet;
5use std::io::Read;
6use std::path::{Component, Path, PathBuf};
7
8use crate::index::languages;
9use crate::index::security;
10
/// Maximum file size to index (10 MB).
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
/// Upper bound on the leading bytes searched for `GENERATED_JS_MARKERS` (64 KiB).
const GENERATED_JS_MARKER_SCAN_BYTES: usize = 64 * 1024;
/// Size of the file prefix read when analysing a JS-family file (256 KiB).
const GENERATED_JS_ANALYSIS_READ_BYTES: u64 = 256 * 1024;
/// Files (and analysed prefixes) smaller than this are never treated as minified (128 KiB).
const MINIFIED_JS_MIN_BYTES: usize = 128 * 1024;
/// A single non-empty line at least this long marks the prefix as minified (20 KiB).
const MINIFIED_JS_LONG_LINE_BYTES: usize = 20 * 1024;
/// Maximum number of non-empty lines for the average-line-length heuristic to apply.
const MINIFIED_JS_MAX_LINES: usize = 20;
/// Minimum average non-empty line length for the average-line-length heuristic (2 KiB).
const MINIFIED_JS_AVG_LINE_BYTES: usize = 2 * 1024;
/// Project config file, relative to the root, that may list extra hidden-path patterns.
const GCODE_CONFIG_PATH: &str = ".gobby/gcode.json";
/// Hidden-path glob patterns (relative to the root) that are always allowlisted.
const DEFAULT_HIDDEN_ALLOWLIST_PATTERNS: &[&str] = &[
    ".gobby/plans/**/*.md",
    ".github/workflows/**/*.yml",
    ".github/workflows/**/*.yaml",
];
/// Lowercase markers; the scanned prefix is ASCII-lowercased before they are searched for.
const GENERATED_JS_MARKERS: &[&str] = &[
    "generated by",
    "do not edit",
    "@generated",
    "auto-generated",
    "automatically generated",
];
32
/// How a file should be indexed.
///
/// Produced by `classify_file`; files that should not be indexed at all are
/// represented by `None` there rather than by a variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileClassification {
    /// `languages::detect_language` recognised the path.
    Ast,
    /// Safe text file without a detected language, or allowlisted hidden metadata.
    ContentOnly,
}
39
40/// Discover files eligible for indexing under `root`.
41/// Returns (ast_candidates, content_only_candidates) as absolute paths.
42pub fn discover_files(root: &Path, exclude_patterns: &[String]) -> (Vec<PathBuf>, Vec<PathBuf>) {
43    let mut candidates = Vec::new();
44    let mut content_only = Vec::new();
45    let mut seen = BTreeSet::new();
46
47    let mut settings = gobby_core::indexing::WalkerSettings::new(root);
48    settings.max_filesize = Some(MAX_FILE_SIZE);
49    let mut builder = settings.into_walker();
50    builder.hidden(true);
51    let walker = builder.build();
52
53    for entry in walker.flatten() {
54        let path = entry.path();
55        if !path.is_file() {
56            continue;
57        }
58
59        push_classified_file(
60            root,
61            path,
62            exclude_patterns,
63            &mut candidates,
64            &mut content_only,
65            &mut seen,
66        );
67    }
68
69    let hidden_allowlist = HiddenPathAllowlist::load(root);
70    for path in hidden_allowlist.discover(root) {
71        push_classified_file(
72            root,
73            &path,
74            exclude_patterns,
75            &mut candidates,
76            &mut content_only,
77            &mut seen,
78        );
79    }
80
81    (candidates, content_only)
82}
83
84/// Classify an individual file for indexing.
85pub fn classify_file(
86    root: &Path,
87    path: &Path,
88    exclude_patterns: &[String],
89) -> Option<FileClassification> {
90    if !is_safe_text_file(root, path, exclude_patterns) {
91        return None;
92    }
93    if is_generated_js_bundle(path) {
94        return None;
95    }
96
97    if is_hidden_metadata_content_only(root, path) {
98        return Some(FileClassification::ContentOnly);
99    }
100
101    if languages::detect_language(&path.to_string_lossy()).is_some() {
102        Some(FileClassification::Ast)
103    } else {
104        Some(FileClassification::ContentOnly)
105    }
106}
107
108/// Return true when `path` is an unsupported, safe text file suitable for chunks.
109pub fn is_content_indexable(root: &Path, path: &Path, exclude_patterns: &[String]) -> bool {
110    matches!(
111        classify_file(root, path, exclude_patterns),
112        Some(FileClassification::ContentOnly)
113    )
114}
115
/// Derive the language label used for a content-only file.
///
/// The label is the lowercased file extension, with `md`/`markdown` folded to
/// `"markdown"` and `yml`/`yaml` folded to `"yaml"`. Paths without a (non-empty)
/// extension are labelled `"text"`.
pub fn content_language(path: &Path) -> String {
    let ext = match path.extension() {
        Some(raw) if !raw.is_empty() => raw.to_string_lossy().to_lowercase(),
        _ => return "text".to_string(),
    };

    if matches!(ext.as_str(), "md" | "markdown") {
        "markdown".to_string()
    } else if matches!(ext.as_str(), "yml" | "yaml") {
        "yaml".to_string()
    } else {
        ext
    }
}
130
131fn push_classified_file(
132    root: &Path,
133    path: &Path,
134    exclude_patterns: &[String],
135    candidates: &mut Vec<PathBuf>,
136    content_only: &mut Vec<PathBuf>,
137    seen: &mut BTreeSet<PathBuf>,
138) {
139    let key = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
140    if !seen.insert(key) {
141        return;
142    }
143
144    match classify_file(root, path, exclude_patterns) {
145        Some(FileClassification::Ast) => candidates.push(path.to_path_buf()),
146        Some(FileClassification::ContentOnly) => content_only.push(path.to_path_buf()),
147        None => {}
148    }
149}
150
151#[derive(Debug, Clone, PartialEq, Eq)]
152struct HiddenPathAllowlist {
153    patterns: Vec<String>,
154}
155
156impl HiddenPathAllowlist {
157    fn load(root: &Path) -> Self {
158        let mut patterns = DEFAULT_HIDDEN_ALLOWLIST_PATTERNS
159            .iter()
160            .map(|pattern| (*pattern).to_string())
161            .collect::<Vec<_>>();
162        patterns.extend(read_project_hidden_allowlist(root));
163        Self::from_patterns(patterns)
164    }
165
166    fn from_patterns(patterns: Vec<String>) -> Self {
167        let patterns = patterns
168            .into_iter()
169            .map(|pattern| pattern.trim().replace('\\', "/"))
170            .filter(|pattern| is_valid_allowlist_pattern(pattern))
171            .flat_map(|pattern| expand_zero_depth_globstar(&pattern))
172            .collect();
173        Self { patterns }
174    }
175
176    fn discover(&self, root: &Path) -> Vec<PathBuf> {
177        let mut paths = BTreeSet::new();
178        for pattern in &self.patterns {
179            let Some(abs_pattern) = absolute_glob_pattern(root, pattern) else {
180                continue;
181            };
182            let Ok(entries) = glob::glob(&abs_pattern) else {
183                continue;
184            };
185            for entry in entries.flatten() {
186                if entry.is_file() && is_hidden_path(root, &entry) {
187                    paths.insert(entry);
188                }
189            }
190        }
191        paths.into_iter().collect()
192    }
193}
194
195fn read_project_hidden_allowlist(root: &Path) -> Vec<String> {
196    let Ok(contents) = std::fs::read_to_string(root.join(GCODE_CONFIG_PATH)) else {
197        return Vec::new();
198    };
199    let Ok(json) = serde_json::from_str::<serde_json::Value>(&contents) else {
200        return Vec::new();
201    };
202    json.get("index")
203        .and_then(|index| index.get("hidden_allowlist"))
204        .and_then(|allowlist| allowlist.as_array())
205        .into_iter()
206        .flatten()
207        .filter_map(|value| value.as_str().map(ToOwned::to_owned))
208        .collect()
209}
210
/// Accept only non-empty, relative patterns that cannot step outside the root:
/// no `..` component, no root component, and no path prefix.
fn is_valid_allowlist_pattern(pattern: &str) -> bool {
    let candidate = Path::new(pattern);
    if pattern.is_empty() || candidate.is_absolute() {
        return false;
    }

    candidate.components().all(|part| match part {
        Component::ParentDir | Component::Prefix(_) | Component::RootDir => false,
        Component::CurDir | Component::Normal(_) => true,
    })
}
224
/// Return the pattern itself plus, when it contains `/**/`, a second pattern
/// with the first `/**/` collapsed to a single `/`.
///
/// Only the first occurrence is collapsed; later `/**/` segments are left as
/// written in the extra pattern.
fn expand_zero_depth_globstar(pattern: &str) -> Vec<String> {
    match pattern.split_once("/**/") {
        Some((head, tail)) => vec![pattern.to_owned(), [head, tail].join("/")],
        None => vec![pattern.to_owned()],
    }
}
232
233fn absolute_glob_pattern(root: &Path, pattern: &str) -> Option<String> {
234    let root = root.to_str()?;
235    Some(format!("{}/{}", glob::Pattern::escape(root), pattern))
236}
237
/// Report whether any component of `path` below `root` starts with a dot.
///
/// `.` and `..` themselves do not count as hidden. When `path` is not under
/// `root`, the whole path is inspected instead. Components that are not valid
/// UTF-8 are never considered hidden.
fn is_hidden_path(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };

    for component in relative.components() {
        let Some(name) = component.as_os_str().to_str() else {
            continue;
        };
        if name.starts_with('.') && !matches!(name, "." | "..") {
            return true;
        }
    }
    false
}
247
248fn is_hidden_metadata_content_only(root: &Path, path: &Path) -> bool {
249    let rel = path.strip_prefix(root).unwrap_or(path);
250    let components = rel
251        .components()
252        .filter_map(|component| match component {
253            Component::Normal(value) => value.to_str(),
254            _ => None,
255        })
256        .collect::<Vec<_>>();
257
258    if components.len() >= 3
259        && components[0] == ".gobby"
260        && components[1] == "plans"
261        && path_has_extension(path, &["md"])
262    {
263        return true;
264    }
265
266    components.len() >= 3
267        && components[0] == ".github"
268        && components[1] == "workflows"
269        && path_has_extension(path, &["yml", "yaml"])
270}
271
/// Report whether the ASCII-lowercased extension of `path` equals one of
/// `extensions`.
///
/// Entries in `extensions` are compared as given, so they are expected to be
/// lowercase. Paths without a UTF-8 extension never match.
fn path_has_extension(path: &Path, extensions: &[&str]) -> bool {
    let Some(raw) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    let lowered = raw.to_ascii_lowercase();
    extensions.iter().any(|wanted| *wanted == lowered)
}
281
282fn is_safe_text_file(root: &Path, path: &Path, exclude_patterns: &[String]) -> bool {
283    if !path.is_file() {
284        return false;
285    }
286    if !security::validate_path(path, root) {
287        return false;
288    }
289    if !security::is_symlink_safe(path, root) {
290        return false;
291    }
292    if security::should_exclude_path(root, path, exclude_patterns) {
293        return false;
294    }
295    if security::has_secret_extension(path) {
296        return false;
297    }
298
299    let Ok(meta) = path.metadata() else {
300        return false;
301    };
302    if meta.len() == 0 || meta.len() > MAX_FILE_SIZE {
303        return false;
304    }
305
306    !security::is_binary(path)
307}
308
309fn is_generated_js_bundle(path: &Path) -> bool {
310    if !is_js_family_file(path) {
311        return false;
312    }
313
314    let Ok(metadata) = path.metadata() else {
315        return false;
316    };
317    let Ok(bytes) = read_file_prefix(path, GENERATED_JS_ANALYSIS_READ_BYTES) else {
318        return false;
319    };
320    if contains_generated_js_marker(&bytes) {
321        return true;
322    }
323
324    if metadata.len() < MINIFIED_JS_MIN_BYTES as u64 {
325        return false;
326    };
327
328    looks_minified_js_bundle(&bytes)
329}
330
/// Read at most `max_bytes` bytes from the start of the file at `path`.
///
/// The buffer is pre-sized from the smaller of the file's reported length and
/// `max_bytes`, so small files do not pay for a `max_bytes`-sized allocation
/// and a very large cap (up to `u64::MAX`) cannot trigger a capacity-overflow
/// panic. If the length is unavailable the buffer simply grows on demand.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened or read.
fn read_file_prefix(path: &Path, max_bytes: u64) -> std::io::Result<Vec<u8>> {
    let file = std::fs::File::open(path)?;

    // The length is only a capacity hint; `take` below enforces the real cap.
    let expected = file
        .metadata()
        .map(|meta| meta.len())
        .unwrap_or(0)
        .min(max_bytes);
    let mut bytes = Vec::with_capacity(usize::try_from(expected).unwrap_or(0));

    file.take(max_bytes).read_to_end(&mut bytes)?;
    Ok(bytes)
}
337
/// Report whether the extension of `path` is `js`, `jsx`, `cjs` or `mjs`,
/// compared ASCII case-insensitively.
fn is_js_family_file(path: &Path) -> bool {
    const JS_EXTENSIONS: [&str; 4] = ["js", "jsx", "cjs", "mjs"];

    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => JS_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
349
350fn contains_generated_js_marker(bytes: &[u8]) -> bool {
351    let scan_len = bytes.len().min(GENERATED_JS_MARKER_SCAN_BYTES);
352    let scan = String::from_utf8_lossy(&bytes[..scan_len]).to_ascii_lowercase();
353    GENERATED_JS_MARKERS
354        .iter()
355        .any(|marker| scan.contains(marker))
356}
357
358fn looks_minified_js_bundle(bytes: &[u8]) -> bool {
359    if bytes.len() < MINIFIED_JS_MIN_BYTES {
360        return false;
361    }
362
363    let mut line_count = 0usize;
364    let mut total_line_bytes = 0usize;
365    let mut longest_line_bytes = 0usize;
366    for line in bytes.split(|byte| *byte == b'\n') {
367        let line_len = line.len();
368        if line_len == 0 {
369            continue;
370        }
371        line_count += 1;
372        total_line_bytes += line_len;
373        longest_line_bytes = longest_line_bytes.max(line_len);
374    }
375
376    if line_count == 0 {
377        return false;
378    }
379
380    longest_line_bytes >= MINIFIED_JS_LONG_LINE_BYTES
381        || (line_count <= MINIFIED_JS_MAX_LINES
382            && total_line_bytes / line_count >= MINIFIED_JS_AVG_LINE_BYTES)
383}
384
#[cfg(test)]
mod tests {
    use super::*;

    // Write `contents` to `root/rel`, creating parent directories as needed.
    fn write_file(root: &Path, rel: &str, contents: &[u8]) {
        let path = root.join(rel);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).expect("create parent");
        }
        std::fs::write(path, contents).expect("write file");
    }

    // Convert discovered paths to sorted, root-relative strings for comparison.
    fn rels(root: &Path, paths: Vec<PathBuf>) -> Vec<String> {
        let mut rels: Vec<String> = paths
            .into_iter()
            .map(|path| {
                path.strip_prefix(root)
                    .expect("path under root")
                    .to_string_lossy()
                    .to_string()
            })
            .collect();
        rels.sort();
        rels
    }

    // End-to-end discovery over a mixed tree: AST sources, plain text files,
    // a binary file, a secret-looking file and an excluded directory.
    #[test]
    fn discovers_ast_and_content_only_text_files() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "README.md", b"# Title\n");
        write_file(root, "skills/gcode/SKILL.md", b"# gcode\n");
        write_file(root, "src/lib.rs", b"fn main() {}\n");
        write_file(root, "src/module.mjs", b"export const value = 1;\n");
        write_file(root, "docs/reference.markdown", b"# Reference\n");
        write_file(root, "docs/guide.rst", b"Guide\n=====\n");
        write_file(root, "notes.txt", b"plain notes\n");
        write_file(root, "config/app.properties", b"mode=dev\n");
        write_file(root, "config/app.toml", b"mode = 'dev'\n");
        write_file(root, "scripts/setup.sh", b"#!/usr/bin/env bash\n");
        write_file(root, "Dockerfile", b"FROM rust:latest\n");
        write_file(root, "image.bin", b"PNG\0binary");
        write_file(root, "api_key.txt", b"secret-ish\n");
        write_file(root, "target/generated.txt", b"generated\n");

        let excludes = vec!["target".to_string()];
        let (ast, content_only) = discover_files(root, &excludes);

        // discover_files omits api_key.txt via the security module
        // (SECRET_SUBSTRINGS matches "api_key"), image.bin via binary
        // detection, and target/* via the explicit excludes vector.
        assert_eq!(rels(root, ast), vec!["src/lib.rs", "src/module.mjs"]);
        assert_eq!(
            rels(root, content_only),
            vec![
                "Dockerfile",
                "README.md",
                "config/app.properties",
                "config/app.toml",
                "docs/guide.rst",
                "docs/reference.markdown",
                "notes.txt",
                "scripts/setup.sh",
                "skills/gcode/SKILL.md"
            ]
        );
    }

    // A file with no extension is content-only and labelled "text".
    #[test]
    fn classifies_extensionless_text_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "Makefile", b"test:\n\tcargo test\n");
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("Makefile"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(content_language(&root.join("Makefile")), "text");
    }

    // Both `.md` and `.markdown` map to the "markdown" label.
    #[test]
    fn classifies_markdown_content_language_as_markdown() {
        assert_eq!(content_language(Path::new("README.md")), "markdown");
        assert_eq!(
            content_language(Path::new("docs/guide.markdown")),
            "markdown"
        );
        assert_eq!(
            content_language(Path::new("skills/gcode/SKILL.md")),
            "markdown"
        );
    }

    // Both `.yml` and `.yaml` map to the "yaml" label.
    #[test]
    fn classifies_yaml_content_language_as_yaml() {
        assert_eq!(
            content_language(Path::new(".github/workflows/ci.yml")),
            "yaml"
        );
        assert_eq!(
            content_language(Path::new(".github/workflows/release.yaml")),
            "yaml"
        );
    }

    // `.mjs` has a detected language (AST); Markdown files do not.
    #[test]
    fn classifies_mjs_as_ast_and_markdown_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "src/module.mjs", b"export const value = 1;\n");
        write_file(root, "README.md", b"# Title\n");
        write_file(root, "docs/guide.markdown", b"# Guide\n");
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/module.mjs"), &excludes),
            Some(FileClassification::Ast)
        );
        assert_eq!(
            classify_file(root, &root.join("README.md"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(
            classify_file(root, &root.join("docs/guide.markdown"), &excludes),
            Some(FileClassification::ContentOnly)
        );
    }

    // Workflow YAML under `.github/workflows/` is classified as content-only.
    #[test]
    fn classifies_github_workflow_yaml_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");
        write_file(root, ".github/workflows/release.yaml", b"name: release\n");
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join(".github/workflows/ci.yml"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(
            classify_file(
                root,
                &root.join(".github/workflows/release.yaml"),
                &excludes
            ),
            Some(FileClassification::ContentOnly)
        );
    }

    // The built-in allowlist surfaces plans and workflows, at depth zero and nested.
    #[test]
    fn discovers_default_hidden_metadata_allowlist() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "src/lib.rs", b"fn main() {}\n");
        write_file(root, ".gobby/plans/foo.md", b"# Plan\n");
        write_file(root, ".gobby/plans/nested/bar.md", b"# Nested\n");
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");
        write_file(root, ".github/workflows/release.yaml", b"name: release\n");

        let (ast, content_only) = discover_files(root, &[]);

        assert_eq!(rels(root, ast), vec!["src/lib.rs"]);
        assert_eq!(
            rels(root, content_only),
            vec![
                ".github/workflows/ci.yml",
                ".github/workflows/release.yaml",
                ".gobby/plans/foo.md",
                ".gobby/plans/nested/bar.md",
            ]
        );
    }

    // Hidden files that match no allowlist pattern are not discovered.
    #[test]
    fn skips_non_allowlisted_hidden_metadata_by_default() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".github/ISSUE_TEMPLATE/bug.md", b"# Bug\n");
        write_file(root, ".gobby/gcode.json", br#"{"id":"project"}"#);
        write_file(root, ".gobby/project.json", br#"{"id":"project"}"#);
        write_file(root, ".gobby/wiki/page.md", b"# Wiki\n");
        write_file(root, ".gobby/screenshots/shot.md", b"# Screenshot\n");
        write_file(root, ".gobby/tasks.jsonl", b"{}\n");
        write_file(root, ".gobby/memories.jsonl", b"{}\n");

        let (ast, content_only) = discover_files(root, &[]);

        assert!(rels(root, ast).is_empty());
        assert!(rels(root, content_only).is_empty());
    }

    // Patterns from `index.hidden_allowlist` in the project config extend the defaults.
    #[test]
    fn discovers_project_hidden_allowlist_from_gcode_json() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            ".gobby/gcode.json",
            br#"{"index":{"hidden_allowlist":[".custom/agent-docs/**/*.md"]}}"#,
        );
        write_file(root, ".custom/agent-docs/guide.md", b"# Guide\n");
        write_file(root, ".custom/agent-docs/nested/runbook.md", b"# Runbook\n");
        write_file(root, ".custom/other.md", b"# Other\n");

        let (ast, content_only) = discover_files(root, &[]);

        assert!(rels(root, ast).is_empty());
        assert_eq!(
            rels(root, content_only),
            vec![
                ".custom/agent-docs/guide.md",
                ".custom/agent-docs/nested/runbook.md",
            ]
        );
    }

    // Exclude patterns take precedence over the hidden-path allowlist.
    #[test]
    fn excludes_win_over_allowlisted_hidden_paths() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".gobby/plans/foo.md", b"# Plan\n");
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");

        let excludes = vec![".gobby".to_string(), "workflows".to_string()];
        let (ast, content_only) = discover_files(root, &excludes);

        assert!(rels(root, ast).is_empty());
        assert!(rels(root, content_only).is_empty());
    }

    // Each JS-family extension is rejected when a generated marker is present,
    // regardless of the marker's letter case.
    #[test]
    fn skips_js_family_files_with_generated_markers() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let excludes = Vec::new();

        for (rel, marker) in [
            ("src/setup.mjs", "Generated by gcode setup"),
            ("src/app.js", "DO NOT EDIT"),
            ("src/view.jsx", "@generated"),
            ("src/runtime.cjs", "auto-generated"),
        ] {
            write_file(
                root,
                rel,
                format!("// {marker}\nexport const value = 1;\n").as_bytes(),
            );
            assert_eq!(classify_file(root, &root.join(rel), &excludes), None);
        }
    }

    // Hand-written `.mjs` without markers stays AST-indexable.
    #[test]
    fn keeps_ordinary_mjs_source_ast_indexable() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            "src/config.mjs",
            b"export function loadConfig() {\n  return { mode: 'dev' };\n}\n",
        );
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/config.mjs"), &excludes),
            Some(FileClassification::Ast)
        );
    }

    // One very long line above the minimum bundle size is treated as minified.
    #[test]
    fn skips_large_minified_js_bundles() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"var bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';\n");
        write_file(root, "src/bundle.js", &bundle);
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/bundle.js"), &excludes),
            None
        );
    }

    // Same shape wrapped in an IIFE, terminated by a newline.
    #[test]
    fn skips_single_line_minified_js_bundle_with_newline() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"(()=>{const bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';})();\n");
        write_file(root, "dist/app.js", &bundle);
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("dist/app.js"), &excludes),
            None
        );
    }

    // Same bundle without a trailing newline must be detected as well.
    #[test]
    fn skips_single_line_minified_js_bundle_without_newline() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"(()=>{const bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';})();");
        write_file(root, "dist/app.js", &bundle);
        let excludes = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("dist/app.js"), &excludes),
            None
        );
    }

    // A `build` directory nested inside the source tree is still classified
    // even though `build` appears in the exclude list.
    #[test]
    fn classifies_source_build_directory_as_ast_indexable() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            "src/gobby/build/workspaces.py",
            b"class WorkspaceBuilder:\n    pass\n",
        );
        let excludes = vec!["build".to_string(), "dist".to_string()];

        assert_eq!(
            classify_file(root, &root.join("src/gobby/build/workspaces.py"), &excludes),
            Some(FileClassification::Ast)
        );
    }

    // A `build` directory directly under the root is excluded.
    #[test]
    fn skips_root_build_directory() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "build/generated.py", b"class Generated:\n    pass\n");
        let excludes = vec!["build".to_string(), "dist".to_string()];

        assert_eq!(
            classify_file(root, &root.join("build/generated.py"), &excludes),
            None
        );
    }

    // Source-level guard: this file must obtain its walker through the shared
    // gobby_core settings type and must not construct a walk builder directly
    // from the root. The needles are assembled with `concat` so that this test
    // does not itself contain the forbidden text.
    #[test]
    fn walker_consumes_gobby_core_walker_settings() {
        let source = include_str!("walker.rs");
        let settings = ["gobby_core", "::indexing::WalkerSettings"].concat();
        let direct_builder = ["WalkBuilder", "::new(root)"].concat();

        assert!(source.contains(&settings));
        assert!(!source.contains(&direct_builder));
    }
}