Skip to main content

gobby_code/index/
walker.rs

1//! Git-aware file discovery using the `ignore` crate.
2//! Respects .gitignore and exclude patterns.
3
4use std::collections::BTreeSet;
5use std::io::Read;
6use std::path::{Component, Path, PathBuf};
7
8use crate::index::languages;
9use crate::index::security;
10
/// Maximum file size to index (10 MiB). Applied both to the walker settings
/// and to the per-file metadata check in `is_safe_text_file`.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;
/// Number of leading bytes of a JS-family file that are scanned for
/// generated-code markers.
const GENERATED_JS_MARKER_SCAN_BYTES: usize = 64 * 1024;
/// Upper bound on how many bytes of a JS-family file are read for the
/// generated/minified analysis.
const GENERATED_JS_ANALYSIS_READ_BYTES: u64 = 256 * 1024;
/// Files (and analysis prefixes) smaller than this are never treated as
/// minified bundles.
const MINIFIED_JS_MIN_BYTES: usize = 128 * 1024;
/// A single non-empty line at least this long marks the file as minified.
const MINIFIED_JS_LONG_LINE_BYTES: usize = 20 * 1024;
/// Maximum number of non-empty lines for the "few, very long lines" heuristic.
const MINIFIED_JS_MAX_LINES: usize = 20;
/// Minimum average non-empty line length for the "few, very long lines"
/// heuristic.
const MINIFIED_JS_AVG_LINE_BYTES: usize = 2 * 1024;
/// Project configuration file, relative to the index root, from which
/// `index.hidden_allowlist` is read.
const GCODE_CONFIG_PATH: &str = ".gobby/gcode.json";
/// Root-relative glob patterns for hidden files that are discovered even
/// without any project configuration.
const DEFAULT_HIDDEN_ALLOWLIST_PATTERNS: &[&str] = &[
    ".gobby/plans/**/*.md",
    ".github/workflows/**/*.yml",
    ".github/workflows/**/*.yaml",
];
/// Substrings that flag a JS-family file as generated. They are matched
/// against ASCII-lowercased text, so every entry must itself be lowercase.
const GENERATED_JS_MARKERS: &[&str] = &[
    "generated by",
    "do not edit",
    "@generated",
    "auto-generated",
    "automatically generated",
];
32
/// How a file should be indexed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileClassification {
    /// The path maps to a language known to `languages::detect_language`.
    Ast,
    /// Safe text with no detected language, or an allowlisted hidden
    /// metadata file (plans / workflow YAML).
    ContentOnly,
}
39
40/// Discover files eligible for indexing under `root`.
41/// Returns (ast_candidates, content_only_candidates) as absolute paths.
42pub fn discover_files<S: AsRef<str>>(
43    root: &Path,
44    exclude_patterns: &[S],
45) -> (Vec<PathBuf>, Vec<PathBuf>) {
46    let mut candidates = Vec::new();
47    let mut content_only = Vec::new();
48    let mut seen = BTreeSet::new();
49
50    let mut settings = gobby_core::indexing::WalkerSettings::new(root);
51    settings.max_filesize = Some(MAX_FILE_SIZE);
52    let mut builder = settings.into_walker();
53    builder.hidden(true);
54    let walker = builder.build();
55
56    for entry in walker.flatten() {
57        let path = entry.path();
58        if !path.is_file() {
59            continue;
60        }
61
62        push_classified_file(
63            root,
64            path,
65            exclude_patterns,
66            &mut candidates,
67            &mut content_only,
68            &mut seen,
69        );
70    }
71
72    let hidden_allowlist = HiddenPathAllowlist::load(root);
73    for path in hidden_allowlist.discover(root) {
74        push_classified_file(
75            root,
76            &path,
77            exclude_patterns,
78            &mut candidates,
79            &mut content_only,
80            &mut seen,
81        );
82    }
83
84    (candidates, content_only)
85}
86
87/// Classify an individual file for indexing.
88pub fn classify_file(
89    root: &Path,
90    path: &Path,
91    exclude_patterns: &[impl AsRef<str>],
92) -> Option<FileClassification> {
93    if !is_safe_text_file(root, path, exclude_patterns) {
94        return None;
95    }
96    if is_generated_js_bundle(path) {
97        return None;
98    }
99
100    if is_hidden_metadata_content_only(root, path) {
101        return Some(FileClassification::ContentOnly);
102    }
103
104    if languages::detect_language(&path.to_string_lossy()).is_some() {
105        Some(FileClassification::Ast)
106    } else {
107        Some(FileClassification::ContentOnly)
108    }
109}
110
111/// Return true when `path` is an unsupported, safe text file suitable for chunks.
112pub fn is_content_indexable(
113    root: &Path,
114    path: &Path,
115    exclude_patterns: &[impl AsRef<str>],
116) -> bool {
117    matches!(
118        classify_file(root, path, exclude_patterns),
119        Some(FileClassification::ContentOnly)
120    )
121}
122
/// Language label for content-only files.
///
/// The label is the lowercased file extension, with Markdown and YAML
/// spellings normalised to `"markdown"` and `"yaml"`. Files with no (or an
/// empty) extension are labelled `"text"`.
pub fn content_language(path: &Path) -> String {
    let lowered = path
        .extension()
        .map(|raw| raw.to_string_lossy().to_lowercase())
        .unwrap_or_default();

    match lowered.as_str() {
        "" => "text".to_string(),
        "md" | "markdown" => "markdown".to_string(),
        "yml" | "yaml" => "yaml".to_string(),
        _ => lowered,
    }
}
137
138fn push_classified_file(
139    root: &Path,
140    path: &Path,
141    exclude_patterns: &[impl AsRef<str>],
142    candidates: &mut Vec<PathBuf>,
143    content_only: &mut Vec<PathBuf>,
144    seen: &mut BTreeSet<PathBuf>,
145) {
146    let key = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
147    if !seen.insert(key) {
148        return;
149    }
150
151    match classify_file(root, path, exclude_patterns) {
152        Some(FileClassification::Ast) => candidates.push(path.to_path_buf()),
153        Some(FileClassification::ContentOnly) => content_only.push(path.to_path_buf()),
154        None => {}
155    }
156}
157
158#[derive(Debug, Clone, PartialEq, Eq)]
159struct HiddenPathAllowlist {
160    patterns: Vec<String>,
161}
162
163impl HiddenPathAllowlist {
164    fn load(root: &Path) -> Self {
165        let mut patterns = DEFAULT_HIDDEN_ALLOWLIST_PATTERNS
166            .iter()
167            .map(|pattern| (*pattern).to_string())
168            .collect::<Vec<_>>();
169        patterns.extend(read_project_hidden_allowlist(root));
170        Self::from_patterns(patterns)
171    }
172
173    fn from_patterns(patterns: Vec<String>) -> Self {
174        let patterns = patterns
175            .into_iter()
176            .map(|pattern| pattern.trim().replace('\\', "/"))
177            .filter(|pattern| is_valid_allowlist_pattern(pattern))
178            .flat_map(|pattern| expand_zero_depth_globstar(&pattern))
179            .collect();
180        Self { patterns }
181    }
182
183    fn discover(&self, root: &Path) -> Vec<PathBuf> {
184        let mut paths = BTreeSet::new();
185        for pattern in &self.patterns {
186            let Some(abs_pattern) = absolute_glob_pattern(root, pattern) else {
187                continue;
188            };
189            let Ok(entries) = glob::glob(&abs_pattern) else {
190                continue;
191            };
192            for entry in entries.flatten() {
193                if entry.is_file() && is_hidden_path(root, &entry) {
194                    paths.insert(entry);
195                }
196            }
197        }
198        paths.into_iter().collect()
199    }
200}
201
202fn read_project_hidden_allowlist(root: &Path) -> Vec<String> {
203    let Ok(contents) = std::fs::read_to_string(root.join(GCODE_CONFIG_PATH)) else {
204        return Vec::new();
205    };
206    let Ok(json) = serde_json::from_str::<serde_json::Value>(&contents) else {
207        return Vec::new();
208    };
209    json.get("index")
210        .and_then(|index| index.get("hidden_allowlist"))
211        .and_then(|allowlist| allowlist.as_array())
212        .into_iter()
213        .flatten()
214        .filter_map(|value| value.as_str().map(ToOwned::to_owned))
215        .collect()
216}
217
/// Accept only non-empty, root-relative patterns that cannot climb out of
/// the root: absolute paths and any `..`, prefix, or root component are
/// rejected.
fn is_valid_allowlist_pattern(pattern: &str) -> bool {
    if pattern.is_empty() {
        return false;
    }

    let candidate = Path::new(pattern);
    if candidate.is_absolute() {
        return false;
    }

    candidate.components().all(|part| match part {
        Component::ParentDir | Component::Prefix(_) | Component::RootDir => false,
        Component::CurDir | Component::Normal(_) => true,
    })
}
231
/// Return `pattern` plus, when it contains a `/**/` segment, a second
/// pattern with the first such segment collapsed to `/` so that files
/// directly inside the prefix directory are matched too.
fn expand_zero_depth_globstar(pattern: &str) -> Vec<String> {
    match pattern.split_once("/**/") {
        Some((head, tail)) => vec![pattern.to_owned(), format!("{head}/{tail}")],
        None => vec![pattern.to_owned()],
    }
}
239
240fn absolute_glob_pattern(root: &Path, pattern: &str) -> Option<String> {
241    let root = root.to_str()?;
242    Some(format!("{}/{}", glob::Pattern::escape(root), pattern))
243}
244
/// Return true when any component of `path` below `root` is a dot-name
/// (starts with `.` but is not `.` or `..`). When `path` is not under
/// `root`, every component of `path` is inspected instead. Components that
/// are not valid UTF-8 never count as hidden.
fn is_hidden_path(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(below_root) => below_root,
        Err(_) => path,
    };

    for part in relative.components() {
        let Some(name) = part.as_os_str().to_str() else {
            continue;
        };
        if name.starts_with('.') && !matches!(name, "." | "..") {
            return true;
        }
    }
    false
}
254
255fn is_hidden_metadata_content_only(root: &Path, path: &Path) -> bool {
256    let rel = path.strip_prefix(root).unwrap_or(path);
257    let components = rel
258        .components()
259        .filter_map(|component| match component {
260            Component::Normal(value) => value.to_str(),
261            _ => None,
262        })
263        .collect::<Vec<_>>();
264
265    if components.len() >= 3
266        && components[0] == ".gobby"
267        && components[1] == "plans"
268        && path_has_extension(path, &["md"])
269    {
270        return true;
271    }
272
273    components.len() >= 3
274        && components[0] == ".github"
275        && components[1] == "workflows"
276        && path_has_extension(path, &["yml", "yaml"])
277}
278
/// Return true when `path` has a UTF-8 extension equal to one of
/// `extensions`, compared ASCII case-insensitively.
///
/// The comparison ignores case on both sides and does not allocate. Paths
/// without an extension, or with a non-UTF-8 extension, never match.
fn path_has_extension(path: &Path, extensions: &[&str]) -> bool {
    path.extension()
        .and_then(|extension| extension.to_str())
        .is_some_and(|extension| {
            extensions
                .iter()
                .any(|candidate| candidate.eq_ignore_ascii_case(extension))
        })
}
288
289fn is_safe_text_file(root: &Path, path: &Path, exclude_patterns: &[impl AsRef<str>]) -> bool {
290    if !path.is_file() {
291        return false;
292    }
293    if !security::validate_path(path, root) {
294        return false;
295    }
296    if !security::is_symlink_safe(path, root) {
297        return false;
298    }
299    if security::should_exclude_path(root, path, exclude_patterns) {
300        return false;
301    }
302    if security::has_secret_extension(path) {
303        return false;
304    }
305
306    let Ok(meta) = path.metadata() else {
307        return false;
308    };
309    if meta.len() == 0 || meta.len() > MAX_FILE_SIZE {
310        return false;
311    }
312
313    !security::is_binary(path)
314}
315
316fn is_generated_js_bundle(path: &Path) -> bool {
317    if !is_js_family_file(path) {
318        return false;
319    }
320
321    let Ok(metadata) = path.metadata() else {
322        return false;
323    };
324    let Ok(bytes) = read_file_prefix(path, GENERATED_JS_ANALYSIS_READ_BYTES) else {
325        return false;
326    };
327    if contains_generated_js_marker(&bytes) {
328        return true;
329    }
330
331    if metadata.len() < MINIFIED_JS_MIN_BYTES as u64 {
332        return false;
333    };
334
335    looks_minified_js_bundle(&bytes)
336}
337
/// Read at most `max_bytes` from the start of the file at `path`.
///
/// The buffer is pre-sized from the open file's reported length, capped at
/// `max_bytes`, so small files do not reserve the full limit. If the length
/// is unavailable the buffer simply grows on demand.
///
/// # Errors
/// Returns the underlying I/O error when the file cannot be opened or read.
fn read_file_prefix(path: &Path, max_bytes: u64) -> std::io::Result<Vec<u8>> {
    let file = std::fs::File::open(path)?;
    // The size is only a capacity hint; `take` below enforces the real limit.
    let expected = file
        .metadata()
        .map_or(0, |metadata| metadata.len().min(max_bytes));
    let mut bytes = Vec::with_capacity(usize::try_from(expected).unwrap_or(0));
    file.take(max_bytes).read_to_end(&mut bytes)?;
    Ok(bytes)
}
344
/// Return true when `path` has a JavaScript-family extension (`js`, `jsx`,
/// `cjs`, `mjs`), compared ASCII case-insensitively and without allocating.
/// Paths without a UTF-8 extension are not JS-family files.
fn is_js_family_file(path: &Path) -> bool {
    const JS_FAMILY_EXTENSIONS: [&str; 4] = ["js", "jsx", "cjs", "mjs"];

    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| {
            JS_FAMILY_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
356
357fn contains_generated_js_marker(bytes: &[u8]) -> bool {
358    let scan_len = bytes.len().min(GENERATED_JS_MARKER_SCAN_BYTES);
359    let scan = String::from_utf8_lossy(&bytes[..scan_len]).to_ascii_lowercase();
360    GENERATED_JS_MARKERS
361        .iter()
362        .any(|marker| scan.contains(marker))
363}
364
365fn looks_minified_js_bundle(bytes: &[u8]) -> bool {
366    if bytes.len() < MINIFIED_JS_MIN_BYTES {
367        return false;
368    }
369
370    let mut line_count = 0usize;
371    let mut total_line_bytes = 0usize;
372    let mut longest_line_bytes = 0usize;
373    for line in bytes.split(|byte| *byte == b'\n') {
374        let line_len = line.len();
375        if line_len == 0 {
376            continue;
377        }
378        line_count += 1;
379        total_line_bytes += line_len;
380        longest_line_bytes = longest_line_bytes.max(line_len);
381    }
382
383    if line_count == 0 {
384        return false;
385    }
386
387    longest_line_bytes >= MINIFIED_JS_LONG_LINE_BYTES
388        || (line_count <= MINIFIED_JS_MAX_LINES
389            && total_line_bytes / line_count >= MINIFIED_JS_AVG_LINE_BYTES)
390}
391
#[cfg(test)]
mod tests {
    use super::*;

    /// Write `contents` to `root/rel`, creating parent directories as needed.
    fn write_file(root: &Path, rel: &str, contents: &[u8]) {
        let path = root.join(rel);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).expect("create parent");
        }
        std::fs::write(path, contents).expect("write file");
    }

    /// Convert discovered paths to sorted, root-relative strings so that
    /// assertions do not depend on discovery order. Panics if a path is not
    /// under `root`.
    fn rels(root: &Path, paths: Vec<PathBuf>) -> Vec<String> {
        let mut rels: Vec<String> = paths
            .into_iter()
            .map(|path| {
                path.strip_prefix(root)
                    .expect("path under root")
                    .to_string_lossy()
                    .to_string()
            })
            .collect();
        rels.sort();
        rels
    }

    /// A mixed tree is split into AST sources and content-only text, while
    /// binary, secret-looking, and excluded files are omitted.
    #[test]
    fn discovers_ast_and_content_only_text_files() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "README.md", b"# Title\n");
        write_file(root, "skills/gcode/SKILL.md", b"# gcode\n");
        write_file(root, "src/lib.rs", b"fn main() {}\n");
        write_file(root, "src/module.mjs", b"export const value = 1;\n");
        write_file(root, "docs/reference.markdown", b"# Reference\n");
        write_file(root, "docs/guide.rst", b"Guide\n=====\n");
        write_file(root, "notes.txt", b"plain notes\n");
        write_file(root, "config/app.properties", b"mode=dev\n");
        write_file(root, "config/app.toml", b"mode = 'dev'\n");
        write_file(root, "scripts/setup.sh", b"#!/usr/bin/env bash\n");
        write_file(root, "Dockerfile", b"FROM rust:latest\n");
        write_file(root, "image.bin", b"PNG\0binary");
        write_file(root, "api_key.txt", b"secret-ish\n");
        write_file(root, "target/generated.txt", b"generated\n");

        let excludes = vec!["target".to_string()];
        let (ast, content_only) = discover_files(root, &excludes);

        // discover_files omits api_key.txt via the security module
        // (SECRET_SUBSTRINGS matches "api_key"), image.bin via binary
        // detection, and target/* via the explicit excludes vector.
        assert_eq!(rels(root, ast), vec!["src/lib.rs", "src/module.mjs"]);
        assert_eq!(
            rels(root, content_only),
            vec![
                "Dockerfile",
                "README.md",
                "config/app.properties",
                "config/app.toml",
                "docs/guide.rst",
                "docs/reference.markdown",
                "notes.txt",
                "scripts/setup.sh",
                "skills/gcode/SKILL.md"
            ]
        );
    }

    /// A text file without an extension is content-only and labelled "text".
    #[test]
    fn classifies_extensionless_text_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "Makefile", b"test:\n\tcargo test\n");
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("Makefile"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(content_language(&root.join("Makefile")), "text");
    }

    /// Both `.md` and `.markdown` map to the "markdown" label.
    #[test]
    fn classifies_markdown_content_language_as_markdown() {
        assert_eq!(content_language(Path::new("README.md")), "markdown");
        assert_eq!(
            content_language(Path::new("docs/guide.markdown")),
            "markdown"
        );
        assert_eq!(
            content_language(Path::new("skills/gcode/SKILL.md")),
            "markdown"
        );
    }

    /// Both `.yml` and `.yaml` map to the "yaml" label.
    #[test]
    fn classifies_yaml_content_language_as_yaml() {
        assert_eq!(
            content_language(Path::new(".github/workflows/ci.yml")),
            "yaml"
        );
        assert_eq!(
            content_language(Path::new(".github/workflows/release.yaml")),
            "yaml"
        );
    }

    /// `.mjs` sources are AST-indexable; Markdown files are content-only.
    #[test]
    fn classifies_mjs_as_ast_and_markdown_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "src/module.mjs", b"export const value = 1;\n");
        write_file(root, "README.md", b"# Title\n");
        write_file(root, "docs/guide.markdown", b"# Guide\n");
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/module.mjs"), &excludes),
            Some(FileClassification::Ast)
        );
        assert_eq!(
            classify_file(root, &root.join("README.md"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(
            classify_file(root, &root.join("docs/guide.markdown"), &excludes),
            Some(FileClassification::ContentOnly)
        );
    }

    /// Workflow YAML under `.github/workflows/` classifies as content-only.
    #[test]
    fn classifies_github_workflow_yaml_as_content_only() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");
        write_file(root, ".github/workflows/release.yaml", b"name: release\n");
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join(".github/workflows/ci.yml"), &excludes),
            Some(FileClassification::ContentOnly)
        );
        assert_eq!(
            classify_file(
                root,
                &root.join(".github/workflows/release.yaml"),
                &excludes
            ),
            Some(FileClassification::ContentOnly)
        );
    }

    /// The built-in allowlist surfaces plan Markdown (including nested
    /// directories) and workflow YAML without any project configuration.
    #[test]
    fn discovers_default_hidden_metadata_allowlist() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "src/lib.rs", b"fn main() {}\n");
        write_file(root, ".gobby/plans/foo.md", b"# Plan\n");
        write_file(root, ".gobby/plans/nested/bar.md", b"# Nested\n");
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");
        write_file(root, ".github/workflows/release.yaml", b"name: release\n");

        let (ast, content_only) = discover_files(root, &[] as &[&str]);

        assert_eq!(rels(root, ast), vec!["src/lib.rs"]);
        assert_eq!(
            rels(root, content_only),
            vec![
                ".github/workflows/ci.yml",
                ".github/workflows/release.yaml",
                ".gobby/plans/foo.md",
                ".gobby/plans/nested/bar.md",
            ]
        );
    }

    /// Hidden files outside the allowlist are not discovered. The config
    /// written here has no `index.hidden_allowlist`, so only defaults apply.
    #[test]
    fn skips_non_allowlisted_hidden_metadata_by_default() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".github/ISSUE_TEMPLATE/bug.md", b"# Bug\n");
        write_file(root, ".gobby/gcode.json", br#"{"id":"project"}"#);
        write_file(root, ".gobby/project.json", br#"{"id":"project"}"#);
        write_file(root, ".gobby/wiki/page.md", b"# Wiki\n");
        write_file(root, ".gobby/screenshots/shot.md", b"# Screenshot\n");
        write_file(root, ".gobby/tasks.jsonl", b"{}\n");
        write_file(root, ".gobby/memories.jsonl", b"{}\n");

        let (ast, content_only) = discover_files(root, &[] as &[&str]);

        assert!(rels(root, ast).is_empty());
        assert!(rels(root, content_only).is_empty());
    }

    /// Patterns from `index.hidden_allowlist` in `.gobby/gcode.json` extend
    /// the defaults; files outside the configured pattern stay hidden.
    #[test]
    fn discovers_project_hidden_allowlist_from_gcode_json() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            ".gobby/gcode.json",
            br#"{"index":{"hidden_allowlist":[".custom/agent-docs/**/*.md"]}}"#,
        );
        write_file(root, ".custom/agent-docs/guide.md", b"# Guide\n");
        write_file(root, ".custom/agent-docs/nested/runbook.md", b"# Runbook\n");
        write_file(root, ".custom/other.md", b"# Other\n");

        let (ast, content_only) = discover_files(root, &[] as &[&str]);

        assert!(rels(root, ast).is_empty());
        assert_eq!(
            rels(root, content_only),
            vec![
                ".custom/agent-docs/guide.md",
                ".custom/agent-docs/nested/runbook.md",
            ]
        );
    }

    /// Exclude patterns take precedence over the hidden allowlist.
    #[test]
    fn excludes_win_over_allowlisted_hidden_paths() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, ".gobby/plans/foo.md", b"# Plan\n");
        write_file(root, ".github/workflows/ci.yml", b"name: ci\n");

        let excludes = vec![".gobby".to_string(), "workflows".to_string()];
        let (ast, content_only) = discover_files(root, &excludes);

        assert!(rels(root, ast).is_empty());
        assert!(rels(root, content_only).is_empty());
    }

    /// Each JS-family extension is rejected when the file carries a
    /// generated-code marker, regardless of the marker's letter case.
    #[test]
    fn skips_js_family_files_with_generated_markers() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let excludes: Vec<&str> = Vec::new();

        for (rel, marker) in [
            ("src/setup.mjs", "Generated by gcode setup"),
            ("src/app.js", "DO NOT EDIT"),
            ("src/view.jsx", "@generated"),
            ("src/runtime.cjs", "auto-generated"),
        ] {
            write_file(
                root,
                rel,
                format!("// {marker}\nexport const value = 1;\n").as_bytes(),
            );
            assert_eq!(classify_file(root, &root.join(rel), &excludes), None);
        }
    }

    /// Hand-written `.mjs` without markers stays AST-indexable.
    #[test]
    fn keeps_ordinary_mjs_source_ast_indexable() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            "src/config.mjs",
            b"export function loadConfig() {\n  return { mode: 'dev' };\n}\n",
        );
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/config.mjs"), &excludes),
            Some(FileClassification::Ast)
        );
    }

    /// A JS file dominated by one line longer than the minified size
    /// threshold is rejected.
    #[test]
    fn skips_large_minified_js_bundles() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"var bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';\n");
        write_file(root, "src/bundle.js", &bundle);
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("src/bundle.js"), &excludes),
            None
        );
    }

    /// A single-line bundle that ends with a newline is rejected.
    #[test]
    fn skips_single_line_minified_js_bundle_with_newline() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"(()=>{const bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';})();\n");
        write_file(root, "dist/app.js", &bundle);
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("dist/app.js"), &excludes),
            None
        );
    }

    /// A single-line bundle with no trailing newline is rejected as well.
    #[test]
    fn skips_single_line_minified_js_bundle_without_newline() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        let mut bundle = b"(()=>{const bundle='".to_vec();
        bundle.extend(std::iter::repeat_n(b'a', MINIFIED_JS_MIN_BYTES));
        bundle.extend(b"';})();");
        write_file(root, "dist/app.js", &bundle);
        let excludes: Vec<&str> = Vec::new();

        assert_eq!(
            classify_file(root, &root.join("dist/app.js"), &excludes),
            None
        );
    }

    /// An exclude named `build` does not reject a nested source directory
    /// that merely happens to be called `build`.
    #[test]
    fn classifies_source_build_directory_as_ast_indexable() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(
            root,
            "src/gobby/build/workspaces.py",
            b"class WorkspaceBuilder:\n    pass\n",
        );
        let excludes = vec!["build".to_string(), "dist".to_string()];

        assert_eq!(
            classify_file(root, &root.join("src/gobby/build/workspaces.py"), &excludes),
            Some(FileClassification::Ast)
        );
    }

    /// A `build` directory directly under the root is excluded.
    #[test]
    fn skips_root_build_directory() {
        let tmp = tempfile::tempdir().expect("tempdir");
        let root = tmp.path();
        write_file(root, "build/generated.py", b"class Generated:\n    pass\n");
        let excludes = vec!["build".to_string(), "dist".to_string()];

        assert_eq!(
            classify_file(root, &root.join("build/generated.py"), &excludes),
            None
        );
    }

    /// Source-level guard: this file must build its walker through the
    /// shared gobby_core settings and not construct a walk builder directly.
    /// The needles are assembled with `concat` so the test's own text does
    /// not contain them.
    #[test]
    fn walker_consumes_gobby_core_walker_settings() {
        let source = include_str!("walker.rs");
        let settings = ["gobby_core", "::indexing::WalkerSettings"].concat();
        let direct_builder = ["WalkBuilder", "::new(root)"].concat();

        assert!(source.contains(&settings));
        assert!(!source.contains(&direct_builder));
    }
}