Skip to main content

gobby_code/index/walker/
discovery.rs

1use std::collections::BTreeSet;
2use std::path::{Path, PathBuf};
3
4use crate::index::MAX_FILE_SIZE;
5
6use super::classification::classify_file;
7use super::hidden::HiddenPathAllowlist;
8use super::types::{DiscoveryOptions, FileClassification};
9
10/// Discover files eligible for indexing under `root`.
11/// Returns (ast_candidates, content_only_candidates) as absolute paths.
12pub fn discover_files<S: AsRef<str>>(
13    root: &Path,
14    exclude_patterns: &[S],
15) -> (Vec<PathBuf>, Vec<PathBuf>) {
16    discover_files_with_options(root, exclude_patterns, DiscoveryOptions::default())
17}
18
19pub fn discover_files_with_options<S: AsRef<str>>(
20    root: &Path,
21    exclude_patterns: &[S],
22    options: DiscoveryOptions,
23) -> (Vec<PathBuf>, Vec<PathBuf>) {
24    let mut candidates = Vec::new();
25    let mut content_only = Vec::new();
26    let mut seen = BTreeSet::new();
27
28    let mut settings = gobby_core::indexing::WalkerSettings::new(root);
29    settings.respect_gitignore = options.respect_gitignore;
30    settings.max_filesize = Some(MAX_FILE_SIZE);
31    let mut builder = settings.into_walker();
32    builder.hidden(true);
33    let walker = builder.build();
34
35    for entry in walker.flatten() {
36        let path = entry.path();
37        if !path.is_file() {
38            continue;
39        }
40
41        push_classified_file(
42            root,
43            path,
44            exclude_patterns,
45            &mut candidates,
46            &mut content_only,
47            &mut seen,
48        );
49    }
50
51    let hidden_allowlist = HiddenPathAllowlist::load(root);
52    for path in hidden_allowlist.discover(root) {
53        push_classified_file(
54            root,
55            &path,
56            exclude_patterns,
57            &mut candidates,
58            &mut content_only,
59            &mut seen,
60        );
61    }
62
63    (candidates, content_only)
64}
65
66fn push_classified_file(
67    root: &Path,
68    path: &Path,
69    exclude_patterns: &[impl AsRef<str>],
70    candidates: &mut Vec<PathBuf>,
71    content_only: &mut Vec<PathBuf>,
72    seen: &mut BTreeSet<PathBuf>,
73) {
74    let key = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
75    if !seen.insert(key) {
76        return;
77    }
78
79    match classify_file(root, path, exclude_patterns) {
80        Some(FileClassification::Ast) => candidates.push(path.to_path_buf()),
81        Some(FileClassification::ContentOnly) => content_only.push(path.to_path_buf()),
82        None => {}
83    }
84}