agentroot_core/index/
scanner.rs

1//! File scanning for indexing
2
3use crate::error::Result;
4use glob::Pattern;
5use std::path::{Path, PathBuf};
6use walkdir::{DirEntry, WalkDir};
7
/// Directory names that the default [`ScanOptions`] excludes from a scan.
///
/// Entries are bare directory names (not paths); the order here is the order
/// in which they appear in the default `exclude_dirs` list.
const EXCLUDE_DIRS: &[&str] = &[
    // Dependency and VCS directories
    "node_modules",
    ".git",
    ".cache",
    "vendor",
    // Build output
    "dist",
    "build",
    // Python artefacts
    "__pycache__",
    ".venv",
    // Cargo output
    "target",
];
20
/// A single file found by a scan.
///
/// Derives `PartialEq`, `Eq` and `Hash` so results can be compared in tests
/// and deduplicated in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ScanResult {
    /// Path of the file as yielded by the directory walker.
    pub path: PathBuf,
    /// `path` with the scan root stripped, converted lossily to UTF-8.
    /// Holds the full path instead if the root prefix could not be stripped.
    pub relative_path: String,
}
27
/// Options controlling which files a scan visits and returns.
///
/// Derives `PartialEq` and `Eq` so option sets can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanOptions {
    /// Glob pattern matched against each file's path relative to the scan root.
    pub pattern: String,
    /// Whether the directory walker follows symbolic links.
    pub follow_symlinks: bool,
    /// Directory names to exclude; compared for exact equality with a
    /// directory's file name.
    pub exclude_dirs: Vec<String>,
    /// When `true`, entries whose file name starts with `.` are excluded.
    pub exclude_hidden: bool,
}
36
37impl Default for ScanOptions {
38    fn default() -> Self {
39        Self {
40            pattern: "**/*.md".to_string(),
41            follow_symlinks: true,
42            exclude_dirs: EXCLUDE_DIRS.iter().map(|s| s.to_string()).collect(),
43            exclude_hidden: true,
44        }
45    }
46}
47
48/// Scan directory for files matching pattern
49pub fn scan_files(root: &Path, options: &ScanOptions) -> Result<Vec<ScanResult>> {
50    let pattern = Pattern::new(&options.pattern)?;
51    let mut results = Vec::new();
52
53    let walker = WalkDir::new(root)
54        .follow_links(options.follow_symlinks)
55        .into_iter()
56        .filter_entry(|e| !should_skip(e, options));
57
58    for entry in walker {
59        let entry = entry?;
60        if !entry.file_type().is_file() {
61            continue;
62        }
63
64        let path = entry.path();
65        let relative = path
66            .strip_prefix(root)
67            .map(|p| p.to_string_lossy().to_string())
68            .unwrap_or_else(|_| path.to_string_lossy().to_string());
69
70        if pattern.matches(&relative) {
71            results.push(ScanResult {
72                path: path.to_path_buf(),
73                relative_path: relative,
74            });
75        }
76    }
77
78    Ok(results)
79}
80
81fn should_skip(entry: &DirEntry, options: &ScanOptions) -> bool {
82    let name = entry.file_name().to_string_lossy();
83
84    if options.exclude_hidden && name.starts_with('.') {
85        return true;
86    }
87
88    if entry.file_type().is_dir() && options.exclude_dirs.iter().any(|d| name == *d) {
89        return true;
90    }
91
92    false
93}
94
#[cfg(test)]
mod tests {
    use super::*;

    /// The default options use the `**/*.md` pattern and exclude hidden entries.
    #[test]
    fn test_default_options() {
        let ScanOptions {
            pattern,
            exclude_hidden,
            ..
        } = ScanOptions::default();

        assert_eq!(pattern, "**/*.md");
        assert!(exclude_hidden);
    }
}
105}