arbor_watcher/
indexer.rs

1//! Directory indexing.
2//!
3//! Walks directories to find and parse source files, building
4//! the initial code graph.
5
6use arbor_core::{parse_file, CodeNode};
7use arbor_graph::{ArborGraph, GraphBuilder};
8use ignore::WalkBuilder;
9use std::path::Path;
10use std::time::Instant;
11use tracing::{debug, info, warn};
12
13/// Result of indexing a directory.
14pub struct IndexResult {
15    /// The built graph.
16    pub graph: ArborGraph,
17
18    /// Number of files processed.
19    pub files_indexed: usize,
20
21    /// Number of nodes extracted.
22    pub nodes_extracted: usize,
23
24    /// Time taken in milliseconds.
25    pub duration_ms: u64,
26
27    /// Files that failed to parse.
28    pub errors: Vec<(String, String)>,
29}
30
31/// Indexes a directory and returns the code graph.
32///
33/// This walks all source files, parses them, and builds the
34/// relationship graph. It respects .gitignore patterns.
35///
36/// # Example
37///
38/// ```no_run
39/// use arbor_watcher::index_directory;
40/// use std::path::Path;
41///
42/// let result = index_directory(Path::new("./src")).unwrap();
43/// println!("Indexed {} files, {} nodes", result.files_indexed, result.nodes_extracted);
44/// ```
45pub fn index_directory(root: &Path) -> Result<IndexResult, std::io::Error> {
46    let start = Instant::now();
47    let mut builder = GraphBuilder::new();
48    let mut files_indexed = 0;
49    let mut nodes_extracted = 0;
50    let mut errors = Vec::new();
51
52    info!("Starting index of {}", root.display());
53
54    // Walk the directory, respecting .gitignore
55    let walker = WalkBuilder::new(root)
56        .hidden(true) // Skip hidden files
57        .git_ignore(true) // Respect .gitignore
58        .git_global(true)
59        .git_exclude(true)
60        .build();
61
62    for entry in walker.filter_map(Result::ok) {
63        let path = entry.path();
64
65        // Skip directories
66        if path.is_dir() {
67            continue;
68        }
69
70        // Check if it's a supported file type
71        let extension = match path.extension().and_then(|e| e.to_str()) {
72            Some(ext) => ext,
73            None => continue,
74        };
75
76        if !arbor_core::languages::is_supported(extension) {
77            continue;
78        }
79
80        debug!("Parsing {}", path.display());
81
82        match parse_file(path) {
83            Ok(nodes) => {
84                nodes_extracted += nodes.len();
85                files_indexed += 1;
86                builder.add_nodes(nodes);
87            }
88            Err(e) => {
89                warn!("Failed to parse {}: {}", path.display(), e);
90                errors.push((path.display().to_string(), e.to_string()));
91            }
92        }
93    }
94
95    let graph = builder.build();
96    let duration = start.elapsed();
97
98    info!(
99        "Indexed {} files ({} nodes) in {:?}",
100        files_indexed, nodes_extracted, duration
101    );
102
103    Ok(IndexResult {
104        graph,
105        files_indexed,
106        nodes_extracted,
107        duration_ms: duration.as_millis() as u64,
108        errors,
109    })
110}
111
112/// Parses a single file and returns its nodes.
113#[allow(dead_code)]
114pub fn parse_single_file(path: &Path) -> Result<Vec<CodeNode>, arbor_core::ParseError> {
115    parse_file(path)
116}
117
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Indexing a directory with no files reports zero files and zero nodes.
    #[test]
    fn test_index_empty_directory() {
        let workspace = tempdir().unwrap();

        let IndexResult {
            files_indexed,
            nodes_extracted,
            ..
        } = index_directory(workspace.path()).unwrap();

        assert_eq!(files_indexed, 0);
        assert_eq!(nodes_extracted, 0);
    }

    /// A single Rust source file is picked up and yields at least one node.
    #[test]
    fn test_index_with_rust_file() {
        let workspace = tempdir().unwrap();
        let source = r#"
            pub fn hello() {
                println!("Hello!");
            }
        "#;
        fs::write(workspace.path().join("test.rs"), source).unwrap();

        let IndexResult {
            files_indexed,
            nodes_extracted,
            ..
        } = index_directory(workspace.path()).unwrap();

        assert_eq!(files_indexed, 1);
        assert!(nodes_extracted > 0);
    }
}