Skip to main content

arbor_watcher/
indexer.rs

1//! Directory indexing.
2//!
3//! Walks directories to find and parse source files, building
4//! the initial code graph.
5
6use arbor_core::{parse_file, CodeNode};
7use arbor_graph::{ArborGraph, GraphBuilder, GraphStore};
8use ignore::WalkBuilder;
9use std::collections::HashSet;
10use std::path::{Path, PathBuf};
11use std::time::Instant;
12use tracing::{debug, info, warn};
13
14/// Result of indexing a directory.
15pub struct IndexResult {
16    /// The built graph.
17    pub graph: ArborGraph,
18
19    /// Number of files processed (parsed fresh).
20    pub files_indexed: usize,
21
22    /// Number of files loaded from cache.
23    pub cache_hits: usize,
24
25    /// Number of nodes extracted.
26    pub nodes_extracted: usize,
27
28    /// Time taken in milliseconds.
29    pub duration_ms: u64,
30
31    /// Files that failed to parse.
32    pub errors: Vec<(String, String)>,
33}
34
/// Settings that control how [`index_directory`] walks and caches.
///
/// The `Default` value does not follow symlinks and has caching disabled.
#[derive(Clone, Debug, Default)]
pub struct IndexOptions {
    /// When `true`, the directory walk follows symbolic links.
    pub follow_symlinks: bool,

    /// Location of the cache directory (for example `.arbor/cache`).
    /// `None` turns caching off.
    pub cache_path: Option<PathBuf>,
}
45
46/// Indexes a directory and returns the code graph.
47///
48/// This walks all source files, parses them, and builds the
49/// relationship graph. It respects .gitignore patterns.
50///
51/// If `options.cache_path` is set, files are cached with their mtimes.
52/// Only files with changed mtimes are re-parsed.
53///
54/// # Example
55///
56/// ```no_run
57/// use arbor_watcher::{index_directory, IndexOptions};
58/// use std::path::Path;
59///
60/// let result = index_directory(Path::new("./src"), IndexOptions::default()).unwrap();
61/// println!("Indexed {} files, {} nodes", result.files_indexed, result.nodes_extracted);
62/// ```
63pub fn index_directory(root: &Path, options: IndexOptions) -> Result<IndexResult, std::io::Error> {
64    let start = Instant::now();
65    let mut builder = GraphBuilder::new();
66    let mut files_indexed = 0;
67    let mut cache_hits = 0;
68    let mut nodes_extracted = 0;
69    let mut errors = Vec::new();
70
71    info!("Starting index of {}", root.display());
72
73    // Open cache if configured
74    let store =
75        options
76            .cache_path
77            .as_ref()
78            .and_then(|path| match GraphStore::open_or_reset(path) {
79                Ok(s) => Some(s),
80                Err(e) => {
81                    warn!("Failed to open cache: {}, proceeding without cache", e);
82                    None
83                }
84            });
85
86    // Track files we've seen (for detecting deleted files)
87    let mut seen_files: HashSet<String> = HashSet::new();
88
89    // Walk the directory, respecting .gitignore
90    let walker = WalkBuilder::new(root)
91        .hidden(true) // Skip hidden files
92        .git_ignore(true) // Respect .gitignore
93        .git_global(true)
94        .git_exclude(true)
95        .follow_links(options.follow_symlinks)
96        .build();
97
98    for entry in walker.filter_map(Result::ok) {
99        let path = entry.path();
100
101        // Skip directories
102        if path.is_dir() {
103            continue;
104        }
105
106        // Check if it's a supported file type
107        let extension = match path.extension().and_then(|e| e.to_str()) {
108            Some(ext) => ext,
109            None => continue,
110        };
111
112        if !arbor_core::languages::is_supported(extension) {
113            continue;
114        }
115
116        let path_str = path.display().to_string();
117        seen_files.insert(path_str.clone());
118
119        // Check cache
120        if let Some(ref store) = store {
121            // Get file mtime
122            let current_mtime = match std::fs::metadata(path) {
123                Ok(meta) => meta
124                    .modified()
125                    .ok()
126                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
127                    .map(|d| d.as_secs())
128                    .unwrap_or(0),
129                Err(_) => 0,
130            };
131
132            // Check cached mtime
133            if let Ok(Some(cached_mtime)) = store.get_mtime(&path_str) {
134                if cached_mtime == current_mtime {
135                    // File unchanged, load from cache
136                    if let Ok(Some(cached_nodes)) = store.get_file_nodes(&path_str) {
137                        debug!("Cache hit: {}", path.display());
138                        nodes_extracted += cached_nodes.len();
139                        cache_hits += 1;
140                        builder.add_nodes(cached_nodes);
141                        continue;
142                    }
143                }
144            }
145
146            // Cache miss or stale, parse file
147            debug!("Parsing (cache miss): {}", path.display());
148            match parse_file(path) {
149                Ok(nodes) => {
150                    nodes_extracted += nodes.len();
151                    files_indexed += 1;
152                    // Update cache
153                    if let Err(e) = store.update_file(&path_str, &nodes, current_mtime) {
154                        warn!("Failed to update cache for {}: {}", path_str, e);
155                    }
156                    builder.add_nodes(nodes);
157                }
158                Err(e) => {
159                    warn!("Failed to parse {}: {}", path.display(), e);
160                    errors.push((path_str, e.to_string()));
161                }
162            }
163        } else {
164            // No cache, parse directly
165            debug!("Parsing {}", path.display());
166            match parse_file(path) {
167                Ok(nodes) => {
168                    nodes_extracted += nodes.len();
169                    files_indexed += 1;
170                    builder.add_nodes(nodes);
171                }
172                Err(e) => {
173                    warn!("Failed to parse {}: {}", path.display(), e);
174                    errors.push((path_str, e.to_string()));
175                }
176            }
177        }
178    }
179
180    // Handle deleted files: remove from cache any files that no longer exist
181    if let Some(ref store) = store {
182        if let Ok(cached_files) = store.list_cached_files() {
183            for cached_file in cached_files {
184                if !seen_files.contains(&cached_file) {
185                    debug!("Removing deleted file from cache: {}", cached_file);
186                    if let Err(e) = store.remove_file(&cached_file) {
187                        warn!("Failed to remove {} from cache: {}", cached_file, e);
188                    }
189                }
190            }
191        }
192    }
193
194    let graph = builder.build();
195    let duration = start.elapsed();
196
197    info!(
198        "Indexed {} files, {} cache hits ({} nodes) in {:?}",
199        files_indexed, cache_hits, nodes_extracted, duration
200    );
201
202    Ok(IndexResult {
203        graph,
204        files_indexed,
205        cache_hits,
206        nodes_extracted,
207        duration_ms: duration.as_millis() as u64,
208        errors,
209    })
210}
211
212/// Parses a single file and returns its nodes.
213#[allow(dead_code)]
214pub fn parse_single_file(path: &Path) -> Result<Vec<CodeNode>, arbor_core::ParseError> {
215    parse_file(path)
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::{tempdir, TempDir};

    /// Creates a directory symlink at `link` pointing to `original`.
    ///
    /// Yields `None` when the link cannot be created (for instance on
    /// Windows without the required privileges) or on platforms that are
    /// neither Unix nor Windows.
    fn create_dir_symlink(original: &std::path::Path, link: &std::path::Path) -> Option<()> {
        #[cfg(unix)]
        {
            std::os::unix::fs::symlink(original, link).ok()
        }
        #[cfg(windows)]
        {
            std::os::windows::fs::symlink_dir(original, link).ok()
        }
        #[cfg(not(any(unix, windows)))]
        {
            None
        }
    }

    /// Builds the fixture shared by the symlink tests: an otherwise empty
    /// root directory containing a symlink named `linked` that points at a
    /// second directory holding a single Rust file.
    ///
    /// Both temporary directories are returned so they stay alive for the
    /// duration of the test. `None` means the symlink could not be created.
    fn symlinked_fixture() -> Option<(TempDir, TempDir)> {
        let root = tempdir().unwrap();
        let target = tempdir().unwrap();

        fs::write(target.path().join("linked.rs"), "pub fn linked_func() {}").unwrap();
        create_dir_symlink(target.path(), &root.path().join("linked"))?;

        Some((root, target))
    }

    #[test]
    fn test_index_empty_directory() {
        let empty = tempdir().unwrap();

        let outcome = index_directory(empty.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
        assert_eq!(outcome.nodes_extracted, 0);
    }

    #[test]
    fn test_index_with_rust_file() {
        let workspace = tempdir().unwrap();
        let source = r#"
            pub fn hello() {
                println!("Hello!");
            }
        "#;
        fs::write(workspace.path().join("test.rs"), source).unwrap();

        let outcome = index_directory(workspace.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }

    #[test]
    fn test_index_does_not_follow_symlinks_by_default() {
        // Nothing to verify when symlinks cannot be created on this system.
        let (root, _target) = match symlinked_fixture() {
            Some(dirs) => dirs,
            None => return,
        };

        // Default options leave `follow_symlinks` off.
        let outcome = index_directory(root.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
    }

    #[test]
    fn test_index_follows_symlinks_when_enabled() {
        // Nothing to verify when symlinks cannot be created on this system.
        let (root, _target) = match symlinked_fixture() {
            Some(dirs) => dirs,
            None => return,
        };

        let follow = IndexOptions {
            follow_symlinks: true,
            ..IndexOptions::default()
        };
        let outcome = index_directory(root.path(), follow).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }
}