1use arbor_core::{parse_file, CodeNode};
7use arbor_graph::{ArborGraph, GraphBuilder, GraphStore};
8use ignore::WalkBuilder;
9use std::collections::HashSet;
10use std::path::{Path, PathBuf};
11use std::time::Instant;
12use tracing::{debug, info, warn};
13
14pub struct IndexResult {
16 pub graph: ArborGraph,
18
19 pub files_indexed: usize,
21
22 pub cache_hits: usize,
24
25 pub nodes_extracted: usize,
27
28 pub duration_ms: u64,
30
31 pub errors: Vec<(String, String)>,
33}
34
/// Tunable settings for [`index_directory`].
///
/// The derived `Default` disables both symlink following and caching.
#[derive(Clone, Debug, Default)]
pub struct IndexOptions {
    /// When `true`, the directory walker follows symbolic links.
    pub follow_symlinks: bool,

    /// Location of the on-disk cache; `None` means every file is parsed from scratch.
    pub cache_path: Option<PathBuf>,
}
45
46pub fn index_directory(root: &Path, options: IndexOptions) -> Result<IndexResult, std::io::Error> {
64 let start = Instant::now();
65 let mut builder = GraphBuilder::new();
66 let mut files_indexed = 0;
67 let mut cache_hits = 0;
68 let mut nodes_extracted = 0;
69 let mut errors = Vec::new();
70
71 info!("Starting index of {}", root.display());
72
73 let store =
75 options
76 .cache_path
77 .as_ref()
78 .and_then(|path| match GraphStore::open_or_reset(path) {
79 Ok(s) => Some(s),
80 Err(e) => {
81 warn!("Failed to open cache: {}, proceeding without cache", e);
82 None
83 }
84 });
85
86 let mut seen_files: HashSet<String> = HashSet::new();
88
89 let walker = WalkBuilder::new(root)
91 .hidden(true) .git_ignore(true) .git_global(true)
94 .git_exclude(true)
95 .follow_links(options.follow_symlinks)
96 .build();
97
98 for entry in walker.filter_map(Result::ok) {
99 let path = entry.path();
100
101 if path.is_dir() {
103 continue;
104 }
105
106 let extension = match path.extension().and_then(|e| e.to_str()) {
108 Some(ext) => ext,
109 None => continue,
110 };
111
112 if !arbor_core::languages::is_supported(extension) {
113 continue;
114 }
115
116 let path_str = path.display().to_string();
117 seen_files.insert(path_str.clone());
118
119 if let Some(ref store) = store {
121 let current_mtime = match std::fs::metadata(path) {
123 Ok(meta) => meta
124 .modified()
125 .ok()
126 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
127 .map(|d| d.as_secs())
128 .unwrap_or(0),
129 Err(_) => 0,
130 };
131
132 if let Ok(Some(cached_mtime)) = store.get_mtime(&path_str) {
134 if cached_mtime == current_mtime {
135 if let Ok(Some(cached_nodes)) = store.get_file_nodes(&path_str) {
137 debug!("Cache hit: {}", path.display());
138 nodes_extracted += cached_nodes.len();
139 cache_hits += 1;
140 builder.add_nodes(cached_nodes);
141 continue;
142 }
143 }
144 }
145
146 debug!("Parsing (cache miss): {}", path.display());
148 match parse_file(path) {
149 Ok(nodes) => {
150 nodes_extracted += nodes.len();
151 files_indexed += 1;
152 if let Err(e) = store.update_file(&path_str, &nodes, current_mtime) {
154 warn!("Failed to update cache for {}: {}", path_str, e);
155 }
156 builder.add_nodes(nodes);
157 }
158 Err(e) => {
159 warn!("Failed to parse {}: {}", path.display(), e);
160 errors.push((path_str, e.to_string()));
161 }
162 }
163 } else {
164 debug!("Parsing {}", path.display());
166 match parse_file(path) {
167 Ok(nodes) => {
168 nodes_extracted += nodes.len();
169 files_indexed += 1;
170 builder.add_nodes(nodes);
171 }
172 Err(e) => {
173 warn!("Failed to parse {}: {}", path.display(), e);
174 errors.push((path_str, e.to_string()));
175 }
176 }
177 }
178 }
179
180 if let Some(ref store) = store {
182 if let Ok(cached_files) = store.list_cached_files() {
183 for cached_file in cached_files {
184 if !seen_files.contains(&cached_file) {
185 debug!("Removing deleted file from cache: {}", cached_file);
186 if let Err(e) = store.remove_file(&cached_file) {
187 warn!("Failed to remove {} from cache: {}", cached_file, e);
188 }
189 }
190 }
191 }
192 }
193
194 let graph = builder.build();
195 let duration = start.elapsed();
196
197 info!(
198 "Indexed {} files, {} cache hits ({} nodes) in {:?}",
199 files_indexed, cache_hits, nodes_extracted, duration
200 );
201
202 Ok(IndexResult {
203 graph,
204 files_indexed,
205 cache_hits,
206 nodes_extracted,
207 duration_ms: duration.as_millis() as u64,
208 errors,
209 })
210}
211
212#[allow(dead_code)]
214pub fn parse_single_file(path: &Path) -> Result<Vec<CodeNode>, arbor_core::ParseError> {
215 parse_file(path)
216}
217
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// An empty directory yields no indexed files and no nodes.
    #[test]
    fn test_index_empty_directory() {
        let empty = tempdir().unwrap();

        let outcome = index_directory(empty.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
        assert_eq!(outcome.nodes_extracted, 0);
    }

    /// A single Rust file is parsed and contributes at least one node.
    #[test]
    fn test_index_with_rust_file() {
        let workspace = tempdir().unwrap();
        let source = r#"
            pub fn hello() {
                println!("Hello!");
            }
        "#;
        fs::write(workspace.path().join("test.rs"), source).unwrap();

        let outcome = index_directory(workspace.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }

    /// Creates a directory symlink at `link` pointing to `original`.
    ///
    /// Returns `None` when the link cannot be created (unsupported platform or a
    /// failed OS call), so callers can skip their test.
    fn create_dir_symlink(original: &std::path::Path, link: &std::path::Path) -> Option<()> {
        #[cfg(unix)]
        let created = std::os::unix::fs::symlink(original, link).ok();
        #[cfg(windows)]
        let created = std::os::windows::fs::symlink_dir(original, link).ok();
        #[cfg(not(any(unix, windows)))]
        let created = None;

        created
    }

    /// Builds an empty root directory containing a `linked` symlink to a second
    /// directory that holds one Rust file.
    ///
    /// Both temporary directories are returned so they outlive the indexing call.
    /// Returns `None` when the symlink could not be created.
    fn symlinked_fixture() -> Option<(tempfile::TempDir, tempfile::TempDir)> {
        let root = tempdir().unwrap();
        let target = tempdir().unwrap();

        fs::write(target.path().join("linked.rs"), "pub fn linked_func() {}").unwrap();
        create_dir_symlink(target.path(), &root.path().join("linked"))?;

        Some((root, target))
    }

    /// With default options the symlinked directory is not descended into.
    #[test]
    fn test_index_does_not_follow_symlinks_by_default() {
        let (root, _target) = match symlinked_fixture() {
            Some(dirs) => dirs,
            // Symlinks are unavailable in this environment; nothing to verify.
            None => return,
        };

        let outcome = index_directory(root.path(), IndexOptions::default()).unwrap();

        assert_eq!(outcome.files_indexed, 0);
    }

    /// With `follow_symlinks` enabled the file behind the symlink is indexed.
    #[test]
    fn test_index_follows_symlinks_when_enabled() {
        let (root, _target) = match symlinked_fixture() {
            Some(dirs) => dirs,
            // Symlinks are unavailable in this environment; nothing to verify.
            None => return,
        };

        let options = IndexOptions {
            follow_symlinks: true,
            ..IndexOptions::default()
        };
        let outcome = index_directory(root.path(), options).unwrap();

        assert_eq!(outcome.files_indexed, 1);
        assert!(outcome.nodes_extracted > 0);
    }
}