1use crate::storage::UnifiedGraphStore;
7use crate::watcher::WatchEvent;
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
/// Decides which filesystem paths are eligible for indexing.
///
/// A path is accepted by `should_index` when it matches none of the exclude
/// patterns, matches at least one include pattern and, if any extensions are
/// configured, carries one of those extensions.
#[derive(Debug, Clone)]
pub struct PathFilter {
    /// Glob patterns of which at least one has to match for a path to be accepted.
    include_patterns: Vec<String>,
    /// Glob patterns that reject a path outright; evaluated before the includes.
    exclude_patterns: Vec<String>,
    /// Accepted file extensions (no leading dot); an empty list disables the check.
    extensions: Vec<String>,
}
24
25impl Default for PathFilter {
26 fn default() -> Self {
27 Self::new_with_defaults()
28 }
29}
30
31impl PathFilter {
32 pub fn new() -> Self {
35 Self {
36 include_patterns: vec![],
37 exclude_patterns: vec![],
38 extensions: vec![],
39 }
40 }
41
42 pub fn new_with_defaults() -> Self {
44 Self {
45 include_patterns: vec!["**/src/**".to_string(), "**/tests/**".to_string()],
47 exclude_patterns: vec![
49 "**/target/**".to_string(),
50 "**/node_modules/**".to_string(),
51 ".git/**".to_string(),
52 "**/.forge/**".to_string(),
53 "**/Cargo.lock".to_string(),
54 "**/package-lock.json".to_string(),
55 "**/yarn.lock".to_string(),
56 "**/*.min.js".to_string(),
57 "**/*.min.css".to_string(),
58 ],
59 extensions: vec![
61 "rs".to_string(), "py".to_string(), "js".to_string(), "ts".to_string(), "jsx".to_string(), "tsx".to_string(), "go".to_string(), "java".to_string(), "c".to_string(), "cpp".to_string(), "h".to_string(), "hpp".to_string(), "mod".to_string(), ],
75 }
76 }
77
78 pub fn include_dirs(dirs: &[&str]) -> Self {
84 Self {
85 include_patterns: dirs.iter().map(|d| format!("**/{}/**", d)).collect(),
86 ..Self::default()
87 }
88 }
89
90 pub fn should_index(&self, path: &Path) -> bool {
101 let path_str = path.to_string_lossy();
102
103 for pattern in &self.exclude_patterns {
105 if Self::match_glob(&path_str, pattern) {
106 return false;
107 }
108 }
109
110 let mut included = false;
112 for pattern in &self.include_patterns {
113 if Self::match_glob(&path_str, pattern) {
114 included = true;
115 break;
116 }
117 }
118 if !included {
119 return false;
120 }
121
122 if !self.extensions.is_empty() {
124 if let Some(ext) = path.extension() {
125 let ext = ext.to_string_lossy().to_lowercase();
126 if !self.extensions.contains(&ext) {
127 return false;
128 }
129 } else {
130 return false;
132 }
133 }
134
135 true
136 }
137
138 fn match_glob(path: &str, pattern: &str) -> bool {
140 if pattern.starts_with("**/") && pattern.ends_with("/**") {
142 let dir = &pattern[3..pattern.len() - 3]; return path.contains(&format!("{}/", dir)) || path.starts_with(&format!("{}/", dir));
145 }
146
147 if let Some(suffix) = pattern.strip_prefix("**/") {
149 return path.contains(suffix) || path.ends_with(suffix);
151 }
152
153 if pattern.contains("/**/") {
155 let parts: Vec<&str> = pattern.split("/**/").collect();
156 if parts.len() == 2 {
157 let prefix = parts[0];
158 let suffix = parts[1];
159 return path.starts_with(prefix) && path.contains(suffix);
160 }
161 }
162
163 if pattern.contains('*') {
165 let mut regex_str = String::with_capacity(pattern.len() * 2);
167 regex_str.push('^');
168
169 for c in pattern.chars() {
170 match c {
171 '*' => regex_str.push_str(".*"),
172 '.' => regex_str.push_str("\\."),
173 '?' => regex_str.push('.'),
174 '+' => regex_str.push_str("\\+"),
175 '(' | ')' | '[' | ']' | '{' | '}' | '^' | '$' | '|' | '\\' => {
176 regex_str.push('\\');
177 regex_str.push(c);
178 }
179 _ => regex_str.push(c),
180 }
181 }
182 regex_str.push('$');
183
184 if let Ok(re) = regex::Regex::new(®ex_str) {
185 return re.is_match(path);
186 }
187 }
188
189 path == pattern || path.contains(pattern)
191 }
192
193 pub fn add_include(&mut self, pattern: impl Into<String>) {
195 self.include_patterns.push(pattern.into());
196 }
197
198 pub fn add_exclude(&mut self, pattern: impl Into<String>) {
200 self.exclude_patterns.push(pattern.into());
201 }
202
203 pub fn add_extension(&mut self, ext: impl Into<String>) {
205 self.extensions.push(ext.into());
206 }
207}
208
209#[derive(Clone, Debug)]
239pub struct IncrementalIndexer {
240 store: Arc<UnifiedGraphStore>,
242 pending: Arc<tokio::sync::Mutex<HashSet<PathBuf>>>,
244 deleted: Arc<tokio::sync::Mutex<HashSet<PathBuf>>>,
246 filter: PathFilter,
248}
249
250impl IncrementalIndexer {
251 pub fn new(store: Arc<UnifiedGraphStore>) -> Self {
259 Self {
260 store,
261 pending: Arc::new(tokio::sync::Mutex::new(HashSet::new())),
262 deleted: Arc::new(tokio::sync::Mutex::new(HashSet::new())),
263 filter: PathFilter::default(),
264 }
265 }
266
267 pub fn with_filter(store: Arc<UnifiedGraphStore>, filter: PathFilter) -> Self {
274 Self {
275 store,
276 pending: Arc::new(tokio::sync::Mutex::new(HashSet::new())),
277 deleted: Arc::new(tokio::sync::Mutex::new(HashSet::new())),
278 filter,
279 }
280 }
281
282 pub fn filter(&self) -> &PathFilter {
284 &self.filter
285 }
286
287 pub fn set_filter(&mut self, filter: PathFilter) {
289 self.filter = filter;
290 }
291
292 pub fn queue(&self, event: WatchEvent) {
300 match event {
301 WatchEvent::Created(path) | WatchEvent::Modified(path) => {
302 if !self.filter.should_index(&path) {
304 return;
305 }
306
307 let pending = self.pending.clone();
308 tokio::spawn(async move {
309 pending.lock().await.insert(path);
310 });
311 }
312 WatchEvent::Deleted(path) => {
313 if !self.filter.should_index(&path) {
315 return;
316 }
317
318 let deleted = self.deleted.clone();
319 tokio::spawn(async move {
320 deleted.lock().await.insert(path);
321 });
322 }
323 WatchEvent::Error(_) => {
324 }
326 }
327 }
328
329 pub async fn flush(&self) -> anyhow::Result<FlushStats> {
342 let mut pending = self.pending.lock().await;
343 let mut deleted = self.deleted.lock().await;
344
345 let mut stats = FlushStats::default();
346
347 for path in deleted.drain() {
349 if let Err(e) = self.delete_file(&path).await {
350 eprintln!("Error deleting {:?}: {}", path, e);
351 } else {
352 stats.deleted += 1;
353 }
354 }
355
356 for path in pending.drain() {
358 if let Err(e) = self.index_file(&path).await {
359 eprintln!("Error indexing {:?}: {}", path, e);
360 } else {
361 stats.indexed += 1;
362 }
363 }
364
365 Ok(stats)
366 }
367
368 pub async fn full_rescan(&self, root: &Path) -> anyhow::Result<usize> {
381 self.pending.lock().await.clear();
383 self.deleted.lock().await.clear();
384
385 let mut count = 0;
386
387 if root.is_dir() {
389 self.scan_directory(root, &mut count).await?;
390 }
391
392 Ok(count)
393 }
394
395 async fn scan_directory(&self, dir: &Path, count: &mut usize) -> anyhow::Result<()> {
397 let mut entries = tokio::fs::read_dir(dir).await?;
398
399 while let Some(entry) = entries.next_entry().await? {
400 let path = entry.path();
401
402 if path.is_dir() {
403 let path_str = path.to_string_lossy();
405 if path_str.contains("/target/")
406 || path_str.contains("/node_modules/")
407 || path_str.contains("/.git/")
408 || path_str.contains("/.forge/")
409 {
410 continue;
411 }
412
413 Box::pin(self.scan_directory(&path, count)).await?;
415 } else if path.is_file() && self.filter.should_index(&path) {
416 self.pending.lock().await.insert(path);
417 *count += 1;
418 }
419 }
420
421 Ok(())
422 }
423
424 pub async fn pending_count(&self) -> usize {
426 self.pending.lock().await.len() + self.deleted.lock().await.len()
427 }
428
429 pub async fn clear_pending(&self) {
431 self.pending.lock().await.clear();
432 self.deleted.lock().await.clear();
433 }
434
435 async fn index_file(&self, path: &Path) -> anyhow::Result<()> {
437 if !path.exists() || !path.is_file() {
438 return Ok(());
439 }
440
441 let db_path = self.store.db_path().join("graph.db");
442 if !db_path.exists() {
443 return Ok(());
444 }
445
446 {
447 let mut graph = magellan::CodeGraph::open(&db_path)?;
448 if let Some(parent) = path.parent() {
449 graph.scan_directory(parent, None)?;
450 }
451 }
452
453 Ok(())
454 }
455
456 async fn delete_file(&self, path: &Path) -> anyhow::Result<()> {
458 let db_path = self.store.db_path().join("graph.db");
459 if !db_path.exists() {
460 return Ok(());
461 }
462
463 {
464 let mut graph = magellan::CodeGraph::open(&db_path)?;
465 let path_str = path.to_string_lossy();
466 let _ = graph.delete_file(&path_str);
467 }
468
469 Ok(())
470 }
471}
472
/// Counters reported by `IncrementalIndexer::flush`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct FlushStats {
    /// Number of queued paths whose indexing step returned without error.
    pub indexed: usize,
    /// Number of queued deletions whose removal step returned without error.
    pub deleted: usize,
}
481
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::{BackendKind, UnifiedGraphStore};
    use std::time::Duration;

    /// Builds an indexer with the default filter on top of an in-memory store.
    async fn memory_indexer() -> IncrementalIndexer {
        let store = UnifiedGraphStore::memory().await.unwrap();
        IncrementalIndexer::new(Arc::new(store))
    }

    /// Gives queued events time to land in the indexer's sets.
    async fn settle() {
        tokio::time::sleep(Duration::from_millis(50)).await;
    }

    fn created(path: &str) -> WatchEvent {
        WatchEvent::Created(PathBuf::from(path))
    }

    fn modified(path: &str) -> WatchEvent {
        WatchEvent::Modified(PathBuf::from(path))
    }

    fn deleted(path: &str) -> WatchEvent {
        WatchEvent::Deleted(PathBuf::from(path))
    }

    /// Asserts that `filter` accepts every path in `paths`.
    fn assert_indexed(filter: &PathFilter, paths: &[&str]) {
        for path in paths {
            assert!(
                filter.should_index(Path::new(path)),
                "{} should be indexed",
                path
            );
        }
    }

    /// Asserts that `filter` rejects every path in `paths`.
    fn assert_skipped(filter: &PathFilter, paths: &[&str]) {
        for path in paths {
            assert!(
                !filter.should_index(Path::new(path)),
                "{} should be skipped",
                path
            );
        }
    }

    #[tokio::test]
    async fn test_indexer_creation() {
        let indexer = memory_indexer().await;

        assert_eq!(indexer.pending_count().await, 0);
    }

    #[tokio::test]
    async fn test_queue_events() {
        let indexer = memory_indexer().await;

        indexer.queue(created("src/a.rs"));
        indexer.queue(modified("src/b.rs"));
        indexer.queue(deleted("src/c.rs"));
        settle().await;

        assert_eq!(indexer.pending_count().await, 3);
    }

    #[tokio::test]
    async fn test_queue_filtered_events() {
        let indexer = memory_indexer().await;

        // Accepted by the default filter.
        indexer.queue(created("src/a.rs"));
        indexer.queue(modified("tests/b.rs"));

        // Rejected by the default filter.
        for path in [
            "target/debug/build.rs",
            "node_modules/foo/index.js",
            ".git/config",
            "Cargo.lock",
            "README.md",
        ] {
            indexer.queue(modified(path));
        }
        settle().await;

        assert_eq!(indexer.pending_count().await, 2);
    }

    #[test]
    fn test_path_filter_default() {
        let filter = PathFilter::default();

        assert_indexed(
            &filter,
            &[
                "src/lib.rs",
                "src/main.rs",
                "project/src/module.rs",
                "tests/test.rs",
                "project/tests/integration.rs",
            ],
        );

        assert_skipped(
            &filter,
            &[
                "target/debug/build.rs",
                "target/release/app",
                "node_modules/foo/index.js",
                ".git/config",
                "Cargo.lock",
                "README.md",
                "Cargo.toml",
                "build.rs",
            ],
        );
    }

    #[test]
    fn test_path_filter_extensions() {
        let filter = PathFilter::default();

        assert_indexed(
            &filter,
            &[
                "src/lib.rs",
                "tests/test.rs",
                "src/main.py",
                "src/index.js",
                "src/index.ts",
                "src/App.jsx",
                "src/App.tsx",
            ],
        );

        assert_skipped(&filter, &["src/logo.png", "src/data.bin"]);
    }

    #[test]
    fn test_path_filter_custom() {
        let mut filter = PathFilter::new();
        filter.add_include("**/lib/**");
        filter.add_extension("go");

        assert_indexed(&filter, &["lib/main.go"]);
        // Wrong directory, then wrong extension.
        assert_skipped(&filter, &["src/main.go", "lib/main.rs"]);
    }

    #[tokio::test]
    async fn test_flush_clears_pending() {
        let indexer = memory_indexer().await;

        indexer.queue(modified("src/lib.rs"));
        settle().await;

        let stats = indexer.flush().await.unwrap();
        assert_eq!(stats.indexed, 1);
        assert_eq!(indexer.pending_count().await, 0);
    }

    #[tokio::test]
    async fn test_flush_stats() {
        let indexer = memory_indexer().await;

        indexer.queue(modified("src/a.rs"));
        indexer.queue(created("src/b.rs"));
        indexer.queue(deleted("src/c.rs"));
        settle().await;

        let stats = indexer.flush().await.unwrap();

        assert_eq!(stats.indexed, 2);
        assert_eq!(stats.deleted, 1);
    }

    #[tokio::test]
    async fn test_clear_pending() {
        let indexer = memory_indexer().await;

        indexer.queue(modified("src/a.rs"));
        settle().await;
        assert_eq!(indexer.pending_count().await, 1);

        indexer.clear_pending().await;
        assert_eq!(indexer.pending_count().await, 0);
    }

    #[tokio::test]
    async fn test_full_rescan() {
        let temp = tempfile::tempdir().unwrap();
        let store = UnifiedGraphStore::open(temp.path(), BackendKind::default())
            .await
            .unwrap();
        let indexer = IncrementalIndexer::new(Arc::new(store));

        let src_dir = temp.path().join("src");
        let tests_dir = temp.path().join("tests");
        let target_dir = temp.path().join("target");
        for dir in [&src_dir, &tests_dir, &target_dir] {
            tokio::fs::create_dir(dir).await.unwrap();
        }

        let files = [
            (src_dir.join("lib.rs"), "pub fn foo() {}"),
            (src_dir.join("main.rs"), "fn main() {}"),
            (tests_dir.join("test.rs"), "#[test] fn test() {}"),
            (target_dir.join("build.rs"), "// build"),
            (temp.path().join("README.md"), "# Project"),
        ];
        for (file, contents) in &files {
            tokio::fs::write(file, contents).await.unwrap();
        }

        let count = indexer.full_rescan(temp.path()).await.unwrap();

        // Only the two `src` files and the one `tests` file qualify.
        assert_eq!(count, 3);

        let pending = indexer.pending.lock().await;
        assert!(pending.contains(&src_dir.join("lib.rs")));
        assert!(pending.contains(&src_dir.join("main.rs")));
        assert!(pending.contains(&tests_dir.join("test.rs")));
        assert!(!pending.contains(&target_dir.join("build.rs")));
        assert!(!pending.contains(&temp.path().join("README.md")));
    }
}