1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
/// Persistence backend for per-file index entries.
///
/// Implementations must be `Send + Sync`; the indexer holds them behind an
/// `Arc<dyn IndexStorage>`.
pub trait IndexStorage: Send + Sync {
    /// Prepares the backend for storing entries associated with `index_dir`.
    ///
    /// Called by `SimpleIndexer::init` with the configured index directory.
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Stores a single `entry`.
    ///
    /// Called by `SimpleIndexer::index_file` once per successfully read file,
    /// with the configured index directory as `index_dir`.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
}
27
/// Policy deciding which directories are walked and which files are indexed.
///
/// Implementations must be `Send + Sync`; the indexer holds them behind an
/// `Arc<dyn TraversalFilter>`.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the directory at `path` should be descended into.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
36
/// [`IndexStorage`] implementation that writes each index entry as a Markdown
/// file inside the index directory.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42 fn init(&self, index_dir: &Path) -> Result<()> {
43 fs::create_dir_all(index_dir)?;
44 Ok(())
45 }
46
47 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48 let file_name = format!("{}.md", calculate_hash(&entry.path));
49 let index_path = index_dir.join(file_name);
50
51 let markdown = format!(
52 "# File Index: {}\n\n\
53 - **Path**: {}\n\
54 - **Hash**: {}\n\
55 - **Modified**: {}\n\
56 - **Size**: {} bytes\n\
57 - **Language**: {}\n\
58 - **Tags**: {}\n\n",
59 entry.path,
60 entry.path,
61 entry.hash,
62 entry.modified,
63 entry.size,
64 entry.language,
65 entry.tags.join(", ")
66 );
67
68 fs::write(index_path, markdown)?;
69 Ok(())
70 }
71}
72
/// Default [`TraversalFilter`] whose decisions are driven by
/// [`SimpleIndexerConfig`] plus a built-in list of sensitive file names.
#[derive(Debug, Default, Clone)]
pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79 !should_skip_dir(path, config)
80 }
81
82 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83 if !path.is_file() {
84 return false;
85 }
86
87 if config.ignore_hidden
89 && path
90 .file_name()
91 .and_then(|n| n.to_str())
92 .map_or(false, |s| s.starts_with('.'))
93 {
94 return false;
95 }
96
97 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
99 let is_sensitive = matches!(
100 file_name,
101 ".env"
102 | ".env.local"
103 | ".env.production"
104 | ".env.development"
105 | ".env.test"
106 | ".git"
107 | ".gitignore"
108 | ".DS_Store"
109 ) || file_name.starts_with(".env.");
110 if is_sensitive {
111 return false;
112 }
113 }
114
115 true
116 }
117}
118
/// Configuration for [`SimpleIndexer`]: where the workspace and index live and
/// which directories are skipped or explicitly allowed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory handed to the storage backend for index entries.
    index_dir: PathBuf,
    /// When `true`, entries whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directories whose contents are not indexed.
    excluded_dirs: Vec<PathBuf>,
    /// Directories that `should_skip_dir` never skips, even when they sit
    /// inside an excluded or hidden directory.
    allowed_dirs: Vec<PathBuf>,
}
128
129impl SimpleIndexerConfig {
130 pub fn new(workspace_root: PathBuf) -> Self {
132 let index_dir = workspace_root.join(".vtcode").join("index");
133 let vtcode_dir = workspace_root.join(".vtcode");
134 let external_dir = vtcode_dir.join("external");
135
136 let mut excluded_dirs = vec![
137 index_dir.clone(),
138 vtcode_dir,
139 workspace_root.join("target"),
140 workspace_root.join("node_modules"),
141 ];
142
143 excluded_dirs.dedup();
144
145 Self {
146 workspace_root,
147 index_dir,
148 ignore_hidden: true,
149 excluded_dirs,
150 allowed_dirs: vec![external_dir],
151 }
152 }
153
154 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
156 let index_dir = index_dir.into();
157 self.index_dir = index_dir.clone();
158 self.push_unique_excluded(index_dir);
159 self
160 }
161
162 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
164 let path = path.into();
165 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
166 self.allowed_dirs.push(path);
167 }
168 self
169 }
170
171 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
173 let path = path.into();
174 self.push_unique_excluded(path);
175 self
176 }
177
178 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
180 self.ignore_hidden = ignore_hidden;
181 self
182 }
183
184 pub fn workspace_root(&self) -> &Path {
186 &self.workspace_root
187 }
188
189 pub fn index_dir(&self) -> &Path {
191 &self.index_dir
192 }
193
194 fn push_unique_excluded(&mut self, path: PathBuf) {
195 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
196 self.excluded_dirs.push(path);
197 }
198 }
199}
200
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file, as a lossily converted string.
    pub path: String,
    /// Hex hash of the file's text content.
    pub hash: String,
    /// Last modification time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// Language label; the indexer uses the file extension, or `"unknown"`.
    pub language: String,
    /// Free-form tags; the indexer currently stores an empty list.
    pub tags: Vec<String>,
}
217
/// One matching line produced by `SimpleIndexer::search` or
/// `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match (the index cache key).
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched text: every regex match on the line for `search`, the whole
    /// line for `grep`.
    pub matches: Vec<String>,
}
226
/// File indexer with pluggable storage and traversal filtering.
pub struct SimpleIndexer {
    /// Workspace, index-directory and exclusion settings.
    config: SimpleIndexerConfig,
    /// In-memory index keyed by file path string.
    index_cache: HashMap<String, FileIndex>,
    /// Backend that persists each index entry.
    storage: Arc<dyn IndexStorage>,
    /// Policy deciding which files and directories are considered.
    filter: Arc<dyn TraversalFilter>,
}
234
235impl SimpleIndexer {
236 pub fn new(workspace_root: PathBuf) -> Self {
238 Self::with_components(
239 SimpleIndexerConfig::new(workspace_root),
240 Arc::new(MarkdownIndexStorage),
241 Arc::new(ConfigTraversalFilter),
242 )
243 }
244
245 pub fn with_config(config: SimpleIndexerConfig) -> Self {
247 Self::with_components(
248 config,
249 Arc::new(MarkdownIndexStorage),
250 Arc::new(ConfigTraversalFilter),
251 )
252 }
253
254 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
256 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
257 Self::with_config(config)
258 }
259
260 pub fn with_components(
262 config: SimpleIndexerConfig,
263 storage: Arc<dyn IndexStorage>,
264 filter: Arc<dyn TraversalFilter>,
265 ) -> Self {
266 Self {
267 config,
268 index_cache: HashMap::new(),
269 storage,
270 filter,
271 }
272 }
273
274 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
276 Self { storage, ..self }
277 }
278
279 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
281 Self { filter, ..self }
282 }
283
284 pub fn init(&self) -> Result<()> {
286 self.storage.init(self.config.index_dir())
287 }
288
289 pub fn workspace_root(&self) -> &Path {
291 self.config.workspace_root()
292 }
293
294 pub fn index_dir(&self) -> &Path {
296 self.config.index_dir()
297 }
298
299 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
301 if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
302 return Ok(());
303 }
304
305 let content = match fs::read_to_string(file_path) {
306 Ok(text) => text,
307 Err(err) => {
308 if err.kind() == ErrorKind::InvalidData {
309 return Ok(());
310 }
311 return Err(err.into());
312 }
313 };
314 let hash = calculate_hash(&content);
315 let modified = self.get_modified_time(file_path)?;
316 let size = content.len() as u64;
317 let language = self.detect_language(file_path);
318
319 let index = FileIndex {
320 path: file_path.to_string_lossy().to_string(),
321 hash,
322 modified,
323 size,
324 language,
325 tags: vec![],
326 };
327
328 self.index_cache
329 .insert(file_path.to_string_lossy().to_string(), index.clone());
330
331 self.storage.persist(self.config.index_dir(), &index)?;
332
333 Ok(())
334 }
335
336 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
340 let walker = WalkBuilder::new(dir_path)
341 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
348
349 for entry in walker.filter_map(|e| e.ok()) {
350 let path = entry.path();
351
352 if entry.file_type().is_some_and(|ft| ft.is_file()) {
354 let should_skip = self
356 .config
357 .excluded_dirs
358 .iter()
359 .any(|excluded| path.starts_with(excluded));
360
361 if !should_skip && self.filter.should_index_file(path, &self.config) {
362 self.index_file(path)?;
363 }
364 }
365 }
366
367 Ok(())
368 }
369
370 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
372 let regex = Regex::new(pattern)?;
373
374 let mut results = Vec::new();
375
376 for file_path in self.index_cache.keys() {
378 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
379 continue;
380 }
381
382 if let Ok(content) = fs::read_to_string(file_path) {
383 for (line_num, line) in content.lines().enumerate() {
384 if regex.is_match(line) {
385 let matches: Vec<String> = regex
386 .find_iter(line)
387 .map(|m| m.as_str().to_string())
388 .collect();
389
390 results.push(SearchResult {
391 file_path: file_path.clone(),
392 line_number: line_num + 1,
393 line_content: line.to_string(),
394 matches,
395 });
396 }
397 }
398 }
399 }
400
401 Ok(results)
402 }
403
404 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
406 let regex = Regex::new(pattern)?;
407 let mut results = Vec::new();
408
409 for file_path in self.index_cache.keys() {
410 if regex.is_match(file_path) {
411 results.push(file_path.clone());
412 }
413 }
414
415 Ok(results)
416 }
417
418 pub fn all_files(&self) -> Vec<String> {
421 self.index_cache.keys().cloned().collect()
422 }
423
424 pub fn get_file_content(
426 &self,
427 file_path: &str,
428 start_line: Option<usize>,
429 end_line: Option<usize>,
430 ) -> Result<String> {
431 let content = fs::read_to_string(file_path)?;
432 let lines: Vec<&str> = content.lines().collect();
433
434 let start = start_line.unwrap_or(1).saturating_sub(1);
435 let end = end_line.unwrap_or(lines.len());
436
437 let selected_lines = &lines[start..end.min(lines.len())];
438
439 let mut result = String::new();
440 for (i, line) in selected_lines.iter().enumerate() {
441 result.push_str(&format!("{}: {}\n", start + i + 1, line));
442 }
443
444 Ok(result)
445 }
446
447 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
449 let path = Path::new(dir_path);
450 if !path.exists() {
451 return Ok(vec![]);
452 }
453
454 let mut files = Vec::new();
455
456 for entry in fs::read_dir(path)? {
457 let entry = entry?;
458 let file_name = entry.file_name().to_string_lossy().to_string();
459
460 if !show_hidden && file_name.starts_with('.') {
461 continue;
462 }
463
464 files.push(file_name);
465 }
466
467 Ok(files)
468 }
469
470 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
472 let regex = Regex::new(pattern)?;
473 let mut results = Vec::new();
474
475 for file_path in self.index_cache.keys() {
476 if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
477 continue;
478 }
479
480 if let Ok(content) = fs::read_to_string(file_path) {
481 for (line_num, line) in content.lines().enumerate() {
482 if regex.is_match(line) {
483 results.push(SearchResult {
484 file_path: file_path.clone(),
485 line_number: line_num + 1,
486 line_content: line.to_string(),
487 matches: vec![line.to_string()],
488 });
489 }
490 }
491 }
492 }
493
494 Ok(results)
495 }
496
497 #[allow(dead_code)]
498 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
499 where
500 F: FnMut(&Path) -> Result<()>,
501 {
502 if !dir_path.exists() {
503 return Ok(());
504 }
505
506 self.walk_directory_internal(dir_path, callback)
507 }
508
509 #[allow(dead_code)]
510 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
511 where
512 F: FnMut(&Path) -> Result<()>,
513 {
514 for entry in fs::read_dir(dir_path)? {
515 let entry = entry?;
516 let path = entry.path();
517
518 if path.is_dir() {
519 if self.is_allowed_dir(&path) {
520 self.walk_directory_internal(&path, callback)?;
521 continue;
522 }
523
524 if !self.filter.should_descend(&path, &self.config) {
525 self.walk_allowed_descendants(&path, callback)?;
526 continue;
527 }
528
529 self.walk_directory_internal(&path, callback)?;
530 } else if path.is_file() {
531 callback(&path)?;
532 }
533 }
534
535 Ok(())
536 }
537
538 #[allow(dead_code)]
539 fn is_allowed_dir(&self, path: &Path) -> bool {
540 self.config
541 .allowed_dirs
542 .iter()
543 .any(|allowed| path.starts_with(allowed))
544 }
545
546 #[allow(dead_code)]
547 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
548 where
549 F: FnMut(&Path) -> Result<()>,
550 {
551 let allowed_dirs = self.config.allowed_dirs.clone();
552 for allowed in allowed_dirs {
553 if allowed.starts_with(dir_path) && allowed.exists() {
554 self.walk_directory_internal(&allowed, callback)?;
555 }
556 }
557 Ok(())
558 }
559
560 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
561 let metadata = fs::metadata(file_path)?;
562 let modified = metadata.modified()?;
563 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
564 }
565
566 fn detect_language(&self, file_path: &Path) -> String {
567 file_path
568 .extension()
569 .and_then(|ext| ext.to_str())
570 .unwrap_or("unknown")
571 .to_string()
572 }
573}
574
575impl Clone for SimpleIndexer {
576 fn clone(&self) -> Self {
577 Self {
578 config: self.config.clone(),
579 index_cache: self.index_cache.clone(),
580 storage: self.storage.clone(),
581 filter: self.filter.clone(),
582 }
583 }
584}
585
586fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
587 if config
588 .allowed_dirs
589 .iter()
590 .any(|allowed| path.starts_with(allowed))
591 {
592 return false;
593 }
594
595 if config
596 .excluded_dirs
597 .iter()
598 .any(|excluded| path.starts_with(excluded))
599 {
600 return true;
601 }
602
603 if config.ignore_hidden
604 && path
605 .file_name()
606 .and_then(|name| name.to_str())
607 .is_some_and(|name_str| name_str.starts_with('.'))
608 {
609 return true;
610 }
611
612 false
613}
614
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as lowercase hexadecimal without padding.
///
/// NOTE(review): the algorithm behind `DefaultHasher` is not guaranteed to be
/// stable across Rust releases, so persisted hashes may change after a
/// toolchain upgrade.
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{:x}", digest)
}
623
// Integration-style tests that index real files inside temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// With the default configuration, files inside a dot-directory are not
    /// indexed while files in regular directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }

    /// With `ignore_hidden(false)`, files inside a dot-directory are indexed.
    ///
    /// NOTE(review): this can only pass if `index_directory` lets the walker
    /// visit hidden directories when `ignore_hidden` is false — confirm
    /// against the walker configuration.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    /// A custom `IndexStorage` receives exactly one entry per indexed file.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // In-memory backend that records every persisted entry.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        // The test keeps its own handle to the records so it can inspect them
        // after the storage has been moved into the indexer.
        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().to_string()
        );

        Ok(())
    }

    /// A custom `TraversalFilter` can veto files the default filter accepts.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        // Rejects `.rs` files and defers every other decision to the default filter.
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
}