1use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{DirEntry, Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fmt::Write as FmtWrite;
14use std::fs;
15use std::io::{BufWriter, ErrorKind, Write};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::SystemTime;
19
/// Persistence backend for the indexer.
///
/// Implementations decide how [`FileIndex`] entries are written under the
/// given index directory (per-file artifacts, a single snapshot, etc.).
pub trait IndexStorage: Send + Sync {
    /// Prepare the storage location (e.g. create the index directory).
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Write a single index entry to storage.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;

    /// When `true`, callers should persist the whole index as one snapshot
    /// via [`IndexStorage::persist_batch`] instead of per-entry writes.
    fn prefers_snapshot_persistence(&self) -> bool {
        false
    }

    /// Remove the stored artifact for `file_path`. Defaults to a no-op for
    /// backends that do not keep per-file artifacts.
    fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
        Ok(())
    }

    /// Persist many entries; the default simply persists each one in turn.
    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
        for entry in entries {
            self.persist(index_dir, entry)?;
        }
        Ok(())
    }
}
55
/// Policy hooks that decide which directories are entered and which files
/// are indexed while walking a workspace.
pub trait TraversalFilter: Send + Sync {
    /// Whether the walker should recurse into the directory at `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Whether the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
64
/// [`IndexStorage`] backend that renders the index as Markdown documents and
/// prefers writing one consolidated `index.md` snapshot.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
68
69impl IndexStorage for MarkdownIndexStorage {
70 fn init(&self, index_dir: &Path) -> Result<()> {
71 fs::create_dir_all(index_dir)?;
72 Ok(())
73 }
74
75 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
76 fs::create_dir_all(index_dir)?;
77 let file_name = format!("{}.md", calculate_hash(&entry.path));
78 let index_path = index_dir.join(file_name);
79 let file = fs::File::create(index_path)?;
80 let mut writer = BufWriter::new(file);
81 writeln!(writer, "# File Index: {}", entry.path)?;
82 writeln!(writer)?;
83 write_markdown_fields(&mut writer, entry)?;
84 writer.flush()?;
85 Ok(())
86 }
87
88 fn prefers_snapshot_persistence(&self) -> bool {
89 true
90 }
91
92 fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
93 let file_name = format!(
94 "{}.md",
95 calculate_hash(file_path.to_string_lossy().as_ref())
96 );
97 let index_path = index_dir.join(file_name);
98 match fs::remove_file(index_path) {
99 Ok(()) => Ok(()),
100 Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
101 Err(err) => Err(err.into()),
102 }
103 }
104
105 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
106 fs::create_dir_all(index_dir)?;
107 let temp_path = index_dir.join(".index.md.tmp");
108 let final_path = index_dir.join("index.md");
109 let file = fs::File::create(&temp_path)?;
110 let mut writer = BufWriter::new(file);
111
112 writeln!(writer, "# Workspace File Index")?;
113 writeln!(writer)?;
114 writeln!(writer, "- **Entries**: {}", entries.len())?;
115 writeln!(writer)?;
116
117 for entry in entries {
118 write_markdown_entry(&mut writer, entry)?;
119 }
120
121 writer.flush()?;
122 fs::rename(temp_path, final_path)?;
123 cleanup_legacy_markdown_entries(index_dir)?;
124 Ok(())
125 }
126}
127
/// Default [`TraversalFilter`] driven entirely by [`SimpleIndexerConfig`]
/// (excluded/allowed directories, hidden-file handling) plus a built-in
/// deny-list of sensitive file names.
#[derive(Debug, Default, Clone)]
pub struct ConfigTraversalFilter;
131
132impl TraversalFilter for ConfigTraversalFilter {
133 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
134 !should_skip_dir(path, config)
135 }
136
137 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
138 if !path.is_file() {
139 return false;
140 }
141
142 if config.ignore_hidden
144 && path
145 .file_name()
146 .and_then(|n| n.to_str())
147 .is_some_and(|s| s.starts_with('.'))
148 {
149 return false;
150 }
151
152 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
154 let is_sensitive = matches!(
155 file_name,
156 ".env"
157 | ".env.local"
158 | ".env.production"
159 | ".env.development"
160 | ".env.test"
161 | ".git"
162 | ".gitignore"
163 | ".DS_Store"
164 ) || file_name.starts_with(".env.");
165 if is_sensitive {
166 return false;
167 }
168 }
169
170 true
171 }
172}
173
/// Configuration for the indexer: workspace location, index output
/// directory, and the allow/deny directory lists used during traversal.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    workspace_root: PathBuf,
    index_dir: PathBuf,
    ignore_hidden: bool,
    excluded_dirs: Vec<PathBuf>,
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Build the default configuration rooted at `workspace_root`.
    ///
    /// The index is written under `.vtcode/index`; `.vtcode`, `target`, and
    /// `node_modules` are excluded, while `.vtcode/external` remains
    /// indexable via the allow-list.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");

        let mut excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir.clone(),
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];
        excluded_dirs.dedup();

        let allowed_dirs = vec![vtcode_dir.join("external")];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Override where index artifacts are written; the new directory is
    /// also excluded from traversal so the index never indexes itself.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        self.index_dir = index_dir.into();
        let dir = self.index_dir.clone();
        self.push_unique_excluded(dir);
        self
    }

    /// Allow a directory to be indexed even under an excluded or hidden
    /// parent. Duplicates are ignored.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        if !self.allowed_dirs.contains(&path) {
            self.allowed_dirs.push(path);
        }
        self
    }

    /// Exclude an additional directory from traversal. Duplicates are
    /// ignored.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Control whether hidden (dot-prefixed) entries are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Directory where index artifacts are written.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Append to the exclusion list unless the path is already present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
255
/// Metadata captured for a single indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// File path, stored as a lossily-converted string.
    pub path: String,
    /// SHA-256 digest of the file's text content.
    pub hash: String,
    /// Last-modified time, in seconds since the Unix epoch.
    pub modified: u64,
    /// Size in bytes of the UTF-8 content that was read.
    pub size: u64,
    /// Language guess taken from the file extension ("unknown" if none).
    pub language: String,
    /// Free-form tags; currently always empty when built by the indexer.
    pub tags: Vec<String>,
}
272
/// One matching line produced by [`SimpleIndexer::search`] or
/// [`SimpleIndexer::grep`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match.
    pub file_path: String,
    /// 1-based line number of the match.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched substrings (`search`) or the whole line (`grep`).
    pub matches: Vec<String>,
}
281
/// Workspace file indexer: walks directories, caches per-file metadata in
/// memory, and persists the index through a pluggable [`IndexStorage`].
pub struct SimpleIndexer {
    /// Traversal and storage configuration.
    config: SimpleIndexerConfig,
    /// In-memory index keyed by the file's path string.
    index_cache: HashMap<String, FileIndex>,
    /// Persistence backend for index entries.
    storage: Arc<dyn IndexStorage>,
    /// Policy deciding which paths are traversed and indexed.
    filter: Arc<dyn TraversalFilter>,
}
289
impl SimpleIndexer {
    /// Create an indexer with the default Markdown storage and the
    /// config-driven traversal filter.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self::with_components(
            SimpleIndexerConfig::new(workspace_root),
            Arc::new(MarkdownIndexStorage),
            Arc::new(ConfigTraversalFilter),
        )
    }

    /// Create an indexer from an existing configuration, with the default
    /// storage and filter.
    pub fn with_config(config: SimpleIndexerConfig) -> Self {
        Self::with_components(
            config,
            Arc::new(MarkdownIndexStorage),
            Arc::new(ConfigTraversalFilter),
        )
    }

    /// Create an indexer whose index artifacts live at `index_dir` instead
    /// of the default `.vtcode/index`.
    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
        Self::with_config(config)
    }

    /// Create an indexer from fully custom parts. The cache starts empty.
    pub fn with_components(
        config: SimpleIndexerConfig,
        storage: Arc<dyn IndexStorage>,
        filter: Arc<dyn TraversalFilter>,
    ) -> Self {
        Self {
            config,
            index_cache: HashMap::new(),
            storage,
            filter,
        }
    }

    /// Swap in a different storage backend, keeping everything else.
    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
        Self { storage, ..self }
    }

    /// Swap in a different traversal filter, keeping everything else.
    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
        Self { filter, ..self }
    }

    /// Initialize the storage backend (e.g. create the index directory).
    pub fn init(&self) -> Result<()> {
        self.storage.init(self.config.index_dir())
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.config.workspace_root()
    }

    /// Directory where index artifacts are written.
    pub fn index_dir(&self) -> &Path {
        self.config.index_dir()
    }

    /// Index (or re-index) a single file, updating both storage and the
    /// in-memory cache. Missing, excluded, or unreadable files cause the
    /// stale entry to be removed instead.
    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
        let cache_key = file_path.to_string_lossy().into_owned();

        if self.storage.prefers_snapshot_persistence() {
            // Snapshot mode: compute the next cache state first, persist the
            // whole sorted snapshot, and only then commit the cache update so
            // a failed persist leaves the in-memory state untouched.
            let mut next_cache = self.index_cache.clone();

            if file_path.exists() && self.should_process_file_path(file_path) {
                if let Some(index) = self.build_file_index(file_path)? {
                    next_cache.insert(index.path.clone(), index);
                } else {
                    // Unreadable (e.g. non-UTF-8) file: drop any stale entry.
                    next_cache.remove(cache_key.as_str());
                }
            } else {
                next_cache.remove(cache_key.as_str());
            }

            let mut snapshot = next_cache.values().cloned().collect::<Vec<_>>();
            snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
            self.storage
                .persist_batch(self.config.index_dir(), &snapshot)?;
            self.index_cache = next_cache;
            return Ok(());
        }

        // Per-entry mode: remove stale artifacts for files that vanished or
        // are no longer eligible for indexing.
        if !file_path.exists() || !self.should_process_file_path(file_path) {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
            return Ok(());
        }

        if let Some(index) = self.build_file_index(file_path)? {
            self.storage.persist(self.config.index_dir(), &index)?;
            self.index_cache.insert(index.path.clone(), index);
        } else {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
        }

        Ok(())
    }

    /// Recursively index every eligible file under `dir_path`, then replace
    /// the cached entries for that subtree with the fresh results.
    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
        let walker = self.build_walker(dir_path);

        let mut entries = Vec::new();

        for entry in walker.filter_map(|e| e.ok()) {
            let path = entry.path();

            if entry.file_type().is_some_and(|ft| ft.is_file())
                && let Some(index) = self.build_file_index(path)?
            {
                entries.push(index);
            }
        }

        if self.storage.prefers_snapshot_persistence() {
            // Merge: keep cached entries outside `dir_path`, replace the
            // subtree with the freshly built entries, persist as one sorted
            // snapshot.
            let mut persisted_entries = self
                .index_cache
                .iter()
                .filter(|(path, _)| !Path::new(path).starts_with(dir_path))
                .map(|(_, entry)| entry.clone())
                .collect::<Vec<_>>();
            persisted_entries.extend(entries.iter().cloned());
            persisted_entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
            self.storage
                .persist_batch(self.config.index_dir(), &persisted_entries)?;
        } else {
            entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
            self.storage
                .persist_batch(self.config.index_dir(), &entries)?;
        }

        self.replace_cached_entries(dir_path, &entries);

        Ok(())
    }

    /// List (without indexing) every file under `dir_path` that would be
    /// indexed, as sorted path strings.
    pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
        let walker = self.build_walker(dir_path);

        let mut files = walker
            .filter_map(|e| e.ok())
            .filter(|e| {
                if !e.file_type().is_some_and(|ft| ft.is_file()) {
                    return false;
                }

                self.should_process_file_path(e.path())
            })
            .map(|e| e.path().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        files.sort_unstable();
        files
    }

    /// Shared scan for `search` and `grep`: re-reads every cached file from
    /// disk and collects lines matching `regex`, optionally restricted to
    /// paths containing `path_filter`. Results are sorted by path then line.
    /// `extract_matches` selects whether `matches` holds the regex captures
    /// or the whole line. Files that fail to read are silently skipped.
    fn search_files_internal(
        &self,
        regex: &Regex,
        path_filter: Option<&str>,
        extract_matches: bool,
    ) -> Vec<SearchResult> {
        let mut results = Vec::new();

        for file_path in self.index_cache.keys() {
            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
                continue;
            }

            if let Ok(content) = fs::read_to_string(file_path) {
                for (line_num, line) in content.lines().enumerate() {
                    if regex.is_match(line) {
                        let matches = if extract_matches {
                            regex
                                .find_iter(line)
                                .map(|m| m.as_str().to_string())
                                .collect()
                        } else {
                            vec![line.to_string()]
                        };

                        results.push(SearchResult {
                            file_path: file_path.clone(),
                            line_number: line_num + 1,
                            line_content: line.to_string(),
                            matches,
                        });
                    }
                }
            }
        }

        results.sort_unstable_by(|left, right| {
            left.file_path
                .cmp(&right.file_path)
                .then_with(|| left.line_number.cmp(&right.line_number))
        });
        results
    }

    /// Search cached files for `pattern`, returning the matched substrings
    /// per line. `path_filter` restricts to paths containing the substring.
    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
        let regex = Regex::new(pattern)?;
        Ok(self.search_files_internal(&regex, path_filter, true))
    }

    /// Return the sorted cached file paths whose path string matches
    /// `pattern`.
    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
        let regex = Regex::new(pattern)?;
        let mut results = Vec::new();

        for file_path in self.index_cache.keys() {
            if regex.is_match(file_path) {
                results.push(file_path.clone());
            }
        }

        results.sort_unstable();
        Ok(results)
    }

    /// All cached file paths, sorted.
    pub fn all_files(&self) -> Vec<String> {
        let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
        files.sort_unstable();
        files
    }

    /// Read `file_path` and return lines `start_line..=end_line` (1-based,
    /// both optional), each prefixed with its line number. Out-of-range or
    /// inverted bounds yield an empty string rather than an error.
    pub fn get_file_content(
        &self,
        file_path: &str,
        start_line: Option<usize>,
        end_line: Option<usize>,
    ) -> Result<String> {
        let content = fs::read_to_string(file_path)?;
        let start = start_line.unwrap_or(1).max(1);
        let end = end_line.unwrap_or(usize::MAX);

        if start > end {
            return Ok(String::new());
        }

        let mut result = String::new();
        for (line_number, line) in content.lines().enumerate() {
            let line_number = line_number + 1;
            if line_number < start {
                continue;
            }
            if line_number > end {
                break;
            }
            writeln!(&mut result, "{line_number}: {line}")?;
        }

        Ok(result)
    }

    /// List the immediate children of `dir_path` (names only, sorted),
    /// optionally including dot-prefixed entries. A missing directory
    /// yields an empty list. This does not consult the index cache.
    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
        let path = Path::new(dir_path);
        if !path.exists() {
            return Ok(vec![]);
        }

        let mut files = Vec::new();

        for entry in fs::read_dir(path)? {
            let entry = entry?;
            let file_name = entry.file_name().to_string_lossy().into_owned();

            if !show_hidden && file_name.starts_with('.') {
                continue;
            }

            files.push(file_name);
        }

        files.sort_unstable();
        Ok(files)
    }

    /// Like [`SimpleIndexer::search`], but `matches` holds the whole
    /// matching line instead of the individual regex matches.
    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
        let regex = Regex::new(pattern)?;
        Ok(self.search_files_internal(&regex, file_pattern, false))
    }

    /// Legacy manual traversal entry point; superseded by `build_walker`
    /// but kept for callers that need callback-style walking.
    #[allow(dead_code)]
    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        if !dir_path.exists() {
            return Ok(());
        }

        self.walk_directory_internal(dir_path, callback)
    }

    /// Recursive body of `walk_directory`: invokes `callback` for each file,
    /// honoring the filter but still descending into allow-listed
    /// directories nested under skipped parents.
    #[allow(dead_code)]
    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        for entry in fs::read_dir(dir_path)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Allow-listed subtrees bypass the filter entirely.
                if self.is_allowed_path(&path) {
                    self.walk_directory_internal(&path, callback)?;
                    continue;
                }

                // Skipped directory: still visit any allow-listed
                // descendants it contains.
                if !self.filter.should_descend(&path, &self.config) {
                    self.walk_allowed_descendants(&path, callback)?;
                    continue;
                }

                self.walk_directory_internal(&path, callback)?;
            } else if path.is_file() {
                callback(&path)?;
            }
        }

        Ok(())
    }

    /// Whether `path` falls inside any allow-listed directory.
    #[allow(dead_code)]
    fn is_allowed_path(&self, path: &Path) -> bool {
        self.config
            .allowed_dirs
            .iter()
            .any(|allowed| path.starts_with(allowed))
    }

    /// Walk only the allow-listed directories nested under `dir_path`
    /// (used when `dir_path` itself is skipped by the filter).
    #[allow(dead_code)]
    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        // Clone to avoid borrowing `self.config` across the `&mut self` call.
        let allowed_dirs = self.config.allowed_dirs.clone();
        for allowed in allowed_dirs {
            if allowed.starts_with(dir_path) && allowed.exists() {
                self.walk_directory_internal(&allowed, callback)?;
            }
        }
        Ok(())
    }

    /// Last-modified time of `file_path` as seconds since the Unix epoch.
    /// Errors if metadata is unavailable or the mtime predates the epoch.
    #[inline]
    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
        let metadata = fs::metadata(file_path)?;
        let modified = metadata.modified()?;
        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
    }

    /// Language guess from the file extension; "unknown" when there is no
    /// usable extension.
    #[inline]
    fn detect_language(&self, file_path: &Path) -> String {
        file_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("unknown")
            .to_string()
    }

    /// Build a [`FileIndex`] for `file_path`. Returns `Ok(None)` when the
    /// file is filtered out or its content is not valid UTF-8; other I/O
    /// failures propagate as errors.
    fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
        if !self.should_process_file_path(file_path) {
            return Ok(None);
        }

        let content = match fs::read_to_string(file_path) {
            Ok(text) => text,
            Err(err) => {
                // Binary / non-UTF-8 content is skipped, not fatal.
                if err.kind() == ErrorKind::InvalidData {
                    return Ok(None);
                }
                return Err(err.into());
            }
        };

        let index = FileIndex {
            path: file_path.to_string_lossy().into_owned(),
            hash: calculate_hash(&content),
            modified: self.get_modified_time(file_path)?,
            size: content.len() as u64,
            language: self.detect_language(file_path),
            tags: vec![],
        };

        Ok(Some(index))
    }

    /// Whether `path` falls inside any excluded directory.
    #[inline]
    fn is_excluded_path(&self, path: &Path) -> bool {
        self.config
            .excluded_dirs
            .iter()
            .any(|excluded| path.starts_with(excluded))
    }

    /// Delegate the per-file decision to the traversal filter.
    #[inline]
    fn should_index_file_path(&self, path: &Path) -> bool {
        self.filter.should_index_file(path, &self.config)
    }

    /// Combined eligibility check: allow-listed paths skip the exclusion
    /// test but still go through the file filter.
    #[inline]
    fn should_process_file_path(&self, path: &Path) -> bool {
        if self.is_allowed_path(path) {
            return self.should_index_file_path(path);
        }

        !self.is_excluded_path(path) && self.should_index_file_path(path)
    }

    /// Build the gitignore-aware walker rooted at `dir_path`. Hidden-entry
    /// skipping is handled by our filter (via `should_visit_entry`), not by
    /// the walker's own `hidden` option.
    fn build_walker(&self, dir_path: &Path) -> Walk {
        let walk_root = dir_path.to_path_buf();
        let config = self.config.clone();
        let filter = Arc::clone(&self.filter);

        let mut builder = WalkBuilder::new(dir_path);
        builder
            .hidden(false)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .ignore(true)
            .parents(true);
        builder.filter_entry(move |entry| {
            should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
        });
        builder.build()
    }

    /// Drop cached entries under `dir_path` and insert the fresh `entries`.
    fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
        self.index_cache
            .retain(|path, _| !Path::new(path).starts_with(dir_path));

        self.index_cache.extend(
            entries
                .iter()
                .cloned()
                .map(|entry| (entry.path.clone(), entry)),
        );
    }
}
751
752impl Clone for SimpleIndexer {
753 fn clone(&self) -> Self {
754 Self {
755 config: self.config.clone(),
756 index_cache: self.index_cache.clone(),
757 storage: self.storage.clone(),
758 filter: self.filter.clone(),
759 }
760 }
761}
762
763fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
764 if is_allowed_path_or_ancestor(path, config) {
765 return false;
766 }
767
768 if config
769 .excluded_dirs
770 .iter()
771 .any(|excluded| path.starts_with(excluded))
772 {
773 return true;
774 }
775
776 if config.ignore_hidden
777 && path
778 .file_name()
779 .and_then(|name| name.to_str())
780 .is_some_and(|name_str| name_str.starts_with('.'))
781 {
782 return true;
783 }
784
785 false
786}
787
788fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
789 config
790 .allowed_dirs
791 .iter()
792 .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
793}
794
795fn should_visit_entry(
796 entry: &DirEntry,
797 walk_root: &Path,
798 config: &SimpleIndexerConfig,
799 filter: &dyn TraversalFilter,
800) -> bool {
801 if entry.path() == walk_root {
802 return true;
803 }
804
805 if !entry
806 .file_type()
807 .is_some_and(|file_type| file_type.is_dir())
808 {
809 return true;
810 }
811
812 filter.should_descend(entry.path(), config)
813}
814
/// SHA-256 digest of `content` as a string, delegating to the shared
/// workspace hashing utility.
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
819
820fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
821 writeln!(writer, "## {}", entry.path)?;
822 writeln!(writer)?;
823 write_markdown_fields(writer, entry)?;
824 writeln!(writer)?;
825 Ok(())
826}
827
828fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
829 writeln!(writer, "- **Path**: {}", entry.path)?;
830 writeln!(writer, "- **Hash**: {}", entry.hash)?;
831 writeln!(writer, "- **Modified**: {}", entry.modified)?;
832 writeln!(writer, "- **Size**: {} bytes", entry.size)?;
833 writeln!(writer, "- **Language**: {}", entry.language)?;
834 writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
835 Ok(())
836}
837
838fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
839 for entry in fs::read_dir(index_dir)? {
840 let entry = entry?;
841 let file_name = entry.file_name();
842 let file_name = file_name.to_string_lossy();
843 if is_legacy_markdown_entry_name(file_name.as_ref()) {
844 fs::remove_file(entry.path())?;
845 }
846 }
847 Ok(())
848}
849
/// `true` for file names shaped like `<64-hex-chars>.md` — the naming
/// scheme used by the legacy per-file persistence mode.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    match file_name.strip_suffix(".md") {
        Some(stem) => stem.len() == 64 && stem.bytes().all(|byte| byte.is_ascii_hexdigit()),
        None => false,
    }
}
857
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    // Hidden directories are pruned when `ignore_hidden` is on (default).
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }

    // `ignore_hidden(false)` makes dot-directories indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    // The allow-list (`.vtcode/external`) overrides hidden/excluded parents.
    #[test]
    fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let allowed_dir = workspace.join(".vtcode").join("external");
        fs::create_dir_all(&allowed_dir)?;
        fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("plugin\\.toml$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    // Re-walking a directory drops cache entries for files that vanished.
    #[test]
    fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);

        fs::remove_file(&file_path)?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());
        assert!(indexer.all_files().is_empty());

        Ok(())
    }

    // Files inside the index directory itself must never be indexed.
    #[test]
    fn index_file_skips_excluded_paths() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let index_dir = workspace.join(".vtcode").join("index");
        fs::create_dir_all(&index_dir)?;
        let generated_index = index_dir.join("index.md");
        fs::write(&generated_index, "# generated")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&generated_index)?;

        assert!(indexer.all_files().is_empty());

        Ok(())
    }

    // A file that turns into invalid UTF-8 is removed from cache and snapshot.
    #[test]
    fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&file_path)?;
        assert!(
            indexer
                .find_files("notes\\.txt$")?
                .iter()
                .any(|file| file.ends_with("notes.txt"))
        );

        // Overwrite with bytes that are not valid UTF-8.
        fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
        indexer.index_file(&file_path)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));

        Ok(())
    }

    // Snapshot mode keeps exactly one `index.md` containing every entry.
    #[test]
    fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&first)?;
        indexer.index_file(&second)?;

        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(first.to_string_lossy().as_ref()));
        assert!(index_content.contains(second.to_string_lossy().as_ref()));

        Ok(())
    }

    // `persist_batch` creates the index directory itself; no `init` needed.
    #[test]
    fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.index_directory(workspace)?;

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));

        Ok(())
    }

    // Out-of-range / inverted line bounds return empty output, not panics.
    #[test]
    fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "first\nsecond")?;

        let indexer = SimpleIndexer::new(workspace.to_path_buf());
        let file_path = file_path.to_string_lossy().into_owned();

        assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
        assert_eq!(
            indexer.get_file_content(&file_path, Some(0), Some(1))?,
            "1: first\n"
        );
        assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");

        Ok(())
    }

    // A user-provided `IndexStorage` receives each persisted entry.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );

        Ok(())
    }

    // A custom filter can veto individual files during traversal.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }

    // A custom filter can prune whole directory subtrees.
    #[test]
    fn custom_filters_can_skip_directories() -> Result<()> {
        #[derive(Default)]
        struct SkipGeneratedFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipGeneratedFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path.ends_with("generated") {
                    return false;
                }

                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        let generated_dir = workspace.join("generated");
        fs::create_dir_all(&generated_dir)?;
        fs::write(generated_dir.join("skip.txt"), "ignore me")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let indexer = SimpleIndexer::with_config(config)
            .with_filter(Arc::new(SkipGeneratedFilter::default()));
        let files = indexer.discover_files(workspace);

        assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
        assert!(files.iter().any(|file| file.ends_with("README.md")));

        Ok(())
    }

    // Indexing one subtree must not evict cache entries from another.
    #[test]
    fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let src_dir = workspace.join("src");
        let docs_dir = workspace.join("docs");
        fs::create_dir_all(&src_dir)?;
        fs::create_dir_all(&docs_dir)?;
        fs::write(src_dir.join("lib.rs"), "fn main() {}")?;
        fs::write(docs_dir.join("guide.md"), "# Guide")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(&src_dir)?;
        indexer.index_directory(&docs_dir)?;

        assert!(
            indexer
                .find_files("lib\\.rs$")?
                .iter()
                .any(|file| file.ends_with("lib.rs"))
        );
        assert!(
            indexer
                .find_files("guide\\.md$")?
                .iter()
                .any(|file| file.ends_with("guide.md"))
        );

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(src_dir.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(docs_dir.join("guide.md").to_string_lossy().as_ref()));

        Ok(())
    }

    // Snapshot persistence leaves exactly one `index.md`, no per-file docs.
    #[test]
    fn batch_indexing_writes_single_markdown_file() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(workspace.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(workspace.join("README.md").to_string_lossy().as_ref()));

        Ok(())
    }

    // Old `<sha256>.md` artifacts are swept on the next batch persist.
    #[test]
    fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;

        let legacy_file_name = format!("{}.md", calculate_hash("legacy-path"));
        let legacy_file_path = workspace
            .join(".vtcode")
            .join("index")
            .join(&legacy_file_name);
        fs::write(&legacy_file_path, "# legacy")?;
        assert!(legacy_file_path.exists());

        indexer.index_directory(workspace)?;

        assert!(!legacy_file_path.exists());
        let files = fs::read_dir(workspace.join(".vtcode").join("index"))?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        Ok(())
    }
}