1pub mod file_search;
9pub mod markdown_store;
10
11use anyhow::Result;
12use hashbrown::HashMap;
13use ignore::{DirEntry, Walk};
14use regex::Regex;
15use serde::{Deserialize, Serialize};
16use std::fmt::Write as FmtWrite;
17use std::fs;
18use std::io::{BufWriter, ErrorKind, Write};
19use std::path::{Path, PathBuf};
20use std::sync::Arc;
21use std::time::SystemTime;
22
23pub trait IndexStorage: Send + Sync {
25 fn init(&self, index_dir: &Path) -> Result<()>;
27
28 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
30
31 fn prefers_snapshot_persistence(&self) -> bool {
37 false
38 }
39
40 fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
44 Ok(())
45 }
46
47 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
52 for entry in entries {
53 self.persist(index_dir, entry)?;
54 }
55 Ok(())
56 }
57
58 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
64 let owned = entries
65 .iter()
66 .map(|entry| (*entry).clone())
67 .collect::<Vec<_>>();
68 self.persist_batch(index_dir, &owned)
69 }
70}
71
72pub trait TraversalFilter: Send + Sync {
74 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
76
77 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
79}
80
81#[derive(Debug, Default, Clone)]
83pub struct MarkdownIndexStorage;
84
85impl IndexStorage for MarkdownIndexStorage {
86 fn init(&self, index_dir: &Path) -> Result<()> {
87 fs::create_dir_all(index_dir)?;
88 Ok(())
89 }
90
91 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
92 fs::create_dir_all(index_dir)?;
93 let file_name = format!("{}.md", calculate_hash(&entry.path));
94 let index_path = index_dir.join(file_name);
95 let file = fs::File::create(index_path)?;
96 let mut writer = BufWriter::new(file);
97 writeln!(writer, "# File Index: {}", entry.path)?;
98 writeln!(writer)?;
99 write_markdown_fields(&mut writer, entry)?;
100 writer.flush()?;
101 Ok(())
102 }
103
104 fn prefers_snapshot_persistence(&self) -> bool {
105 true
106 }
107
108 fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
109 let file_name = format!(
110 "{}.md",
111 calculate_hash(file_path.to_string_lossy().as_ref())
112 );
113 let index_path = index_dir.join(file_name);
114 match fs::remove_file(index_path) {
115 Ok(()) => Ok(()),
116 Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
117 Err(err) => Err(err.into()),
118 }
119 }
120
121 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
122 persist_markdown_snapshot(index_dir, entries.iter())
123 }
124
125 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
126 persist_markdown_snapshot(index_dir, entries.iter().copied())
127 }
128}
129
130fn persist_markdown_snapshot<'a>(
131 index_dir: &Path,
132 entries: impl IntoIterator<Item = &'a FileIndex>,
133) -> Result<()> {
134 let entries = entries.into_iter().collect::<Vec<_>>();
135
136 fs::create_dir_all(index_dir)?;
137 let temp_path = index_dir.join(".index.md.tmp");
138 let final_path = index_dir.join("index.md");
139 let file = fs::File::create(&temp_path)?;
140 let mut writer = BufWriter::new(file);
141
142 writeln!(writer, "# Workspace File Index")?;
143 writeln!(writer)?;
144 writeln!(writer, "- **Entries**: {}", entries.len())?;
145 writeln!(writer)?;
146
147 for entry in entries {
148 write_markdown_entry(&mut writer, entry)?;
149 }
150
151 writer.flush()?;
152 fs::rename(temp_path, final_path)?;
153 cleanup_legacy_markdown_entries(index_dir)?;
154 Ok(())
155}
156
157#[derive(Debug, Default, Clone)]
159pub struct ConfigTraversalFilter;
160
161impl TraversalFilter for ConfigTraversalFilter {
162 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
163 !should_skip_dir(path, config)
164 }
165
166 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
167 if !path.is_file() {
168 return false;
169 }
170
171 if config.ignore_hidden
173 && path
174 .file_name()
175 .and_then(|n| n.to_str())
176 .is_some_and(|s| s.starts_with('.'))
177 {
178 return false;
179 }
180
181 if let Some(file_name) = path.file_name().and_then(|n| n.to_str())
183 && (vtcode_commons::exclusions::is_sensitive_file(file_name)
184 || file_name == ".gitignore"
185 || file_name == ".git")
186 {
187 return false;
188 }
189
190 true
191 }
192}
193
/// Settings that control where the index lives and which paths are visited.
#[derive(Debug, Clone)]
pub struct SimpleIndexerConfig {
    /// Root directory of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory the storage backend writes index data into.
    index_dir: PathBuf,
    /// When `true`, files and directories whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directories (and everything below them) that are never indexed.
    excluded_dirs: Vec<PathBuf>,
    /// Directories that are indexed even when an exclusion rule would match.
    allowed_dirs: Vec<PathBuf>,
}
203
204impl SimpleIndexerConfig {
205 pub fn new(workspace_root: PathBuf) -> Self {
207 let index_dir = workspace_root.join(".vtcode").join("index");
208 let vtcode_dir = workspace_root.join(".vtcode");
209 let external_dir = vtcode_dir.join("external");
210
211 let mut excluded_dirs: Vec<PathBuf> = vtcode_commons::exclusions::DEFAULT_EXCLUDED_DIRS
212 .iter()
213 .map(|name| workspace_root.join(name))
214 .collect();
215 excluded_dirs.push(index_dir.clone());
216 excluded_dirs.push(vtcode_dir);
217
218 excluded_dirs.dedup();
219
220 Self {
221 workspace_root,
222 index_dir,
223 ignore_hidden: true,
224 excluded_dirs,
225 allowed_dirs: vec![external_dir],
226 }
227 }
228
229 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
231 let index_dir = index_dir.into();
232 self.index_dir = index_dir.clone();
233 self.push_unique_excluded(index_dir);
234 self
235 }
236
237 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
239 let path = path.into();
240 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
241 self.allowed_dirs.push(path);
242 }
243 self
244 }
245
246 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
248 let path = path.into();
249 self.push_unique_excluded(path);
250 self
251 }
252
253 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
255 self.ignore_hidden = ignore_hidden;
256 self
257 }
258
259 pub fn workspace_root(&self) -> &Path {
261 &self.workspace_root
262 }
263
264 pub fn index_dir(&self) -> &Path {
266 &self.index_dir
267 }
268
269 fn push_unique_excluded(&mut self, path: PathBuf) {
270 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
271 self.excluded_dirs.push(path);
272 }
273 }
274}
275
276#[derive(Debug, Clone, Serialize, Deserialize)]
278pub struct FileIndex {
279 pub path: String,
281 pub hash: String,
283 pub modified: u64,
285 pub size: u64,
287 pub language: String,
289 pub tags: Vec<String>,
291}
292
293#[derive(Debug, Clone, Serialize, Deserialize)]
295pub struct SearchResult {
296 pub file_path: String,
297 pub line_number: usize,
298 pub line_content: String,
299 pub matches: Vec<String>,
300}
301
302pub struct SimpleIndexer {
304 config: SimpleIndexerConfig,
305 index_cache: HashMap<String, FileIndex>,
306 storage: Arc<dyn IndexStorage>,
307 filter: Arc<dyn TraversalFilter>,
308}
309
310impl SimpleIndexer {
311 pub fn new(workspace_root: PathBuf) -> Self {
313 Self::with_components(
314 SimpleIndexerConfig::new(workspace_root),
315 Arc::new(MarkdownIndexStorage),
316 Arc::new(ConfigTraversalFilter),
317 )
318 }
319
320 pub fn with_config(config: SimpleIndexerConfig) -> Self {
322 Self::with_components(
323 config,
324 Arc::new(MarkdownIndexStorage),
325 Arc::new(ConfigTraversalFilter),
326 )
327 }
328
329 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
331 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
332 Self::with_config(config)
333 }
334
335 pub fn with_components(
337 config: SimpleIndexerConfig,
338 storage: Arc<dyn IndexStorage>,
339 filter: Arc<dyn TraversalFilter>,
340 ) -> Self {
341 Self {
342 config,
343 index_cache: HashMap::new(),
344 storage,
345 filter,
346 }
347 }
348
349 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
351 Self { storage, ..self }
352 }
353
354 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
356 Self { filter, ..self }
357 }
358
359 pub fn init(&self) -> Result<()> {
361 self.storage.init(self.config.index_dir())
362 }
363
364 pub fn workspace_root(&self) -> &Path {
366 self.config.workspace_root()
367 }
368
369 pub fn index_dir(&self) -> &Path {
371 self.config.index_dir()
372 }
373
374 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
376 let cache_key = file_path.to_string_lossy().into_owned();
377
378 if self.storage.prefers_snapshot_persistence() {
379 let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
380 self.build_file_index(file_path)?
381 } else {
382 None
383 };
384
385 self.apply_snapshot_file_update(cache_key, next_entry)?;
386 return Ok(());
387 }
388
389 if !file_path.exists() || !self.should_process_file_path(file_path) {
390 self.index_cache.remove(cache_key.as_str());
391 self.storage.remove(self.config.index_dir(), file_path)?;
392 return Ok(());
393 }
394
395 if let Some(index) = self.build_file_index(file_path)? {
396 self.storage.persist(self.config.index_dir(), &index)?;
397 self.index_cache.insert(index.path.clone(), index);
398 } else {
399 self.index_cache.remove(cache_key.as_str());
400 self.storage.remove(self.config.index_dir(), file_path)?;
401 }
402
403 Ok(())
404 }
405
406 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
410 let walker = self.build_walker(dir_path);
411
412 let mut entries = Vec::new();
413
414 for entry in walker.filter_map(|e| e.ok()) {
415 let path = entry.path();
416
417 if entry.file_type().is_some_and(|ft| ft.is_file())
419 && let Some(index) = self.build_file_index(path)?
420 {
421 entries.push(index);
422 }
423 }
424
425 if self.storage.prefers_snapshot_persistence() {
426 self.apply_snapshot_directory_update(dir_path, &entries)?;
427 } else {
428 entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
429 self.storage
430 .persist_batch(self.config.index_dir(), &entries)?;
431 }
432
433 self.replace_cached_entries(dir_path, &entries);
434
435 Ok(())
436 }
437
438 pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
441 let walker = self.build_walker(dir_path);
442
443 let mut files = walker
444 .filter_map(|e| e.ok())
445 .filter(|e| {
446 if !e.file_type().is_some_and(|ft| ft.is_file()) {
447 return false;
448 }
449
450 self.should_process_file_path(e.path())
451 })
452 .map(|e| e.path().to_string_lossy().into_owned())
453 .collect::<Vec<_>>();
454 files.sort_unstable();
455 files
456 }
457
458 fn search_files_internal(
461 &self,
462 regex: &Regex,
463 path_filter: Option<&str>,
464 extract_matches: bool,
465 ) -> Vec<SearchResult> {
466 let mut results = Vec::with_capacity(self.index_cache.len());
467
468 for file_path in self.index_cache.keys() {
469 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
470 continue;
471 }
472
473 if let Ok(content) = fs::read_to_string(file_path) {
474 for (line_num, line) in content.lines().enumerate() {
475 if regex.is_match(line) {
476 let matches = if extract_matches {
477 regex
478 .find_iter(line)
479 .map(|m| m.as_str().to_string())
480 .collect()
481 } else {
482 vec![line.to_string()]
483 };
484
485 results.push(SearchResult {
486 file_path: file_path.clone(),
487 line_number: line_num + 1,
488 line_content: line.to_string(),
489 matches,
490 });
491 }
492 }
493 }
494 }
495
496 results.sort_unstable_by(|left, right| {
497 left.file_path
498 .cmp(&right.file_path)
499 .then_with(|| left.line_number.cmp(&right.line_number))
500 });
501 results
502 }
503
504 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
506 let regex = Regex::new(pattern)?;
507 Ok(self.search_files_internal(®ex, path_filter, true))
508 }
509
510 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
512 let regex = Regex::new(pattern)?;
513 let mut results = Vec::with_capacity(self.index_cache.len());
514
515 for file_path in self.index_cache.keys() {
516 if regex.is_match(file_path) {
517 results.push(file_path.clone());
518 }
519 }
520
521 results.sort_unstable();
522 Ok(results)
523 }
524
525 pub fn all_files(&self) -> Vec<String> {
528 let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
529 files.sort_unstable();
530 files
531 }
532
533 pub fn get_file_content(
535 &self,
536 file_path: &str,
537 start_line: Option<usize>,
538 end_line: Option<usize>,
539 ) -> Result<String> {
540 let content = fs::read_to_string(file_path)?;
541 let start = start_line.unwrap_or(1).max(1);
542 let end = end_line.unwrap_or(usize::MAX);
543
544 if start > end {
545 return Ok(String::new());
546 }
547
548 let mut result = String::new();
549 for (line_number, line) in content.lines().enumerate() {
550 let line_number = line_number + 1;
551 if line_number < start {
552 continue;
553 }
554 if line_number > end {
555 break;
556 }
557 writeln!(&mut result, "{line_number}: {line}")?;
558 }
559
560 Ok(result)
561 }
562
563 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
565 let path = Path::new(dir_path);
566 if !path.exists() {
567 return Ok(vec![]);
568 }
569
570 let mut files = Vec::new();
571
572 for entry in fs::read_dir(path)? {
573 let entry = entry?;
574 let file_name = entry.file_name().to_string_lossy().into_owned();
575
576 if !show_hidden && file_name.starts_with('.') {
577 continue;
578 }
579
580 files.push(file_name);
581 }
582
583 files.sort_unstable();
584 Ok(files)
585 }
586
587 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
589 let regex = Regex::new(pattern)?;
590 Ok(self.search_files_internal(®ex, file_pattern, false))
591 }
592
593 fn is_allowed_path(&self, path: &Path) -> bool {
594 self.config
595 .allowed_dirs
596 .iter()
597 .any(|allowed| path.starts_with(allowed))
598 }
599
600 #[inline]
601 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
602 let metadata = fs::metadata(file_path)?;
603 let modified = metadata.modified()?;
604 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
605 }
606
607 #[inline]
608 fn detect_language(&self, file_path: &Path) -> String {
609 file_path
610 .extension()
611 .and_then(|ext| ext.to_str())
612 .unwrap_or("unknown")
613 .to_string()
614 }
615
616 fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
617 if !self.should_process_file_path(file_path) {
618 return Ok(None);
619 }
620
621 let content = match fs::read_to_string(file_path) {
622 Ok(text) => text,
623 Err(err) => {
624 if err.kind() == ErrorKind::InvalidData {
625 return Ok(None);
626 }
627 return Err(err.into());
628 }
629 };
630
631 let index = FileIndex {
632 path: file_path.to_string_lossy().into_owned(),
633 hash: calculate_hash(&content),
634 modified: self.get_modified_time(file_path)?,
635 size: content.len() as u64,
636 language: self.detect_language(file_path),
637 tags: vec![],
638 };
639
640 Ok(Some(index))
641 }
642
643 #[inline]
644 fn is_excluded_path(&self, path: &Path) -> bool {
645 self.config
646 .excluded_dirs
647 .iter()
648 .any(|excluded| path.starts_with(excluded))
649 }
650
651 #[inline]
652 fn should_index_file_path(&self, path: &Path) -> bool {
653 self.filter.should_index_file(path, &self.config)
654 }
655
656 #[inline]
657 fn should_process_file_path(&self, path: &Path) -> bool {
658 if self.is_allowed_path(path) {
659 return self.should_index_file_path(path);
660 }
661
662 !self.is_excluded_path(path) && self.should_index_file_path(path)
663 }
664
665 fn build_walker(&self, dir_path: &Path) -> Walk {
666 let walk_root = dir_path.to_path_buf();
667 let config = self.config.clone();
668 let filter = Arc::clone(&self.filter);
669
670 let mut builder = vtcode_commons::walk::build_default_walker(dir_path);
671 builder.filter_entry(move |entry| {
672 should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
673 });
674 builder.build()
675 }
676
677 fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
678 self.index_cache
679 .retain(|path, _| !Path::new(path).starts_with(dir_path));
680
681 self.index_cache.extend(
682 entries
683 .iter()
684 .cloned()
685 .map(|entry| (entry.path.clone(), entry)),
686 );
687 }
688
689 fn apply_snapshot_file_update(
690 &mut self,
691 cache_key: String,
692 next_entry: Option<FileIndex>,
693 ) -> Result<()> {
694 let previous_entry = match next_entry {
695 Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
696 None => self.index_cache.remove(cache_key.as_str()),
697 };
698
699 if let Err(err) = self.persist_current_snapshot() {
700 match previous_entry {
701 Some(entry) => {
702 self.index_cache.insert(cache_key, entry);
703 }
704 None => {
705 self.index_cache.remove(cache_key.as_str());
706 }
707 }
708 return Err(err);
709 }
710
711 Ok(())
712 }
713
714 fn apply_snapshot_directory_update(
715 &mut self,
716 dir_path: &Path,
717 entries: &[FileIndex],
718 ) -> Result<()> {
719 let previous_entries = self.take_cached_entries(dir_path);
720 self.index_cache.extend(
721 entries
722 .iter()
723 .cloned()
724 .map(|entry| (entry.path.clone(), entry)),
725 );
726
727 if let Err(err) = self.persist_current_snapshot() {
728 self.index_cache
729 .retain(|path, _| !Path::new(path).starts_with(dir_path));
730 self.index_cache.extend(
731 previous_entries
732 .into_iter()
733 .map(|entry| (entry.path.clone(), entry)),
734 );
735 return Err(err);
736 }
737
738 Ok(())
739 }
740
741 fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
742 let keys = self
743 .index_cache
744 .keys()
745 .filter(|path| Path::new(path).starts_with(dir_path))
746 .cloned()
747 .collect::<Vec<_>>();
748
749 keys.into_iter()
750 .filter_map(|path| self.index_cache.remove(path.as_str()))
751 .collect()
752 }
753
754 fn persist_current_snapshot(&self) -> Result<()> {
755 let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
756 snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
757 self.storage
758 .persist_batch_refs(self.config.index_dir(), &snapshot)
759 }
760}
761
762impl Clone for SimpleIndexer {
763 fn clone(&self) -> Self {
764 Self {
765 config: self.config.clone(),
766 index_cache: self.index_cache.clone(),
767 storage: self.storage.clone(),
768 filter: self.filter.clone(),
769 }
770 }
771}
772
773fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
774 if is_allowed_path_or_ancestor(path, config) {
775 return false;
776 }
777
778 if config
779 .excluded_dirs
780 .iter()
781 .any(|excluded| path.starts_with(excluded))
782 {
783 return true;
784 }
785
786 if config.ignore_hidden
787 && path
788 .file_name()
789 .and_then(|name| name.to_str())
790 .is_some_and(|name_str| name_str.starts_with('.'))
791 {
792 return true;
793 }
794
795 false
796}
797
798fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
799 config
800 .allowed_dirs
801 .iter()
802 .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
803}
804
805fn should_visit_entry(
806 entry: &DirEntry,
807 walk_root: &Path,
808 config: &SimpleIndexerConfig,
809 filter: &dyn TraversalFilter,
810) -> bool {
811 if entry.path() == walk_root {
812 return true;
813 }
814
815 if !entry
816 .file_type()
817 .is_some_and(|file_type| file_type.is_dir())
818 {
819 return true;
820 }
821
822 filter.should_descend(entry.path(), config)
823}
824
825#[inline]
826fn calculate_hash(content: &str) -> String {
827 vtcode_commons::utils::calculate_sha256(content.as_bytes())
828}
829
830fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
831 writeln!(writer, "## {}", entry.path)?;
832 writeln!(writer)?;
833 write_markdown_fields(writer, entry)?;
834 writeln!(writer)?;
835 Ok(())
836}
837
838fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
839 writeln!(writer, "- **Path**: {}", entry.path)?;
840 writeln!(writer, "- **Hash**: {}", entry.hash)?;
841 writeln!(writer, "- **Modified**: {}", entry.modified)?;
842 writeln!(writer, "- **Size**: {} bytes", entry.size)?;
843 writeln!(writer, "- **Language**: {}", entry.language)?;
844 writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
845 Ok(())
846}
847
848fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
849 for entry in fs::read_dir(index_dir)? {
850 let entry = entry?;
851 let file_name = entry.file_name();
852 let file_name = file_name.to_string_lossy();
853 if is_legacy_markdown_entry_name(file_name.as_ref()) {
854 fs::remove_file(entry.path())?;
855 }
856 }
857 Ok(())
858}
859
/// Returns `true` when `file_name` looks like a legacy per-file entry:
/// exactly 64 ASCII hex digits followed by `.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    file_name
        .strip_suffix(".md")
        .is_some_and(|stem| stem.len() == 64 && stem.chars().all(|ch| ch.is_ascii_hexdigit()))
}
867
868#[cfg(test)]
869mod tests {
870 use super::*;
871 use std::fs;
872 use std::sync::{Arc, Mutex};
873 use tempfile::tempdir;
874
/// With the default configuration, files inside dot-directories are not
/// indexed while files in regular directories are.
#[test]
fn skips_hidden_directories_by_default() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    // Hidden directory whose content must stay out of the index.
    let private_dir = root.join(".private");
    fs::create_dir_all(&private_dir)?;
    fs::write(private_dir.join("secret.txt"), "classified")?;

    // Regular directory whose content must be indexed.
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir)?;
    fs::write(source_dir.join("lib.rs"), "fn main() {}")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(root)?;

    let hidden_hits = indexer.find_files("secret\\.txt$")?;
    assert!(hidden_hits.is_empty());
    let visible_hits = indexer.find_files("lib\\.rs$")?;
    assert!(!visible_hits.is_empty());

    Ok(())
}
896
/// Turning off `ignore_hidden` makes files in dot-directories indexable.
#[test]
fn can_include_hidden_directories_when_configured() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    let cache_dir = root.join(".cache");
    fs::create_dir_all(&cache_dir)?;
    fs::write(cache_dir.join("data.log"), "details")?;

    let mut indexer = SimpleIndexer::with_config(
        SimpleIndexerConfig::new(root.to_path_buf()).ignore_hidden(false),
    );
    indexer.init()?;
    indexer.index_directory(root)?;

    let hits = indexer.find_files("data\\.log$")?;
    assert_eq!(hits.len(), 1);

    Ok(())
}
915
/// The allow-listed `.vtcode/external` directory is indexed even though its
/// parent `.vtcode` is both hidden and excluded.
#[test]
fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    let external_dir = root.join(".vtcode").join("external");
    fs::create_dir_all(&external_dir)?;
    fs::write(external_dir.join("plugin.toml"), "name = 'demo'")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(root)?;

    let hits = indexer.find_files("plugin\\.toml$")?;
    assert_eq!(hits.len(), 1);

    Ok(())
}
933
/// Re-indexing a directory drops cache entries for files that were deleted
/// since the previous run.
#[test]
fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();
    let notes = root.join("notes.txt");
    fs::write(&notes, "remember this")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;

    // First pass picks the file up.
    indexer.index_directory(root)?;
    assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);

    // Second pass, after deletion, must forget it again.
    fs::remove_file(&notes)?;
    indexer.index_directory(root)?;

    assert!(indexer.find_files("notes\\.txt$")?.is_empty());
    assert!(indexer.all_files().is_empty());

    Ok(())
}
954
/// `index_file` refuses to index a file that lives inside the (excluded)
/// index directory.
#[test]
fn index_file_skips_excluded_paths() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    let index_dir = root.join(".vtcode").join("index");
    fs::create_dir_all(&index_dir)?;
    let snapshot_file = index_dir.join("index.md");
    fs::write(&snapshot_file, "# generated")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_file(&snapshot_file)?;

    assert!(indexer.all_files().is_empty());

    Ok(())
}
972
/// When an indexed file turns into non-UTF-8 content, re-indexing it removes
/// the entry from both the cache and the persisted snapshot.
#[test]
fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();
    let notes = root.join("notes.txt");
    fs::write(&notes, "remember this")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_file(&notes)?;

    let indexed = indexer.find_files("notes\\.txt$")?;
    assert!(indexed.iter().any(|file| file.ends_with("notes.txt")));

    // Overwrite with bytes that are not valid UTF-8.
    fs::write(&notes, [0xFF, 0xFE, 0xFD])?;
    indexer.index_file(&notes)?;

    assert!(indexer.find_files("notes\\.txt$")?.is_empty());

    let snapshot_path = root.join(".vtcode").join("index").join("index.md");
    let snapshot = fs::read_to_string(snapshot_path)?;
    assert!(!snapshot.contains(notes.to_string_lossy().as_ref()));

    Ok(())
}
1001
/// Indexing files one at a time keeps a single `index.md` snapshot that
/// lists every file indexed so far.
#[test]
fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    let first = root.join("first.txt");
    fs::write(&first, "one")?;
    let second = root.join("second.txt");
    fs::write(&second, "two")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    for file in [&first, &second] {
        indexer.index_file(file)?;
    }

    let index_dir = root.join(".vtcode").join("index");

    // The index directory must contain the snapshot and nothing else.
    let mut names = Vec::new();
    for dir_entry in fs::read_dir(&index_dir)?.filter_map(|entry| entry.ok()) {
        names.push(dir_entry.file_name().to_string_lossy().into_owned());
    }
    assert_eq!(names, vec!["index.md".to_string()]);

    let snapshot = fs::read_to_string(index_dir.join("index.md"))?;
    assert!(snapshot.contains(first.to_string_lossy().as_ref()));
    assert!(snapshot.contains(second.to_string_lossy().as_ref()));

    Ok(())
}
1029
/// `index_directory` creates the index directory and snapshot on its own,
/// even when `init` was never called.
#[test]
fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();
    let notes = root.join("notes.txt");
    fs::write(&notes, "remember this")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.index_directory(root)?;

    let snapshot_path = root.join(".vtcode").join("index").join("index.md");
    let snapshot = fs::read_to_string(snapshot_path)?;
    assert!(snapshot.contains(notes.to_string_lossy().as_ref()));

    Ok(())
}
1045
/// Out-of-range, zero and inverted line ranges are handled gracefully by
/// `get_file_content`.
#[test]
fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();
    let notes = root.join("notes.txt");
    fs::write(&notes, "first\nsecond")?;

    let indexer = SimpleIndexer::new(root.to_path_buf());
    let notes = notes.to_string_lossy().into_owned();

    // Start beyond the end of the file.
    let past_end = indexer.get_file_content(&notes, Some(5), None)?;
    assert_eq!(past_end, "");

    // A start of zero is treated as the first line.
    let from_zero = indexer.get_file_content(&notes, Some(0), Some(1))?;
    assert_eq!(from_zero, "1: first\n");

    // Start after end.
    let inverted = indexer.get_file_content(&notes, Some(2), Some(1))?;
    assert_eq!(inverted, "");

    Ok(())
}
1065
/// A user-supplied `IndexStorage` that only implements the required methods
/// receives every indexed entry through `persist`.
#[test]
fn supports_custom_storage_backends() -> Result<()> {
    /// Storage that collects persisted entries in shared memory.
    #[derive(Clone, Default)]
    struct MemoryStorage {
        records: Arc<Mutex<Vec<FileIndex>>>,
    }

    impl MemoryStorage {
        fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
            Self { records }
        }
    }

    impl IndexStorage for MemoryStorage {
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
            self.records
                .lock()
                .expect("lock poisoned")
                .push(entry.clone());
            Ok(())
        }
    }

    let sandbox = tempdir()?;
    let root = sandbox.path();
    let notes = root.join("notes.txt");
    fs::write(&notes, "remember this")?;

    let shared_records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
    let backend = MemoryStorage::new(Arc::clone(&shared_records));

    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_storage(Arc::new(backend));
    indexer.init()?;
    indexer.index_directory(root)?;

    let persisted = shared_records.lock().expect("lock poisoned");
    assert_eq!(persisted.len(), 1);
    assert_eq!(persisted[0].path, notes.to_string_lossy().into_owned());

    Ok(())
}
1112
/// A custom `TraversalFilter` can veto individual files while delegating
/// everything else to the default filter.
#[test]
fn custom_filters_can_skip_files() -> Result<()> {
    /// Rejects `.rs` files (case-insensitively) and defers to the default
    /// filter otherwise.
    #[derive(Default)]
    struct SkipRustFilter {
        inner: ConfigTraversalFilter,
    }

    impl TraversalFilter for SkipRustFilter {
        fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            self.inner.should_descend(path, config)
        }

        fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            let is_rust_source = path
                .extension()
                .and_then(|ext| ext.to_str())
                .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"));

            !is_rust_source && self.inner.should_index_file(path, config)
        }
    }

    let sandbox = tempdir()?;
    let root = sandbox.path();
    fs::write(root.join("lib.rs"), "fn main() {}")?;
    fs::write(root.join("README.md"), "# Notes")?;

    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_filter(Arc::new(SkipRustFilter::default()));
    indexer.init()?;
    indexer.index_directory(root)?;

    assert!(indexer.find_files("lib\\.rs$")?.is_empty());
    assert!(!indexer.find_files("README\\.md$")?.is_empty());

    Ok(())
}
1154
/// A custom `TraversalFilter` can prune whole directories from discovery.
#[test]
fn custom_filters_can_skip_directories() -> Result<()> {
    /// Refuses to descend into any directory named `generated` and defers to
    /// the default filter otherwise.
    #[derive(Default)]
    struct SkipGeneratedFilter {
        inner: ConfigTraversalFilter,
    }

    impl TraversalFilter for SkipGeneratedFilter {
        fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            !path.ends_with("generated") && self.inner.should_descend(path, config)
        }

        fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            self.inner.should_index_file(path, config)
        }
    }

    let sandbox = tempdir()?;
    let root = sandbox.path();

    let generated_dir = root.join("generated");
    fs::create_dir_all(&generated_dir)?;
    fs::write(generated_dir.join("skip.txt"), "ignore me")?;
    fs::write(root.join("README.md"), "# Notes")?;

    let indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_filter(Arc::new(SkipGeneratedFilter::default()));
    let discovered = indexer.discover_files(root);

    assert!(!discovered.iter().any(|file| file.ends_with("skip.txt")));
    assert!(discovered.iter().any(|file| file.ends_with("README.md")));

    Ok(())
}
1193
/// Indexing a second directory keeps the entries of a previously indexed
/// sibling directory, both in the cache and in the snapshot.
#[test]
fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
    let sandbox = tempdir()?;
    let root = sandbox.path();

    let src_dir = root.join("src");
    let docs_dir = root.join("docs");
    fs::create_dir_all(&src_dir)?;
    fs::create_dir_all(&docs_dir)?;

    let lib_file = src_dir.join("lib.rs");
    let guide_file = docs_dir.join("guide.md");
    fs::write(&lib_file, "fn main() {}")?;
    fs::write(&guide_file, "# Guide")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(&src_dir)?;
    indexer.index_directory(&docs_dir)?;

    let lib_hits = indexer.find_files("lib\\.rs$")?;
    assert!(lib_hits.iter().any(|file| file.ends_with("lib.rs")));
    let guide_hits = indexer.find_files("guide\\.md$")?;
    assert!(guide_hits.iter().any(|file| file.ends_with("guide.md")));

    let snapshot_path = root.join(".vtcode").join("index").join("index.md");
    let snapshot = fs::read_to_string(snapshot_path)?;
    assert!(snapshot.contains(lib_file.to_string_lossy().as_ref()));
    assert!(snapshot.contains(guide_file.to_string_lossy().as_ref()));

    Ok(())
}
1230
#[test]
fn batch_indexing_writes_single_markdown_file() -> Result<()> {
    // Two files directly in the workspace root.
    let temp = tempdir()?;
    let root = temp.path();
    let lib_file = root.join("lib.rs");
    let readme_file = root.join("README.md");
    fs::write(&lib_file, "fn main() {}")?;
    fs::write(&readme_file, "# Notes")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(root)?;

    // The index directory must contain exactly one entry, named index.md.
    let index_dir = root.join(".vtcode").join("index");
    let names: Vec<String> = fs::read_dir(&index_dir)?
        .filter_map(Result::ok)
        .map(|dir_entry| dir_entry.file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(names, vec![String::from("index.md")]);

    // That single file must reference both indexed paths.
    let index_content = fs::read_to_string(index_dir.join("index.md"))?;
    assert!(index_content.contains(&*lib_file.to_string_lossy()));
    assert!(index_content.contains(&*readme_file.to_string_lossy()));

    Ok(())
}
1255
#[test]
fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
    let temp = tempdir()?;
    let root = temp.path();
    fs::write(root.join("lib.rs"), "fn main() {}")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;

    // Plant a hash-named markdown file in the index directory before indexing.
    let index_dir = root.join(".vtcode").join("index");
    let legacy_name = format!("{}.md", calculate_hash("legacy-path"));
    let legacy_file = index_dir.join(&legacy_name);
    fs::write(&legacy_file, "# legacy")?;
    assert!(legacy_file.exists());

    indexer.index_directory(root)?;

    // After indexing, the planted file must be gone and only index.md may remain.
    assert!(!legacy_file.exists());
    let names: Vec<String> = fs::read_dir(&index_dir)?
        .filter_map(Result::ok)
        .map(|dir_entry| dir_entry.file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(names, vec![String::from("index.md")]);

    Ok(())
}
1284
#[test]
fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
    // In-memory storage that records every batch handed to `persist_batch`.
    #[derive(Clone, Default)]
    struct SnapshotMemoryStorage {
        snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
    }

    impl SnapshotMemoryStorage {
        fn new(snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>) -> Self {
            Self { snapshots }
        }
    }

    impl IndexStorage for SnapshotMemoryStorage {
        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        // `persist_batch_refs` is intentionally not overridden, so the trait's
        // default implementation is the one in effect for this storage.
        fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
            let mut recorded = self.snapshots.lock().expect("lock poisoned");
            recorded.push(entries.to_vec());
            Ok(())
        }
    }

    let temp = tempdir()?;
    let root = temp.path();
    let file_path = root.join("notes.txt");
    fs::write(&file_path, "remember this")?;

    // Keep one handle to the shared log so the test can inspect it afterwards.
    let snapshots = Arc::new(Mutex::new(Vec::new()));
    let storage = Arc::new(SnapshotMemoryStorage::new(Arc::clone(&snapshots)));

    let mut indexer =
        SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_storage(storage);
    indexer.index_file(&file_path)?;

    // Exactly one batch containing exactly the indexed file must have been recorded.
    let recorded = snapshots.lock().expect("lock poisoned");
    assert_eq!(recorded.len(), 1);
    let only_batch = &recorded[0];
    assert_eq!(only_batch.len(), 1);
    assert_eq!(
        only_batch[0].path,
        file_path.to_string_lossy().into_owned()
    );

    Ok(())
}
1342
#[test]
fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
    // Snapshot-style storage whose second `persist_batch` call fails.
    #[derive(Clone, Default)]
    struct FlakySnapshotStorage {
        persist_count: Arc<Mutex<usize>>,
    }

    impl IndexStorage for FlakySnapshotStorage {
        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
            let mut calls = self.persist_count.lock().expect("lock poisoned");
            *calls += 1;
            // Only the second call errors; every other call succeeds.
            match *calls {
                2 => anyhow::bail!("simulated snapshot persistence failure"),
                _ => Ok(()),
            }
        }
    }

    let temp = tempdir()?;
    let root = temp.path();
    let first = root.join("first.txt");
    let second = root.join("second.txt");
    fs::write(&first, "one")?;
    fs::write(&second, "two")?;

    let mut indexer =
        SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_storage(Arc::new(FlakySnapshotStorage::default()));

    // First file: indexing succeeds and the file becomes searchable.
    indexer.index_file(&first)?;
    let hits_before = indexer.find_files("first\\.txt$")?;
    assert!(hits_before.iter().any(|path| path.ends_with("first.txt")));

    // Second file: indexing must surface the simulated storage error.
    let failure = indexer
        .index_file(&second)
        .expect_err("second persist should fail");
    let message = failure.to_string();
    assert!(message.contains("simulated snapshot persistence failure"));

    // After the failure the first file is still searchable and the second is absent.
    let hits_after = indexer.find_files("first\\.txt$")?;
    assert!(hits_after.iter().any(|path| path.ends_with("first.txt")));
    let second_hits = indexer.find_files("second\\.txt$")?;
    assert!(second_hits.is_empty());

    Ok(())
}
1409}