1use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{DirEntry, Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fmt::Write as FmtWrite;
14use std::fs;
15use std::io::{BufWriter, ErrorKind, Write};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::SystemTime;
19
20pub trait IndexStorage: Send + Sync {
22 fn init(&self, index_dir: &Path) -> Result<()>;
24
25 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
27
28 fn prefers_snapshot_persistence(&self) -> bool {
34 false
35 }
36
37 fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
41 Ok(())
42 }
43
44 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
49 for entry in entries {
50 self.persist(index_dir, entry)?;
51 }
52 Ok(())
53 }
54
55 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
61 let owned = entries
62 .iter()
63 .map(|entry| (*entry).clone())
64 .collect::<Vec<_>>();
65 self.persist_batch(index_dir, &owned)
66 }
67}
68
/// Decides which directories are walked and which files are indexed.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the walker may descend into the directory `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` may be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
77
/// [`IndexStorage`] backend that writes the index as Markdown files inside
/// the index directory.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
81
82impl IndexStorage for MarkdownIndexStorage {
83 fn init(&self, index_dir: &Path) -> Result<()> {
84 fs::create_dir_all(index_dir)?;
85 Ok(())
86 }
87
88 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
89 fs::create_dir_all(index_dir)?;
90 let file_name = format!("{}.md", calculate_hash(&entry.path));
91 let index_path = index_dir.join(file_name);
92 let file = fs::File::create(index_path)?;
93 let mut writer = BufWriter::new(file);
94 writeln!(writer, "# File Index: {}", entry.path)?;
95 writeln!(writer)?;
96 write_markdown_fields(&mut writer, entry)?;
97 writer.flush()?;
98 Ok(())
99 }
100
101 fn prefers_snapshot_persistence(&self) -> bool {
102 true
103 }
104
105 fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
106 let file_name = format!(
107 "{}.md",
108 calculate_hash(file_path.to_string_lossy().as_ref())
109 );
110 let index_path = index_dir.join(file_name);
111 match fs::remove_file(index_path) {
112 Ok(()) => Ok(()),
113 Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
114 Err(err) => Err(err.into()),
115 }
116 }
117
118 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
119 persist_markdown_snapshot(index_dir, entries.iter())
120 }
121
122 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
123 persist_markdown_snapshot(index_dir, entries.iter().copied())
124 }
125}
126
127fn persist_markdown_snapshot<'a>(
128 index_dir: &Path,
129 entries: impl IntoIterator<Item = &'a FileIndex>,
130) -> Result<()> {
131 let entries = entries.into_iter().collect::<Vec<_>>();
132
133 fs::create_dir_all(index_dir)?;
134 let temp_path = index_dir.join(".index.md.tmp");
135 let final_path = index_dir.join("index.md");
136 let file = fs::File::create(&temp_path)?;
137 let mut writer = BufWriter::new(file);
138
139 writeln!(writer, "# Workspace File Index")?;
140 writeln!(writer)?;
141 writeln!(writer, "- **Entries**: {}", entries.len())?;
142 writeln!(writer)?;
143
144 for entry in entries {
145 write_markdown_entry(&mut writer, entry)?;
146 }
147
148 writer.flush()?;
149 fs::rename(temp_path, final_path)?;
150 cleanup_legacy_markdown_entries(index_dir)?;
151 Ok(())
152}
153
/// [`TraversalFilter`] whose decisions are driven by [`SimpleIndexerConfig`].
#[derive(Debug, Default, Clone)]
pub struct ConfigTraversalFilter;
157
158impl TraversalFilter for ConfigTraversalFilter {
159 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
160 !should_skip_dir(path, config)
161 }
162
163 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
164 if !path.is_file() {
165 return false;
166 }
167
168 if config.ignore_hidden
170 && path
171 .file_name()
172 .and_then(|n| n.to_str())
173 .is_some_and(|s| s.starts_with('.'))
174 {
175 return false;
176 }
177
178 if let Some(file_name) = path.file_name().and_then(|n| n.to_str())
180 && (vtcode_commons::exclusions::is_sensitive_file(file_name)
181 || file_name == ".gitignore"
182 || file_name == ".git")
183 {
184 return false;
185 }
186
187 true
188 }
189}
190
/// Settings that control where the index lives and which paths are indexed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    // Root of the workspace; the default directories are derived from it.
    workspace_root: PathBuf,
    // Directory handed to the storage backend (default `<root>/.vtcode/index`).
    index_dir: PathBuf,
    // When true, dot-prefixed files and directories are skipped (default true).
    ignore_hidden: bool,
    // Paths under any of these directories are skipped unless an allowed dir applies.
    excluded_dirs: Vec<PathBuf>,
    // Directories that stay indexable even when an exclusion or the hidden rule
    // would reject them (default `<root>/.vtcode/external`).
    allowed_dirs: Vec<PathBuf>,
}
200
201impl SimpleIndexerConfig {
202 pub fn new(workspace_root: PathBuf) -> Self {
204 let index_dir = workspace_root.join(".vtcode").join("index");
205 let vtcode_dir = workspace_root.join(".vtcode");
206 let external_dir = vtcode_dir.join("external");
207
208 let mut excluded_dirs: Vec<PathBuf> = vtcode_commons::exclusions::DEFAULT_EXCLUDED_DIRS
209 .iter()
210 .map(|name| workspace_root.join(name))
211 .collect();
212 excluded_dirs.push(index_dir.clone());
213 excluded_dirs.push(vtcode_dir);
214
215 excluded_dirs.dedup();
216
217 Self {
218 workspace_root,
219 index_dir,
220 ignore_hidden: true,
221 excluded_dirs,
222 allowed_dirs: vec![external_dir],
223 }
224 }
225
226 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
228 let index_dir = index_dir.into();
229 self.index_dir = index_dir.clone();
230 self.push_unique_excluded(index_dir);
231 self
232 }
233
234 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
236 let path = path.into();
237 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
238 self.allowed_dirs.push(path);
239 }
240 self
241 }
242
243 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
245 let path = path.into();
246 self.push_unique_excluded(path);
247 self
248 }
249
250 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
252 self.ignore_hidden = ignore_hidden;
253 self
254 }
255
256 pub fn workspace_root(&self) -> &Path {
258 &self.workspace_root
259 }
260
261 pub fn index_dir(&self) -> &Path {
263 &self.index_dir
264 }
265
266 fn push_unique_excluded(&mut self, path: PathBuf) {
267 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
268 self.excluded_dirs.push(path);
269 }
270 }
271}
272
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file as a (lossily converted) string; also the cache key.
    pub path: String,
    /// Hash of the file's text content, as produced by `calculate_hash`.
    pub hash: String,
    /// Last-modified time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when there is none.
    pub language: String,
    /// Free-form tags; the indexer in this file always stores an empty list.
    pub tags: Vec<String>,
}
289
/// One matching line reported by `search` or `grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file that contains the match.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// For `search`: every regex match on the line. For `grep`: the whole line.
    pub matches: Vec<String>,
}
298
/// File indexer that keeps an in-memory cache of [`FileIndex`] entries and
/// mirrors it to a pluggable storage backend.
pub struct SimpleIndexer {
    // Workspace, index location and include/exclude rules.
    config: SimpleIndexerConfig,
    // Indexed entries keyed by file path string.
    index_cache: HashMap<String, FileIndex>,
    // Backend used to persist and remove entries.
    storage: Arc<dyn IndexStorage>,
    // Policy that decides which directories and files are visited.
    filter: Arc<dyn TraversalFilter>,
}
306
307impl SimpleIndexer {
308 pub fn new(workspace_root: PathBuf) -> Self {
310 Self::with_components(
311 SimpleIndexerConfig::new(workspace_root),
312 Arc::new(MarkdownIndexStorage),
313 Arc::new(ConfigTraversalFilter),
314 )
315 }
316
317 pub fn with_config(config: SimpleIndexerConfig) -> Self {
319 Self::with_components(
320 config,
321 Arc::new(MarkdownIndexStorage),
322 Arc::new(ConfigTraversalFilter),
323 )
324 }
325
326 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
328 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
329 Self::with_config(config)
330 }
331
332 pub fn with_components(
334 config: SimpleIndexerConfig,
335 storage: Arc<dyn IndexStorage>,
336 filter: Arc<dyn TraversalFilter>,
337 ) -> Self {
338 Self {
339 config,
340 index_cache: HashMap::new(),
341 storage,
342 filter,
343 }
344 }
345
346 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
348 Self { storage, ..self }
349 }
350
351 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
353 Self { filter, ..self }
354 }
355
356 pub fn init(&self) -> Result<()> {
358 self.storage.init(self.config.index_dir())
359 }
360
    /// Workspace root taken from the configuration.
    pub fn workspace_root(&self) -> &Path {
        self.config.workspace_root()
    }
365
    /// Index directory taken from the configuration.
    pub fn index_dir(&self) -> &Path {
        self.config.index_dir()
    }
370
371 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
373 let cache_key = file_path.to_string_lossy().into_owned();
374
375 if self.storage.prefers_snapshot_persistence() {
376 let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
377 self.build_file_index(file_path)?
378 } else {
379 None
380 };
381
382 self.apply_snapshot_file_update(cache_key, next_entry)?;
383 return Ok(());
384 }
385
386 if !file_path.exists() || !self.should_process_file_path(file_path) {
387 self.index_cache.remove(cache_key.as_str());
388 self.storage.remove(self.config.index_dir(), file_path)?;
389 return Ok(());
390 }
391
392 if let Some(index) = self.build_file_index(file_path)? {
393 self.storage.persist(self.config.index_dir(), &index)?;
394 self.index_cache.insert(index.path.clone(), index);
395 } else {
396 self.index_cache.remove(cache_key.as_str());
397 self.storage.remove(self.config.index_dir(), file_path)?;
398 }
399
400 Ok(())
401 }
402
403 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
407 let walker = self.build_walker(dir_path);
408
409 let mut entries = Vec::new();
410
411 for entry in walker.filter_map(|e| e.ok()) {
412 let path = entry.path();
413
414 if entry.file_type().is_some_and(|ft| ft.is_file())
416 && let Some(index) = self.build_file_index(path)?
417 {
418 entries.push(index);
419 }
420 }
421
422 if self.storage.prefers_snapshot_persistence() {
423 self.apply_snapshot_directory_update(dir_path, &entries)?;
424 } else {
425 entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
426 self.storage
427 .persist_batch(self.config.index_dir(), &entries)?;
428 }
429
430 self.replace_cached_entries(dir_path, &entries);
431
432 Ok(())
433 }
434
435 pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
438 let walker = self.build_walker(dir_path);
439
440 let mut files = walker
441 .filter_map(|e| e.ok())
442 .filter(|e| {
443 if !e.file_type().is_some_and(|ft| ft.is_file()) {
444 return false;
445 }
446
447 self.should_process_file_path(e.path())
448 })
449 .map(|e| e.path().to_string_lossy().into_owned())
450 .collect::<Vec<_>>();
451 files.sort_unstable();
452 files
453 }
454
455 fn search_files_internal(
458 &self,
459 regex: &Regex,
460 path_filter: Option<&str>,
461 extract_matches: bool,
462 ) -> Vec<SearchResult> {
463 let mut results = Vec::with_capacity(self.index_cache.len());
464
465 for file_path in self.index_cache.keys() {
466 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
467 continue;
468 }
469
470 if let Ok(content) = fs::read_to_string(file_path) {
471 for (line_num, line) in content.lines().enumerate() {
472 if regex.is_match(line) {
473 let matches = if extract_matches {
474 regex
475 .find_iter(line)
476 .map(|m| m.as_str().to_string())
477 .collect()
478 } else {
479 vec![line.to_string()]
480 };
481
482 results.push(SearchResult {
483 file_path: file_path.clone(),
484 line_number: line_num + 1,
485 line_content: line.to_string(),
486 matches,
487 });
488 }
489 }
490 }
491 }
492
493 results.sort_unstable_by(|left, right| {
494 left.file_path
495 .cmp(&right.file_path)
496 .then_with(|| left.line_number.cmp(&right.line_number))
497 });
498 results
499 }
500
501 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
503 let regex = Regex::new(pattern)?;
504 Ok(self.search_files_internal(®ex, path_filter, true))
505 }
506
507 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
509 let regex = Regex::new(pattern)?;
510 let mut results = Vec::with_capacity(self.index_cache.len());
511
512 for file_path in self.index_cache.keys() {
513 if regex.is_match(file_path) {
514 results.push(file_path.clone());
515 }
516 }
517
518 results.sort_unstable();
519 Ok(results)
520 }
521
522 pub fn all_files(&self) -> Vec<String> {
525 let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
526 files.sort_unstable();
527 files
528 }
529
530 pub fn get_file_content(
532 &self,
533 file_path: &str,
534 start_line: Option<usize>,
535 end_line: Option<usize>,
536 ) -> Result<String> {
537 let content = fs::read_to_string(file_path)?;
538 let start = start_line.unwrap_or(1).max(1);
539 let end = end_line.unwrap_or(usize::MAX);
540
541 if start > end {
542 return Ok(String::new());
543 }
544
545 let mut result = String::new();
546 for (line_number, line) in content.lines().enumerate() {
547 let line_number = line_number + 1;
548 if line_number < start {
549 continue;
550 }
551 if line_number > end {
552 break;
553 }
554 writeln!(&mut result, "{line_number}: {line}")?;
555 }
556
557 Ok(result)
558 }
559
560 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
562 let path = Path::new(dir_path);
563 if !path.exists() {
564 return Ok(vec![]);
565 }
566
567 let mut files = Vec::new();
568
569 for entry in fs::read_dir(path)? {
570 let entry = entry?;
571 let file_name = entry.file_name().to_string_lossy().into_owned();
572
573 if !show_hidden && file_name.starts_with('.') {
574 continue;
575 }
576
577 files.push(file_name);
578 }
579
580 files.sort_unstable();
581 Ok(files)
582 }
583
584 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
586 let regex = Regex::new(pattern)?;
587 Ok(self.search_files_internal(®ex, file_pattern, false))
588 }
589
590 fn is_allowed_path(&self, path: &Path) -> bool {
591 self.config
592 .allowed_dirs
593 .iter()
594 .any(|allowed| path.starts_with(allowed))
595 }
596
597 #[inline]
598 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
599 let metadata = fs::metadata(file_path)?;
600 let modified = metadata.modified()?;
601 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
602 }
603
604 #[inline]
605 fn detect_language(&self, file_path: &Path) -> String {
606 file_path
607 .extension()
608 .and_then(|ext| ext.to_str())
609 .unwrap_or("unknown")
610 .to_string()
611 }
612
613 fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
614 if !self.should_process_file_path(file_path) {
615 return Ok(None);
616 }
617
618 let content = match fs::read_to_string(file_path) {
619 Ok(text) => text,
620 Err(err) => {
621 if err.kind() == ErrorKind::InvalidData {
622 return Ok(None);
623 }
624 return Err(err.into());
625 }
626 };
627
628 let index = FileIndex {
629 path: file_path.to_string_lossy().into_owned(),
630 hash: calculate_hash(&content),
631 modified: self.get_modified_time(file_path)?,
632 size: content.len() as u64,
633 language: self.detect_language(file_path),
634 tags: vec![],
635 };
636
637 Ok(Some(index))
638 }
639
640 #[inline]
641 fn is_excluded_path(&self, path: &Path) -> bool {
642 self.config
643 .excluded_dirs
644 .iter()
645 .any(|excluded| path.starts_with(excluded))
646 }
647
    /// Asks the traversal filter whether the file at `path` may be indexed.
    #[inline]
    fn should_index_file_path(&self, path: &Path) -> bool {
        self.filter.should_index_file(path, &self.config)
    }
652
653 #[inline]
654 fn should_process_file_path(&self, path: &Path) -> bool {
655 if self.is_allowed_path(path) {
656 return self.should_index_file_path(path);
657 }
658
659 !self.is_excluded_path(path) && self.should_index_file_path(path)
660 }
661
662 fn build_walker(&self, dir_path: &Path) -> Walk {
663 let walk_root = dir_path.to_path_buf();
664 let config = self.config.clone();
665 let filter = Arc::clone(&self.filter);
666
667 let mut builder = WalkBuilder::new(dir_path);
668 builder
669 .hidden(false)
670 .git_ignore(true)
671 .git_global(true)
672 .git_exclude(true)
673 .ignore(true)
674 .parents(true);
675 builder.filter_entry(move |entry| {
676 should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
677 });
678 builder.build()
679 }
680
681 fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
682 self.index_cache
683 .retain(|path, _| !Path::new(path).starts_with(dir_path));
684
685 self.index_cache.extend(
686 entries
687 .iter()
688 .cloned()
689 .map(|entry| (entry.path.clone(), entry)),
690 );
691 }
692
693 fn apply_snapshot_file_update(
694 &mut self,
695 cache_key: String,
696 next_entry: Option<FileIndex>,
697 ) -> Result<()> {
698 let previous_entry = match next_entry {
699 Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
700 None => self.index_cache.remove(cache_key.as_str()),
701 };
702
703 if let Err(err) = self.persist_current_snapshot() {
704 match previous_entry {
705 Some(entry) => {
706 self.index_cache.insert(cache_key, entry);
707 }
708 None => {
709 self.index_cache.remove(cache_key.as_str());
710 }
711 }
712 return Err(err);
713 }
714
715 Ok(())
716 }
717
718 fn apply_snapshot_directory_update(
719 &mut self,
720 dir_path: &Path,
721 entries: &[FileIndex],
722 ) -> Result<()> {
723 let previous_entries = self.take_cached_entries(dir_path);
724 self.index_cache.extend(
725 entries
726 .iter()
727 .cloned()
728 .map(|entry| (entry.path.clone(), entry)),
729 );
730
731 if let Err(err) = self.persist_current_snapshot() {
732 self.index_cache
733 .retain(|path, _| !Path::new(path).starts_with(dir_path));
734 self.index_cache.extend(
735 previous_entries
736 .into_iter()
737 .map(|entry| (entry.path.clone(), entry)),
738 );
739 return Err(err);
740 }
741
742 Ok(())
743 }
744
745 fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
746 let keys = self
747 .index_cache
748 .keys()
749 .filter(|path| Path::new(path).starts_with(dir_path))
750 .cloned()
751 .collect::<Vec<_>>();
752
753 keys.into_iter()
754 .filter_map(|path| self.index_cache.remove(path.as_str()))
755 .collect()
756 }
757
758 fn persist_current_snapshot(&self) -> Result<()> {
759 let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
760 snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
761 self.storage
762 .persist_batch_refs(self.config.index_dir(), &snapshot)
763 }
764}
765
766impl Clone for SimpleIndexer {
767 fn clone(&self) -> Self {
768 Self {
769 config: self.config.clone(),
770 index_cache: self.index_cache.clone(),
771 storage: self.storage.clone(),
772 filter: self.filter.clone(),
773 }
774 }
775}
776
777fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
778 if is_allowed_path_or_ancestor(path, config) {
779 return false;
780 }
781
782 if config
783 .excluded_dirs
784 .iter()
785 .any(|excluded| path.starts_with(excluded))
786 {
787 return true;
788 }
789
790 if config.ignore_hidden
791 && path
792 .file_name()
793 .and_then(|name| name.to_str())
794 .is_some_and(|name_str| name_str.starts_with('.'))
795 {
796 return true;
797 }
798
799 false
800}
801
802fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
803 config
804 .allowed_dirs
805 .iter()
806 .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
807}
808
809fn should_visit_entry(
810 entry: &DirEntry,
811 walk_root: &Path,
812 config: &SimpleIndexerConfig,
813 filter: &dyn TraversalFilter,
814) -> bool {
815 if entry.path() == walk_root {
816 return true;
817 }
818
819 if !entry
820 .file_type()
821 .is_some_and(|file_type| file_type.is_dir())
822 {
823 return true;
824 }
825
826 filter.should_descend(entry.path(), config)
827}
828
/// Hashes the UTF-8 bytes of `content` with `vtcode_commons::utils::calculate_sha256`
/// and returns the resulting string.
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
833
834fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
835 writeln!(writer, "## {}", entry.path)?;
836 writeln!(writer)?;
837 write_markdown_fields(writer, entry)?;
838 writeln!(writer)?;
839 Ok(())
840}
841
842fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
843 writeln!(writer, "- **Path**: {}", entry.path)?;
844 writeln!(writer, "- **Hash**: {}", entry.hash)?;
845 writeln!(writer, "- **Modified**: {}", entry.modified)?;
846 writeln!(writer, "- **Size**: {} bytes", entry.size)?;
847 writeln!(writer, "- **Language**: {}", entry.language)?;
848 writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
849 Ok(())
850}
851
852fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
853 for entry in fs::read_dir(index_dir)? {
854 let entry = entry?;
855 let file_name = entry.file_name();
856 let file_name = file_name.to_string_lossy();
857 if is_legacy_markdown_entry_name(file_name.as_ref()) {
858 fs::remove_file(entry.path())?;
859 }
860 }
861 Ok(())
862}
863
/// Recognizes per-file entry names: exactly 64 ASCII hex digits followed by `.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    match file_name.strip_suffix(".md") {
        Some(stem) => stem.len() == 64 && stem.chars().all(|c| c.is_ascii_hexdigit()),
        None => false,
    }
}
871
872#[cfg(test)]
873mod tests {
874 use super::*;
875 use std::fs;
876 use std::sync::{Arc, Mutex};
877 use tempfile::tempdir;
878
    // With the default configuration, files inside a dot-prefixed directory
    // are not indexed while files in regular directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }
900
    // Turning `ignore_hidden` off makes files in dot-prefixed directories indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }
919
    // `.vtcode/external` is allowed by default, so its files are indexed even
    // though the parent `.vtcode` directory is both hidden and excluded.
    #[test]
    fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let allowed_dir = workspace.join(".vtcode").join("external");
        fs::create_dir_all(&allowed_dir)?;
        fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("plugin\\.toml$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }
937
    // Re-indexing a directory drops cache entries for files deleted since the
    // previous run.
    #[test]
    fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);

        fs::remove_file(&file_path)?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());
        assert!(indexer.all_files().is_empty());

        Ok(())
    }
958
    // `index_file` ignores a file located inside an excluded directory (here
    // the index directory itself).
    #[test]
    fn index_file_skips_excluded_paths() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let index_dir = workspace.join(".vtcode").join("index");
        fs::create_dir_all(&index_dir)?;
        let generated_index = index_dir.join("index.md");
        fs::write(&generated_index, "# generated")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&generated_index)?;

        assert!(indexer.all_files().is_empty());

        Ok(())
    }
976
    // When an indexed file is overwritten with bytes that are not valid UTF-8,
    // re-indexing it removes the entry from the cache and from the snapshot.
    #[test]
    fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&file_path)?;
        assert!(
            indexer
                .find_files("notes\\.txt$")?
                .iter()
                .any(|file| file.ends_with("notes.txt"))
        );

        // These bytes are not valid UTF-8, so the file can no longer be read as text.
        fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
        indexer.index_file(&file_path)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));

        Ok(())
    }
1005
    // Indexing files one by one keeps a single `index.md` that lists all of them.
    #[test]
    fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&first)?;
        indexer.index_file(&second)?;

        // The index directory must contain only the snapshot file.
        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(first.to_string_lossy().as_ref()));
        assert!(index_content.contains(second.to_string_lossy().as_ref()));

        Ok(())
    }
1033
    // `index_directory` creates the index directory on its own; calling
    // `init` beforehand is not required.
    #[test]
    fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.index_directory(workspace)?;

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));

        Ok(())
    }
1049
    // Out-of-range, zero and inverted line ranges return sensible output
    // instead of panicking.
    #[test]
    fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "first\nsecond")?;

        let indexer = SimpleIndexer::new(workspace.to_path_buf());
        let file_path = file_path.to_string_lossy().into_owned();

        // Start past the end of the file.
        assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
        // A start of 0 is treated as line 1.
        assert_eq!(
            indexer.get_file_content(&file_path, Some(0), Some(1))?,
            "1: first\n"
        );
        // Start greater than end yields nothing.
        assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");

        Ok(())
    }
1069
    // A custom `IndexStorage` that implements only the required methods
    // receives each indexed entry through `persist`.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // Storage that records persisted entries in a shared vector.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );

        Ok(())
    }
1116
    // A custom `TraversalFilter` can veto individual files (here: `.rs` files)
    // while delegating everything else to the default filter.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
1158
    // A custom `TraversalFilter` can keep the walker out of a directory (here:
    // any path ending in `generated`), hiding its files from `discover_files`.
    #[test]
    fn custom_filters_can_skip_directories() -> Result<()> {
        #[derive(Default)]
        struct SkipGeneratedFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipGeneratedFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path.ends_with("generated") {
                    return false;
                }

                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        let generated_dir = workspace.join("generated");
        fs::create_dir_all(&generated_dir)?;
        fs::write(generated_dir.join("skip.txt"), "ignore me")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let indexer = SimpleIndexer::with_config(config)
            .with_filter(Arc::new(SkipGeneratedFilter::default()));
        let files = indexer.discover_files(workspace);

        assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
        assert!(files.iter().any(|file| file.ends_with("README.md")));

        Ok(())
    }
1197
    // Indexing a second directory keeps the entries of the first one, both in
    // the cache and in the written snapshot.
    #[test]
    fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let src_dir = workspace.join("src");
        let docs_dir = workspace.join("docs");
        fs::create_dir_all(&src_dir)?;
        fs::create_dir_all(&docs_dir)?;
        fs::write(src_dir.join("lib.rs"), "fn main() {}")?;
        fs::write(docs_dir.join("guide.md"), "# Guide")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(&src_dir)?;
        indexer.index_directory(&docs_dir)?;

        assert!(
            indexer
                .find_files("lib\\.rs$")?
                .iter()
                .any(|file| file.ends_with("lib.rs"))
        );
        assert!(
            indexer
                .find_files("guide\\.md$")?
                .iter()
                .any(|file| file.ends_with("guide.md"))
        );

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(src_dir.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(docs_dir.join("guide.md").to_string_lossy().as_ref()));

        Ok(())
    }
1234
#[test]
fn batch_indexing_writes_single_markdown_file() -> Result<()> {
    // Indexing a directory with the default indexer must leave exactly one file,
    // `index.md`, in the index directory, and that file must list every indexed path.
    let temp = tempdir()?;
    let root = temp.path();
    let lib_path = root.join("lib.rs");
    let readme_path = root.join("README.md");
    fs::write(&lib_path, "fn main() {}")?;
    fs::write(&readme_path, "# Notes")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(root)?;

    // Collect the names of everything in the index directory; entries that fail
    // to read are skipped.
    let index_dir = root.join(".vtcode").join("index");
    let mut names = Vec::new();
    for entry in fs::read_dir(&index_dir)?.flatten() {
        names.push(entry.file_name().to_string_lossy().into_owned());
    }
    assert_eq!(names, vec!["index.md".to_string()]);

    let snapshot = fs::read_to_string(index_dir.join("index.md"))?;
    assert!(snapshot.contains(lib_path.to_string_lossy().as_ref()));
    assert!(snapshot.contains(readme_path.to_string_lossy().as_ref()));

    Ok(())
}
1259
#[test]
fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
    // A leftover per-file `<hash>.md` entry in the index directory must be gone
    // after a directory index run, leaving only `index.md`.
    let temp = tempdir()?;
    let root = temp.path();
    fs::write(root.join("lib.rs"), "fn main() {}")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;

    // Plant a stale hashed entry next to where the index file will be written.
    let index_dir = root.join(".vtcode").join("index");
    let stale_entry = index_dir.join(format!("{}.md", calculate_hash("legacy-path")));
    fs::write(&stale_entry, "# legacy")?;
    assert!(stale_entry.exists());

    indexer.index_directory(root)?;

    assert!(!stale_entry.exists());
    let remaining: Vec<String> = fs::read_dir(&index_dir)?
        .filter_map(|item| item.ok())
        .map(|item| item.file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(remaining, vec!["index.md".to_string()]);

    Ok(())
}
1288
#[test]
fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
    // A snapshot-preferring storage that only overrides `persist_batch` must
    // still receive the full snapshot when a single file is indexed.

    // In-memory storage that records every batch handed to `persist_batch`.
    #[derive(Clone, Default)]
    struct SnapshotMemoryStorage {
        snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
    }

    impl SnapshotMemoryStorage {
        fn new(snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>) -> Self {
            Self { snapshots }
        }
    }

    impl IndexStorage for SnapshotMemoryStorage {
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
            let mut recorded = self.snapshots.lock().expect("lock poisoned");
            recorded.push(entries.to_vec());
            Ok(())
        }
    }

    let temp = tempdir()?;
    let root = temp.path();
    let note_path = root.join("notes.txt");
    fs::write(&note_path, "remember this")?;

    // Keep a handle on the shared log so it can be inspected after indexing.
    let recorded = Arc::new(Mutex::new(Vec::new()));
    let storage = SnapshotMemoryStorage::new(Arc::clone(&recorded));

    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_storage(Arc::new(storage));
    indexer.index_file(&note_path)?;

    // Exactly one snapshot, holding exactly the one indexed file.
    let guard = recorded.lock().expect("lock poisoned");
    assert_eq!(guard.len(), 1);
    let only_snapshot = &guard[0];
    assert_eq!(only_snapshot.len(), 1);
    assert_eq!(
        only_snapshot[0].path,
        root.join("notes.txt").to_string_lossy().into_owned()
    );

    Ok(())
}
1346
#[test]
fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
    // When snapshot persistence fails for a newly indexed file, that file must
    // not be findable afterwards, while previously indexed files still are.

    // Storage whose second `persist_batch` call fails; every other call succeeds.
    #[derive(Clone, Default)]
    struct FlakySnapshotStorage {
        persist_count: Arc<Mutex<usize>>,
    }

    impl IndexStorage for FlakySnapshotStorage {
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
            let mut calls = self.persist_count.lock().expect("lock poisoned");
            *calls += 1;
            match *calls {
                2 => anyhow::bail!("simulated snapshot persistence failure"),
                _ => Ok(()),
            }
        }
    }

    let temp = tempdir()?;
    let root = temp.path();
    let first = root.join("first.txt");
    let second = root.join("second.txt");
    fs::write(&first, "one")?;
    fs::write(&second, "two")?;

    let storage = Arc::new(FlakySnapshotStorage::default());
    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_storage(storage);

    // First persist succeeds, so the file is findable.
    indexer.index_file(&first)?;
    let hits_before = indexer.find_files("first\\.txt$")?;
    assert!(hits_before.iter().any(|path| path.ends_with("first.txt")));

    // Second persist is the one rigged to fail.
    let failure = indexer
        .index_file(&second)
        .expect_err("second persist should fail");
    let message = failure.to_string();
    assert!(message.contains("simulated snapshot persistence failure"));

    // The earlier entry survives; the failed one left no trace.
    let hits_after = indexer.find_files("first\\.txt$")?;
    assert!(hits_after.iter().any(|path| path.ends_with("first.txt")));
    assert!(indexer.find_files("second\\.txt$")?.is_empty());

    Ok(())
}
1413}