1use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{DirEntry, Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fmt::Write as FmtWrite;
14use std::fs;
15use std::io::{BufWriter, ErrorKind, Write};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::SystemTime;
19
/// Persistence backend for the file index.
///
/// Implementations decide how [`FileIndex`] entries are written under the
/// index directory (per-entry files, a single snapshot file, in-memory, ...).
pub trait IndexStorage: Send + Sync {
    /// Prepares the backing store under `index_dir` (e.g. creates directories).
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Writes or overwrites the record for a single index entry.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;

    /// Returns `true` when this backend rewrites one whole snapshot per update
    /// instead of maintaining per-file records. Defaults to `false`.
    fn prefers_snapshot_persistence(&self) -> bool {
        false
    }

    /// Removes the record for `_file_path`, if any. Defaults to a no-op so
    /// backends without per-file records need not implement deletion.
    fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
        Ok(())
    }

    /// Persists many entries. The default simply calls [`Self::persist`] once
    /// per entry; there is no atomicity across the batch.
    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
        for entry in entries {
            self.persist(index_dir, entry)?;
        }
        Ok(())
    }

    /// Reference-slice variant of [`Self::persist_batch`]. The default clones
    /// each entry so it can reuse the owned-slice implementation.
    fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
        let owned = entries
            .iter()
            .map(|entry| (*entry).clone())
            .collect::<Vec<_>>();
        self.persist_batch(index_dir, &owned)
    }
}
68
/// Policy hooks deciding which directories and files the indexer visits.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when traversal should recurse into the directory `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
77
/// [`IndexStorage`] backend that renders the index as Markdown, preferring a
/// single `index.md` snapshot per workspace over per-file records.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
81
82impl IndexStorage for MarkdownIndexStorage {
83 fn init(&self, index_dir: &Path) -> Result<()> {
84 fs::create_dir_all(index_dir)?;
85 Ok(())
86 }
87
88 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
89 fs::create_dir_all(index_dir)?;
90 let file_name = format!("{}.md", calculate_hash(&entry.path));
91 let index_path = index_dir.join(file_name);
92 let file = fs::File::create(index_path)?;
93 let mut writer = BufWriter::new(file);
94 writeln!(writer, "# File Index: {}", entry.path)?;
95 writeln!(writer)?;
96 write_markdown_fields(&mut writer, entry)?;
97 writer.flush()?;
98 Ok(())
99 }
100
101 fn prefers_snapshot_persistence(&self) -> bool {
102 true
103 }
104
105 fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
106 let file_name = format!(
107 "{}.md",
108 calculate_hash(file_path.to_string_lossy().as_ref())
109 );
110 let index_path = index_dir.join(file_name);
111 match fs::remove_file(index_path) {
112 Ok(()) => Ok(()),
113 Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
114 Err(err) => Err(err.into()),
115 }
116 }
117
118 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
119 persist_markdown_snapshot(index_dir, entries.iter())
120 }
121
122 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
123 persist_markdown_snapshot(index_dir, entries.iter().copied())
124 }
125}
126
127fn persist_markdown_snapshot<'a>(
128 index_dir: &Path,
129 entries: impl IntoIterator<Item = &'a FileIndex>,
130) -> Result<()> {
131 let entries = entries.into_iter().collect::<Vec<_>>();
132
133 fs::create_dir_all(index_dir)?;
134 let temp_path = index_dir.join(".index.md.tmp");
135 let final_path = index_dir.join("index.md");
136 let file = fs::File::create(&temp_path)?;
137 let mut writer = BufWriter::new(file);
138
139 writeln!(writer, "# Workspace File Index")?;
140 writeln!(writer)?;
141 writeln!(writer, "- **Entries**: {}", entries.len())?;
142 writeln!(writer)?;
143
144 for entry in entries {
145 write_markdown_entry(&mut writer, entry)?;
146 }
147
148 writer.flush()?;
149 fs::rename(temp_path, final_path)?;
150 cleanup_legacy_markdown_entries(index_dir)?;
151 Ok(())
152}
153
/// Default [`TraversalFilter`] driven entirely by [`SimpleIndexerConfig`]:
/// honors excluded/allowed directories, the hidden-entry policy, and skips
/// well-known sensitive files such as `.env*`.
#[derive(Debug, Default, Clone)]
pub struct ConfigTraversalFilter;
157
158impl TraversalFilter for ConfigTraversalFilter {
159 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
160 !should_skip_dir(path, config)
161 }
162
163 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
164 if !path.is_file() {
165 return false;
166 }
167
168 if config.ignore_hidden
170 && path
171 .file_name()
172 .and_then(|n| n.to_str())
173 .is_some_and(|s| s.starts_with('.'))
174 {
175 return false;
176 }
177
178 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
180 let is_sensitive = matches!(
181 file_name,
182 ".env"
183 | ".env.local"
184 | ".env.production"
185 | ".env.development"
186 | ".env.test"
187 | ".git"
188 | ".gitignore"
189 | ".DS_Store"
190 ) || file_name.starts_with(".env.");
191 if is_sensitive {
192 return false;
193 }
194 }
195
196 true
197 }
198}
199
/// Tunable settings for [`SimpleIndexer`]: workspace root, index location,
/// hidden-file policy, and the exclusion/allow lists used during traversal.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    // Root directory whose files are indexed.
    workspace_root: PathBuf,
    // Destination directory for index artifacts (default `.vtcode/index`).
    index_dir: PathBuf,
    // When true, dot-prefixed files and directories are skipped.
    ignore_hidden: bool,
    // Subtrees never traversed (unless re-allowed via `allowed_dirs`).
    excluded_dirs: Vec<PathBuf>,
    // Subtrees indexed even when under excluded or hidden parents.
    allowed_dirs: Vec<PathBuf>,
}
209
210impl SimpleIndexerConfig {
211 pub fn new(workspace_root: PathBuf) -> Self {
213 let index_dir = workspace_root.join(".vtcode").join("index");
214 let vtcode_dir = workspace_root.join(".vtcode");
215 let external_dir = vtcode_dir.join("external");
216
217 let mut excluded_dirs = vec![
218 index_dir.clone(),
219 vtcode_dir,
220 workspace_root.join("target"),
221 workspace_root.join("node_modules"),
222 ];
223
224 excluded_dirs.dedup();
225
226 Self {
227 workspace_root,
228 index_dir,
229 ignore_hidden: true,
230 excluded_dirs,
231 allowed_dirs: vec![external_dir],
232 }
233 }
234
235 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
237 let index_dir = index_dir.into();
238 self.index_dir = index_dir.clone();
239 self.push_unique_excluded(index_dir);
240 self
241 }
242
243 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
245 let path = path.into();
246 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
247 self.allowed_dirs.push(path);
248 }
249 self
250 }
251
252 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
254 let path = path.into();
255 self.push_unique_excluded(path);
256 self
257 }
258
259 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
261 self.ignore_hidden = ignore_hidden;
262 self
263 }
264
265 pub fn workspace_root(&self) -> &Path {
267 &self.workspace_root
268 }
269
270 pub fn index_dir(&self) -> &Path {
272 &self.index_dir
273 }
274
275 fn push_unique_excluded(&mut self, path: PathBuf) {
276 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
277 self.excluded_dirs.push(path);
278 }
279 }
280}
281
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Lossy UTF-8 rendering of the file's path; also the cache key.
    pub path: String,
    /// SHA-256 digest of the file contents.
    pub hash: String,
    /// Last-modified time, seconds since the Unix epoch.
    pub modified: u64,
    /// Content size in bytes.
    pub size: u64,
    /// File extension used as the language tag, or `"unknown"`.
    pub language: String,
    /// Free-form labels; currently always empty when built by the indexer.
    pub tags: Vec<String>,
}
298
/// One matching line produced by [`SimpleIndexer::search`] or
/// [`SimpleIndexer::grep`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match.
    pub file_path: String,
    /// 1-based line number of the match.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Individual regex matches on the line (`search`), or the whole line
    /// (`grep`).
    pub matches: Vec<String>,
}
307
/// Lightweight workspace file indexer with pluggable storage and traversal
/// policies. Maintains an in-memory cache of [`FileIndex`] entries keyed by
/// the file's path string.
pub struct SimpleIndexer {
    // Traversal and location settings.
    config: SimpleIndexerConfig,
    // In-memory index keyed by the file's lossy path string.
    index_cache: HashMap<String, FileIndex>,
    // Persistence backend.
    storage: Arc<dyn IndexStorage>,
    // Directory/file visit policy.
    filter: Arc<dyn TraversalFilter>,
}
315
impl SimpleIndexer {
    /// Creates an indexer with default config, Markdown storage, and the
    /// config-driven traversal filter.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self::with_components(
            SimpleIndexerConfig::new(workspace_root),
            Arc::new(MarkdownIndexStorage),
            Arc::new(ConfigTraversalFilter),
        )
    }

    /// Creates an indexer from an explicit config with the default storage
    /// and filter.
    pub fn with_config(config: SimpleIndexerConfig) -> Self {
        Self::with_components(
            config,
            Arc::new(MarkdownIndexStorage),
            Arc::new(ConfigTraversalFilter),
        )
    }

    /// Convenience constructor that overrides the index directory.
    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
        Self::with_config(config)
    }

    /// Fully explicit constructor: config plus storage and filter backends.
    pub fn with_components(
        config: SimpleIndexerConfig,
        storage: Arc<dyn IndexStorage>,
        filter: Arc<dyn TraversalFilter>,
    ) -> Self {
        Self {
            config,
            index_cache: HashMap::new(),
            storage,
            filter,
        }
    }

    /// Returns a copy of this indexer using a different storage backend.
    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
        Self { storage, ..self }
    }

    /// Returns a copy of this indexer using a different traversal filter.
    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
        Self { filter, ..self }
    }

    /// Initializes the storage backend under the configured index directory.
    pub fn init(&self) -> Result<()> {
        self.storage.init(self.config.index_dir())
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.config.workspace_root()
    }

    /// Directory where index artifacts are written.
    pub fn index_dir(&self) -> &Path {
        self.config.index_dir()
    }

    /// Indexes (or un-indexes) a single file, updating both cache and storage.
    ///
    /// A file that no longer exists or is filtered out is removed; snapshot
    /// backends get a full snapshot rewrite with rollback on failure.
    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
        let cache_key = file_path.to_string_lossy().into_owned();

        if self.storage.prefers_snapshot_persistence() {
            // Snapshot path: compute the next state for this one file, then
            // rewrite the snapshot (with cache rollback on failure).
            let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
                self.build_file_index(file_path)?
            } else {
                None
            };

            self.apply_snapshot_file_update(cache_key, next_entry)?;
            return Ok(());
        }

        // Per-file path: missing or filtered files are removed from both
        // cache and storage.
        if !file_path.exists() || !self.should_process_file_path(file_path) {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
            return Ok(());
        }

        if let Some(index) = self.build_file_index(file_path)? {
            self.storage.persist(self.config.index_dir(), &index)?;
            self.index_cache.insert(index.path.clone(), index);
        } else {
            // build_file_index returned None (e.g. non-UTF-8 content): drop
            // any stale record.
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
        }

        Ok(())
    }

    /// Walks `dir_path`, indexes every eligible file, persists the results,
    /// and replaces the cached entries under that directory.
    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
        let walker = self.build_walker(dir_path);

        let mut entries = Vec::new();

        for entry in walker.filter_map(|e| e.ok()) {
            let path = entry.path();

            if entry.file_type().is_some_and(|ft| ft.is_file())
                && let Some(index) = self.build_file_index(path)?
            {
                entries.push(index);
            }
        }

        if self.storage.prefers_snapshot_persistence() {
            self.apply_snapshot_directory_update(dir_path, &entries)?;
        } else {
            // Deterministic output order for per-file backends.
            entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
            self.storage
                .persist_batch(self.config.index_dir(), &entries)?;
        }

        self.replace_cached_entries(dir_path, &entries);

        Ok(())
    }

    /// Lists (sorted) the paths of files under `dir_path` that would be
    /// indexed, without touching cache or storage.
    pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
        let walker = self.build_walker(dir_path);

        let mut files = walker
            .filter_map(|e| e.ok())
            .filter(|e| {
                if !e.file_type().is_some_and(|ft| ft.is_file()) {
                    return false;
                }

                self.should_process_file_path(e.path())
            })
            .map(|e| e.path().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        files.sort_unstable();
        files
    }

    /// Scans the contents of every cached file for `regex` matches.
    ///
    /// `path_filter` is a substring filter on the file path. When
    /// `extract_matches` is true each regex match on a line is collected;
    /// otherwise the whole line is the single "match". Unreadable files are
    /// silently skipped. Results are sorted by path then line number.
    fn search_files_internal(
        &self,
        regex: &Regex,
        path_filter: Option<&str>,
        extract_matches: bool,
    ) -> Vec<SearchResult> {
        let mut results = Vec::new();

        for file_path in self.index_cache.keys() {
            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
                continue;
            }

            if let Ok(content) = fs::read_to_string(file_path) {
                for (line_num, line) in content.lines().enumerate() {
                    if regex.is_match(line) {
                        let matches = if extract_matches {
                            regex
                                .find_iter(line)
                                .map(|m| m.as_str().to_string())
                                .collect()
                        } else {
                            vec![line.to_string()]
                        };

                        results.push(SearchResult {
                            file_path: file_path.clone(),
                            // lines() is 0-based; report 1-based numbers.
                            line_number: line_num + 1,
                            line_content: line.to_string(),
                            matches,
                        });
                    }
                }
            }
        }

        results.sort_unstable_by(|left, right| {
            left.file_path
                .cmp(&right.file_path)
                .then_with(|| left.line_number.cmp(&right.line_number))
        });
        results
    }

    /// Regex content search across cached files, collecting the individual
    /// matches on each line.
    ///
    /// # Errors
    /// Returns an error if `pattern` is not a valid regex.
    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
        let regex = Regex::new(pattern)?;
        Ok(self.search_files_internal(&regex, path_filter, true))
    }

    /// Returns the (sorted) cached file paths whose path string matches
    /// `pattern`.
    ///
    /// # Errors
    /// Returns an error if `pattern` is not a valid regex.
    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
        let regex = Regex::new(pattern)?;
        let mut results = Vec::new();

        for file_path in self.index_cache.keys() {
            if regex.is_match(file_path) {
                results.push(file_path.clone());
            }
        }

        results.sort_unstable();
        Ok(results)
    }

    /// All cached file paths, sorted.
    pub fn all_files(&self) -> Vec<String> {
        let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
        files.sort_unstable();
        files
    }

    /// Reads `file_path` and returns its lines, numbered `N: text`, clamped
    /// to the 1-based inclusive range `[start_line, end_line]`. An empty
    /// string is returned when the range is empty or past the end.
    pub fn get_file_content(
        &self,
        file_path: &str,
        start_line: Option<usize>,
        end_line: Option<usize>,
    ) -> Result<String> {
        let content = fs::read_to_string(file_path)?;
        // Clamp the lower bound to 1 so Some(0) behaves like line 1.
        let start = start_line.unwrap_or(1).max(1);
        let end = end_line.unwrap_or(usize::MAX);

        if start > end {
            return Ok(String::new());
        }

        let mut result = String::new();
        for (line_number, line) in content.lines().enumerate() {
            let line_number = line_number + 1;
            if line_number < start {
                continue;
            }
            if line_number > end {
                break;
            }
            writeln!(&mut result, "{line_number}: {line}")?;
        }

        Ok(result)
    }

    /// Lists the immediate children of `dir_path` (names only, sorted),
    /// optionally including dot-prefixed entries. A missing directory yields
    /// an empty list rather than an error.
    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
        let path = Path::new(dir_path);
        if !path.exists() {
            return Ok(vec![]);
        }

        let mut files = Vec::new();

        for entry in fs::read_dir(path)? {
            let entry = entry?;
            let file_name = entry.file_name().to_string_lossy().into_owned();

            if !show_hidden && file_name.starts_with('.') {
                continue;
            }

            files.push(file_name);
        }

        files.sort_unstable();
        Ok(files)
    }

    /// Like [`Self::search`], but reports the whole matching line instead of
    /// the individual regex matches.
    ///
    /// # Errors
    /// Returns an error if `pattern` is not a valid regex.
    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
        let regex = Regex::new(pattern)?;
        Ok(self.search_files_internal(&regex, file_pattern, false))
    }

    /// Manual recursive walk entry point (legacy alternative to
    /// [`Self::build_walker`]); no-ops when `dir_path` does not exist.
    #[allow(dead_code)]
    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        if !dir_path.exists() {
            return Ok(());
        }

        self.walk_directory_internal(dir_path, callback)
    }

    /// Recursive body of [`Self::walk_directory`]: invokes `callback` for
    /// every file, honoring allowed dirs (always descended) and the filter
    /// (skipped dirs still get their allowed descendants walked).
    #[allow(dead_code)]
    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        for entry in fs::read_dir(dir_path)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Allowed paths bypass the filter entirely.
                if self.is_allowed_path(&path) {
                    self.walk_directory_internal(&path, callback)?;
                    continue;
                }

                // Filtered-out dirs may still contain allowed subtrees.
                if !self.filter.should_descend(&path, &self.config) {
                    self.walk_allowed_descendants(&path, callback)?;
                    continue;
                }

                self.walk_directory_internal(&path, callback)?;
            } else if path.is_file() {
                callback(&path)?;
            }
        }

        Ok(())
    }

    /// True when `path` is inside one of the configured allowed directories.
    #[allow(dead_code)]
    fn is_allowed_path(&self, path: &Path) -> bool {
        self.config
            .allowed_dirs
            .iter()
            .any(|allowed| path.starts_with(allowed))
    }

    /// Walks only the allowed directories located beneath a skipped
    /// `dir_path`.
    #[allow(dead_code)]
    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        // Clone to avoid borrowing self.config across the &mut self calls.
        let allowed_dirs = self.config.allowed_dirs.clone();
        for allowed in allowed_dirs {
            if allowed.starts_with(dir_path) && allowed.exists() {
                self.walk_directory_internal(&allowed, callback)?;
            }
        }
        Ok(())
    }

    /// Last-modified time of `file_path` as seconds since the Unix epoch.
    #[inline]
    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
        let metadata = fs::metadata(file_path)?;
        let modified = metadata.modified()?;
        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
    }

    /// Language tag derived from the file extension, or "unknown".
    #[inline]
    fn detect_language(&self, file_path: &Path) -> String {
        file_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("unknown")
            .to_string()
    }

    /// Builds a [`FileIndex`] for `file_path`, or `Ok(None)` when the file is
    /// filtered out or its contents are not valid UTF-8.
    fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
        if !self.should_process_file_path(file_path) {
            return Ok(None);
        }

        let content = match fs::read_to_string(file_path) {
            Ok(text) => text,
            Err(err) => {
                // Binary/non-UTF-8 files are skipped, not treated as errors.
                if err.kind() == ErrorKind::InvalidData {
                    return Ok(None);
                }
                return Err(err.into());
            }
        };

        let index = FileIndex {
            path: file_path.to_string_lossy().into_owned(),
            hash: calculate_hash(&content),
            modified: self.get_modified_time(file_path)?,
            size: content.len() as u64,
            language: self.detect_language(file_path),
            tags: vec![],
        };

        Ok(Some(index))
    }

    /// True when `path` lies inside a configured excluded directory.
    #[inline]
    fn is_excluded_path(&self, path: &Path) -> bool {
        self.config
            .excluded_dirs
            .iter()
            .any(|excluded| path.starts_with(excluded))
    }

    /// Delegates the per-file decision to the traversal filter.
    #[inline]
    fn should_index_file_path(&self, path: &Path) -> bool {
        self.filter.should_index_file(path, &self.config)
    }

    /// Combined eligibility check: allowed paths skip the exclusion test but
    /// still pass through the file filter.
    #[inline]
    fn should_process_file_path(&self, path: &Path) -> bool {
        if self.is_allowed_path(path) {
            return self.should_index_file_path(path);
        }

        !self.is_excluded_path(path) && self.should_index_file_path(path)
    }

    /// Builds an `ignore`-crate walker rooted at `dir_path` that honors
    /// gitignore rules and defers directory descent to the traversal filter.
    fn build_walker(&self, dir_path: &Path) -> Walk {
        // The filter_entry closure must be 'static, so it owns clones.
        let walk_root = dir_path.to_path_buf();
        let config = self.config.clone();
        let filter = Arc::clone(&self.filter);

        let mut builder = WalkBuilder::new(dir_path);
        builder
            .hidden(false)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .ignore(true)
            .parents(true);
        builder.filter_entry(move |entry| {
            should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
        });
        builder.build()
    }

    /// Replaces every cached entry under `dir_path` with `entries`.
    fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
        self.index_cache
            .retain(|path, _| !Path::new(path).starts_with(dir_path));

        self.index_cache.extend(
            entries
                .iter()
                .cloned()
                .map(|entry| (entry.path.clone(), entry)),
        );
    }

    /// Applies a single-file change to the cache, rewrites the snapshot, and
    /// rolls the cache back if persistence fails.
    fn apply_snapshot_file_update(
        &mut self,
        cache_key: String,
        next_entry: Option<FileIndex>,
    ) -> Result<()> {
        // Remember the displaced state so we can restore it on failure.
        let previous_entry = match next_entry {
            Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
            None => self.index_cache.remove(cache_key.as_str()),
        };

        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: reinstate the prior entry or remove the new one.
            match previous_entry {
                Some(entry) => {
                    self.index_cache.insert(cache_key, entry);
                }
                None => {
                    self.index_cache.remove(cache_key.as_str());
                }
            }
            return Err(err);
        }

        Ok(())
    }

    /// Applies a directory-level change to the cache, rewrites the snapshot,
    /// and rolls the cache back if persistence fails.
    fn apply_snapshot_directory_update(
        &mut self,
        dir_path: &Path,
        entries: &[FileIndex],
    ) -> Result<()> {
        // Remove (and keep) the old entries under dir_path for rollback.
        let previous_entries = self.take_cached_entries(dir_path);
        self.index_cache.extend(
            entries
                .iter()
                .cloned()
                .map(|entry| (entry.path.clone(), entry)),
        );

        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: drop the new entries, restore the previous ones.
            self.index_cache
                .retain(|path, _| !Path::new(path).starts_with(dir_path));
            self.index_cache.extend(
                previous_entries
                    .into_iter()
                    .map(|entry| (entry.path.clone(), entry)),
            );
            return Err(err);
        }

        Ok(())
    }

    /// Removes and returns every cached entry whose path is under `dir_path`.
    fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
        // Collect keys first; we cannot remove while iterating the map.
        let keys = self
            .index_cache
            .keys()
            .filter(|path| Path::new(path).starts_with(dir_path))
            .cloned()
            .collect::<Vec<_>>();

        keys.into_iter()
            .filter_map(|path| self.index_cache.remove(path.as_str()))
            .collect()
    }

    /// Persists the entire cache as one sorted snapshot batch.
    fn persist_current_snapshot(&self) -> Result<()> {
        let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
        snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
        self.storage
            .persist_batch_refs(self.config.index_dir(), &snapshot)
    }
}
830
831impl Clone for SimpleIndexer {
832 fn clone(&self) -> Self {
833 Self {
834 config: self.config.clone(),
835 index_cache: self.index_cache.clone(),
836 storage: self.storage.clone(),
837 filter: self.filter.clone(),
838 }
839 }
840}
841
842fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
843 if is_allowed_path_or_ancestor(path, config) {
844 return false;
845 }
846
847 if config
848 .excluded_dirs
849 .iter()
850 .any(|excluded| path.starts_with(excluded))
851 {
852 return true;
853 }
854
855 if config.ignore_hidden
856 && path
857 .file_name()
858 .and_then(|name| name.to_str())
859 .is_some_and(|name_str| name_str.starts_with('.'))
860 {
861 return true;
862 }
863
864 false
865}
866
867fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
868 config
869 .allowed_dirs
870 .iter()
871 .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
872}
873
874fn should_visit_entry(
875 entry: &DirEntry,
876 walk_root: &Path,
877 config: &SimpleIndexerConfig,
878 filter: &dyn TraversalFilter,
879) -> bool {
880 if entry.path() == walk_root {
881 return true;
882 }
883
884 if !entry
885 .file_type()
886 .is_some_and(|file_type| file_type.is_dir())
887 {
888 return true;
889 }
890
891 filter.should_descend(entry.path(), config)
892}
893
/// SHA-256 hex digest of `content`, delegated to the shared commons helper.
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
898
899fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
900 writeln!(writer, "## {}", entry.path)?;
901 writeln!(writer)?;
902 write_markdown_fields(writer, entry)?;
903 writeln!(writer)?;
904 Ok(())
905}
906
907fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
908 writeln!(writer, "- **Path**: {}", entry.path)?;
909 writeln!(writer, "- **Hash**: {}", entry.hash)?;
910 writeln!(writer, "- **Modified**: {}", entry.modified)?;
911 writeln!(writer, "- **Size**: {} bytes", entry.size)?;
912 writeln!(writer, "- **Language**: {}", entry.language)?;
913 writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
914 Ok(())
915}
916
917fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
918 for entry in fs::read_dir(index_dir)? {
919 let entry = entry?;
920 let file_name = entry.file_name();
921 let file_name = file_name.to_string_lossy();
922 if is_legacy_markdown_entry_name(file_name.as_ref()) {
923 fs::remove_file(entry.path())?;
924 }
925 }
926 Ok(())
927}
928
/// True for legacy per-file record names: exactly 64 hex characters (a
/// SHA-256 digest) followed by `.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    match file_name.strip_suffix(".md") {
        Some(stem) => stem.len() == 64 && stem.chars().all(|ch| ch.is_ascii_hexdigit()),
        None => false,
    }
}
936
937#[cfg(test)]
938mod tests {
939 use super::*;
940 use std::fs;
941 use std::sync::{Arc, Mutex};
942 use tempfile::tempdir;
943
944 #[test]
945 fn skips_hidden_directories_by_default() -> Result<()> {
946 let temp = tempdir()?;
947 let workspace = temp.path();
948 let hidden_dir = workspace.join(".private");
949 fs::create_dir_all(&hidden_dir)?;
950 fs::write(hidden_dir.join("secret.txt"), "classified")?;
951
952 let visible_dir = workspace.join("src");
953 fs::create_dir_all(&visible_dir)?;
954 fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;
955
956 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
957 indexer.init()?;
958 indexer.index_directory(workspace)?;
959
960 assert!(indexer.find_files("secret\\.txt$")?.is_empty());
961 assert!(!indexer.find_files("lib\\.rs$")?.is_empty());
962
963 Ok(())
964 }
965
966 #[test]
967 fn can_include_hidden_directories_when_configured() -> Result<()> {
968 let temp = tempdir()?;
969 let workspace = temp.path();
970 let hidden_dir = workspace.join(".cache");
971 fs::create_dir_all(&hidden_dir)?;
972 fs::write(hidden_dir.join("data.log"), "details")?;
973
974 let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
975 let mut indexer = SimpleIndexer::with_config(config);
976 indexer.init()?;
977 indexer.index_directory(workspace)?;
978
979 let results = indexer.find_files("data\\.log$")?;
980 assert_eq!(results.len(), 1);
981
982 Ok(())
983 }
984
985 #[test]
986 fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
987 let temp = tempdir()?;
988 let workspace = temp.path();
989 let allowed_dir = workspace.join(".vtcode").join("external");
990 fs::create_dir_all(&allowed_dir)?;
991 fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;
992
993 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
994 indexer.init()?;
995 indexer.index_directory(workspace)?;
996
997 let results = indexer.find_files("plugin\\.toml$")?;
998 assert_eq!(results.len(), 1);
999
1000 Ok(())
1001 }
1002
1003 #[test]
1004 fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
1005 let temp = tempdir()?;
1006 let workspace = temp.path();
1007 let file_path = workspace.join("notes.txt");
1008 fs::write(&file_path, "remember this")?;
1009
1010 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1011 indexer.init()?;
1012 indexer.index_directory(workspace)?;
1013 assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);
1014
1015 fs::remove_file(&file_path)?;
1016 indexer.index_directory(workspace)?;
1017
1018 assert!(indexer.find_files("notes\\.txt$")?.is_empty());
1019 assert!(indexer.all_files().is_empty());
1020
1021 Ok(())
1022 }
1023
1024 #[test]
1025 fn index_file_skips_excluded_paths() -> Result<()> {
1026 let temp = tempdir()?;
1027 let workspace = temp.path();
1028 let index_dir = workspace.join(".vtcode").join("index");
1029 fs::create_dir_all(&index_dir)?;
1030 let generated_index = index_dir.join("index.md");
1031 fs::write(&generated_index, "# generated")?;
1032
1033 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1034 indexer.init()?;
1035 indexer.index_file(&generated_index)?;
1036
1037 assert!(indexer.all_files().is_empty());
1038
1039 Ok(())
1040 }
1041
1042 #[test]
1043 fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
1044 let temp = tempdir()?;
1045 let workspace = temp.path();
1046 let file_path = workspace.join("notes.txt");
1047 fs::write(&file_path, "remember this")?;
1048
1049 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1050 indexer.init()?;
1051 indexer.index_file(&file_path)?;
1052 assert!(
1053 indexer
1054 .find_files("notes\\.txt$")?
1055 .iter()
1056 .any(|file| file.ends_with("notes.txt"))
1057 );
1058
1059 fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
1060 indexer.index_file(&file_path)?;
1061
1062 assert!(indexer.find_files("notes\\.txt$")?.is_empty());
1063
1064 let index_content =
1065 fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
1066 assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));
1067
1068 Ok(())
1069 }
1070
1071 #[test]
1072 fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
1073 let temp = tempdir()?;
1074 let workspace = temp.path();
1075 let first = workspace.join("first.txt");
1076 let second = workspace.join("second.txt");
1077 fs::write(&first, "one")?;
1078 fs::write(&second, "two")?;
1079
1080 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1081 indexer.init()?;
1082 indexer.index_file(&first)?;
1083 indexer.index_file(&second)?;
1084
1085 let index_dir = workspace.join(".vtcode").join("index");
1086 let files = fs::read_dir(&index_dir)?
1087 .filter_map(|entry| entry.ok())
1088 .map(|entry| entry.file_name().to_string_lossy().into_owned())
1089 .collect::<Vec<_>>();
1090 assert_eq!(files, vec!["index.md".to_string()]);
1091
1092 let index_content = fs::read_to_string(index_dir.join("index.md"))?;
1093 assert!(index_content.contains(first.to_string_lossy().as_ref()));
1094 assert!(index_content.contains(second.to_string_lossy().as_ref()));
1095
1096 Ok(())
1097 }
1098
1099 #[test]
1100 fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
1101 let temp = tempdir()?;
1102 let workspace = temp.path();
1103 fs::write(workspace.join("notes.txt"), "remember this")?;
1104
1105 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1106 indexer.index_directory(workspace)?;
1107
1108 let index_content =
1109 fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
1110 assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));
1111
1112 Ok(())
1113 }
1114
1115 #[test]
1116 fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
1117 let temp = tempdir()?;
1118 let workspace = temp.path();
1119 let file_path = workspace.join("notes.txt");
1120 fs::write(&file_path, "first\nsecond")?;
1121
1122 let indexer = SimpleIndexer::new(workspace.to_path_buf());
1123 let file_path = file_path.to_string_lossy().into_owned();
1124
1125 assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
1126 assert_eq!(
1127 indexer.get_file_content(&file_path, Some(0), Some(1))?,
1128 "1: first\n"
1129 );
1130 assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");
1131
1132 Ok(())
1133 }
1134
1135 #[test]
1136 fn supports_custom_storage_backends() -> Result<()> {
1137 #[derive(Clone, Default)]
1138 struct MemoryStorage {
1139 records: Arc<Mutex<Vec<FileIndex>>>,
1140 }
1141
1142 impl MemoryStorage {
1143 fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
1144 Self { records }
1145 }
1146 }
1147
1148 impl IndexStorage for MemoryStorage {
1149 fn init(&self, _index_dir: &Path) -> Result<()> {
1150 Ok(())
1151 }
1152
1153 fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
1154 let mut guard = self.records.lock().expect("lock poisoned");
1155 guard.push(entry.clone());
1156 Ok(())
1157 }
1158 }
1159
1160 let temp = tempdir()?;
1161 let workspace = temp.path();
1162 fs::write(workspace.join("notes.txt"), "remember this")?;
1163
1164 let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
1165 let storage = MemoryStorage::new(records.clone());
1166
1167 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1168 let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
1169 indexer.init()?;
1170 indexer.index_directory(workspace)?;
1171
1172 let entries = records.lock().expect("lock poisoned");
1173 assert_eq!(entries.len(), 1);
1174 assert_eq!(
1175 entries[0].path,
1176 workspace.join("notes.txt").to_string_lossy().into_owned()
1177 );
1178
1179 Ok(())
1180 }
1181
1182 #[test]
1183 fn custom_filters_can_skip_files() -> Result<()> {
1184 #[derive(Default)]
1185 struct SkipRustFilter {
1186 inner: ConfigTraversalFilter,
1187 }
1188
1189 impl TraversalFilter for SkipRustFilter {
1190 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1191 self.inner.should_descend(path, config)
1192 }
1193
1194 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1195 if path
1196 .extension()
1197 .and_then(|ext| ext.to_str())
1198 .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
1199 {
1200 return false;
1201 }
1202
1203 self.inner.should_index_file(path, config)
1204 }
1205 }
1206
1207 let temp = tempdir()?;
1208 let workspace = temp.path();
1209 fs::write(workspace.join("lib.rs"), "fn main() {}")?;
1210 fs::write(workspace.join("README.md"), "# Notes")?;
1211
1212 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1213 let mut indexer =
1214 SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
1215 indexer.init()?;
1216 indexer.index_directory(workspace)?;
1217
1218 assert!(indexer.find_files("lib\\.rs$")?.is_empty());
1219 assert!(!indexer.find_files("README\\.md$")?.is_empty());
1220
1221 Ok(())
1222 }
1223
1224 #[test]
1225 fn custom_filters_can_skip_directories() -> Result<()> {
1226 #[derive(Default)]
1227 struct SkipGeneratedFilter {
1228 inner: ConfigTraversalFilter,
1229 }
1230
1231 impl TraversalFilter for SkipGeneratedFilter {
1232 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1233 if path.ends_with("generated") {
1234 return false;
1235 }
1236
1237 self.inner.should_descend(path, config)
1238 }
1239
1240 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1241 self.inner.should_index_file(path, config)
1242 }
1243 }
1244
1245 let temp = tempdir()?;
1246 let workspace = temp.path();
1247 let generated_dir = workspace.join("generated");
1248 fs::create_dir_all(&generated_dir)?;
1249 fs::write(generated_dir.join("skip.txt"), "ignore me")?;
1250 fs::write(workspace.join("README.md"), "# Notes")?;
1251
1252 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1253 let indexer = SimpleIndexer::with_config(config)
1254 .with_filter(Arc::new(SkipGeneratedFilter::default()));
1255 let files = indexer.discover_files(workspace);
1256
1257 assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
1258 assert!(files.iter().any(|file| file.ends_with("README.md")));
1259
1260 Ok(())
1261 }
1262
1263 #[test]
1264 fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
1265 let temp = tempdir()?;
1266 let workspace = temp.path();
1267 let src_dir = workspace.join("src");
1268 let docs_dir = workspace.join("docs");
1269 fs::create_dir_all(&src_dir)?;
1270 fs::create_dir_all(&docs_dir)?;
1271 fs::write(src_dir.join("lib.rs"), "fn main() {}")?;
1272 fs::write(docs_dir.join("guide.md"), "# Guide")?;
1273
1274 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1275 indexer.init()?;
1276 indexer.index_directory(&src_dir)?;
1277 indexer.index_directory(&docs_dir)?;
1278
1279 assert!(
1280 indexer
1281 .find_files("lib\\.rs$")?
1282 .iter()
1283 .any(|file| file.ends_with("lib.rs"))
1284 );
1285 assert!(
1286 indexer
1287 .find_files("guide\\.md$")?
1288 .iter()
1289 .any(|file| file.ends_with("guide.md"))
1290 );
1291
1292 let index_content =
1293 fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
1294 assert!(index_content.contains(src_dir.join("lib.rs").to_string_lossy().as_ref()));
1295 assert!(index_content.contains(docs_dir.join("guide.md").to_string_lossy().as_ref()));
1296
1297 Ok(())
1298 }
1299
1300 #[test]
1301 fn batch_indexing_writes_single_markdown_file() -> Result<()> {
1302 let temp = tempdir()?;
1303 let workspace = temp.path();
1304 fs::write(workspace.join("lib.rs"), "fn main() {}")?;
1305 fs::write(workspace.join("README.md"), "# Notes")?;
1306
1307 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1308 indexer.init()?;
1309 indexer.index_directory(workspace)?;
1310
1311 let index_dir = workspace.join(".vtcode").join("index");
1312 let files = fs::read_dir(&index_dir)?
1313 .filter_map(|entry| entry.ok())
1314 .map(|entry| entry.file_name().to_string_lossy().into_owned())
1315 .collect::<Vec<_>>();
1316 assert_eq!(files, vec!["index.md".to_string()]);
1317
1318 let index_content = fs::read_to_string(index_dir.join("index.md"))?;
1319 assert!(index_content.contains(workspace.join("lib.rs").to_string_lossy().as_ref()));
1320 assert!(index_content.contains(workspace.join("README.md").to_string_lossy().as_ref()));
1321
1322 Ok(())
1323 }
1324
1325 #[test]
1326 fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
1327 let temp = tempdir()?;
1328 let workspace = temp.path();
1329 fs::write(workspace.join("lib.rs"), "fn main() {}")?;
1330
1331 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1332 indexer.init()?;
1333
1334 let legacy_file_name = format!("{}.md", calculate_hash("legacy-path"));
1335 let legacy_file_path = workspace
1336 .join(".vtcode")
1337 .join("index")
1338 .join(&legacy_file_name);
1339 fs::write(&legacy_file_path, "# legacy")?;
1340 assert!(legacy_file_path.exists());
1341
1342 indexer.index_directory(workspace)?;
1343
1344 assert!(!legacy_file_path.exists());
1345 let files = fs::read_dir(workspace.join(".vtcode").join("index"))?
1346 .filter_map(|entry| entry.ok())
1347 .map(|entry| entry.file_name().to_string_lossy().into_owned())
1348 .collect::<Vec<_>>();
1349 assert_eq!(files, vec!["index.md".to_string()]);
1350
1351 Ok(())
1352 }
1353
1354 #[test]
1355 fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
1356 #[derive(Clone, Default)]
1357 struct SnapshotMemoryStorage {
1358 snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
1359 }
1360
1361 impl SnapshotMemoryStorage {
1362 fn new(snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>) -> Self {
1363 Self { snapshots }
1364 }
1365 }
1366
1367 impl IndexStorage for SnapshotMemoryStorage {
1368 fn init(&self, _index_dir: &Path) -> Result<()> {
1369 Ok(())
1370 }
1371
1372 fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
1373 Ok(())
1374 }
1375
1376 fn prefers_snapshot_persistence(&self) -> bool {
1377 true
1378 }
1379
1380 fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
1381 self.snapshots
1382 .lock()
1383 .expect("lock poisoned")
1384 .push(entries.to_vec());
1385 Ok(())
1386 }
1387 }
1388
1389 let temp = tempdir()?;
1390 let workspace = temp.path();
1391 let file_path = workspace.join("notes.txt");
1392 fs::write(&file_path, "remember this")?;
1393
1394 let snapshots = Arc::new(Mutex::new(Vec::new()));
1395 let storage = SnapshotMemoryStorage::new(snapshots.clone());
1396
1397 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1398 let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
1399 indexer.index_file(&file_path)?;
1400
1401 let snapshots = snapshots.lock().expect("lock poisoned");
1402 assert_eq!(snapshots.len(), 1);
1403 assert_eq!(snapshots[0].len(), 1);
1404 assert_eq!(
1405 snapshots[0][0].path,
1406 workspace.join("notes.txt").to_string_lossy().into_owned()
1407 );
1408
1409 Ok(())
1410 }
1411
1412 #[test]
1413 fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
1414 #[derive(Clone, Default)]
1415 struct FlakySnapshotStorage {
1416 persist_count: Arc<Mutex<usize>>,
1417 }
1418
1419 impl IndexStorage for FlakySnapshotStorage {
1420 fn init(&self, _index_dir: &Path) -> Result<()> {
1421 Ok(())
1422 }
1423
1424 fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
1425 Ok(())
1426 }
1427
1428 fn prefers_snapshot_persistence(&self) -> bool {
1429 true
1430 }
1431
1432 fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
1433 let mut count = self.persist_count.lock().expect("lock poisoned");
1434 *count += 1;
1435 if *count == 2 {
1436 anyhow::bail!("simulated snapshot persistence failure");
1437 }
1438 Ok(())
1439 }
1440 }
1441
1442 let temp = tempdir()?;
1443 let workspace = temp.path();
1444 let first = workspace.join("first.txt");
1445 let second = workspace.join("second.txt");
1446 fs::write(&first, "one")?;
1447 fs::write(&second, "two")?;
1448
1449 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1450 let storage = Arc::new(FlakySnapshotStorage::default());
1451 let mut indexer = SimpleIndexer::with_config(config).with_storage(storage);
1452
1453 indexer.index_file(&first)?;
1454 assert!(
1455 indexer
1456 .find_files("first\\.txt$")?
1457 .iter()
1458 .any(|path| path.ends_with("first.txt"))
1459 );
1460
1461 let err = indexer
1462 .index_file(&second)
1463 .expect_err("second persist should fail");
1464 assert!(
1465 err.to_string()
1466 .contains("simulated snapshot persistence failure")
1467 );
1468 assert!(
1469 indexer
1470 .find_files("first\\.txt$")?
1471 .iter()
1472 .any(|path| path.ends_with("first.txt"))
1473 );
1474 assert!(indexer.find_files("second\\.txt$")?.is_empty());
1475
1476 Ok(())
1477 }
1478}