1use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{DirEntry, Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fmt::Write as FmtWrite;
14use std::fs;
15use std::io::{BufWriter, ErrorKind, Write};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::SystemTime;
19
20pub trait IndexStorage: Send + Sync {
22 fn init(&self, index_dir: &Path) -> Result<()>;
24
25 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
27
28 fn prefers_snapshot_persistence(&self) -> bool {
34 false
35 }
36
37 fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
41 Ok(())
42 }
43
44 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
49 for entry in entries {
50 self.persist(index_dir, entry)?;
51 }
52 Ok(())
53 }
54
55 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
61 let owned = entries
62 .iter()
63 .map(|entry| (*entry).clone())
64 .collect::<Vec<_>>();
65 self.persist_batch(index_dir, &owned)
66 }
67}
68
/// Decides which directories are walked and which files end up in the index.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the walker should enter the directory at `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
77
78#[derive(Debug, Default, Clone)]
80pub struct MarkdownIndexStorage;
81
82impl IndexStorage for MarkdownIndexStorage {
83 fn init(&self, index_dir: &Path) -> Result<()> {
84 fs::create_dir_all(index_dir)?;
85 Ok(())
86 }
87
88 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
89 fs::create_dir_all(index_dir)?;
90 let file_name = format!("{}.md", calculate_hash(&entry.path));
91 let index_path = index_dir.join(file_name);
92 let file = fs::File::create(index_path)?;
93 let mut writer = BufWriter::new(file);
94 writeln!(writer, "# File Index: {}", entry.path)?;
95 writeln!(writer)?;
96 write_markdown_fields(&mut writer, entry)?;
97 writer.flush()?;
98 Ok(())
99 }
100
101 fn prefers_snapshot_persistence(&self) -> bool {
102 true
103 }
104
105 fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
106 let file_name = format!(
107 "{}.md",
108 calculate_hash(file_path.to_string_lossy().as_ref())
109 );
110 let index_path = index_dir.join(file_name);
111 match fs::remove_file(index_path) {
112 Ok(()) => Ok(()),
113 Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
114 Err(err) => Err(err.into()),
115 }
116 }
117
118 fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
119 persist_markdown_snapshot(index_dir, entries.iter())
120 }
121
122 fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
123 persist_markdown_snapshot(index_dir, entries.iter().copied())
124 }
125}
126
127fn persist_markdown_snapshot<'a>(
128 index_dir: &Path,
129 entries: impl IntoIterator<Item = &'a FileIndex>,
130) -> Result<()> {
131 let entries = entries.into_iter().collect::<Vec<_>>();
132
133 fs::create_dir_all(index_dir)?;
134 let temp_path = index_dir.join(".index.md.tmp");
135 let final_path = index_dir.join("index.md");
136 let file = fs::File::create(&temp_path)?;
137 let mut writer = BufWriter::new(file);
138
139 writeln!(writer, "# Workspace File Index")?;
140 writeln!(writer)?;
141 writeln!(writer, "- **Entries**: {}", entries.len())?;
142 writeln!(writer)?;
143
144 for entry in entries {
145 write_markdown_entry(&mut writer, entry)?;
146 }
147
148 writer.flush()?;
149 fs::rename(temp_path, final_path)?;
150 cleanup_legacy_markdown_entries(index_dir)?;
151 Ok(())
152}
153
154#[derive(Debug, Default, Clone)]
156pub struct ConfigTraversalFilter;
157
158impl TraversalFilter for ConfigTraversalFilter {
159 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
160 !should_skip_dir(path, config)
161 }
162
163 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
164 if !path.is_file() {
165 return false;
166 }
167
168 if config.ignore_hidden
170 && path
171 .file_name()
172 .and_then(|n| n.to_str())
173 .is_some_and(|s| s.starts_with('.'))
174 {
175 return false;
176 }
177
178 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
180 if vtcode_commons::exclusions::is_sensitive_file(file_name)
181 || file_name == ".gitignore"
182 || file_name == ".git"
183 {
184 return false;
185 }
186 }
187
188 true
189 }
190}
191
192#[derive(Clone, Debug)]
194pub struct SimpleIndexerConfig {
195 workspace_root: PathBuf,
196 index_dir: PathBuf,
197 ignore_hidden: bool,
198 excluded_dirs: Vec<PathBuf>,
199 allowed_dirs: Vec<PathBuf>,
200}
201
202impl SimpleIndexerConfig {
203 pub fn new(workspace_root: PathBuf) -> Self {
205 let index_dir = workspace_root.join(".vtcode").join("index");
206 let vtcode_dir = workspace_root.join(".vtcode");
207 let external_dir = vtcode_dir.join("external");
208
209 let mut excluded_dirs: Vec<PathBuf> = vtcode_commons::exclusions::DEFAULT_EXCLUDED_DIRS
210 .iter()
211 .map(|name| workspace_root.join(name))
212 .collect();
213 excluded_dirs.push(index_dir.clone());
214 excluded_dirs.push(vtcode_dir);
215
216 excluded_dirs.dedup();
217
218 Self {
219 workspace_root,
220 index_dir,
221 ignore_hidden: true,
222 excluded_dirs,
223 allowed_dirs: vec![external_dir],
224 }
225 }
226
227 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
229 let index_dir = index_dir.into();
230 self.index_dir = index_dir.clone();
231 self.push_unique_excluded(index_dir);
232 self
233 }
234
235 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
237 let path = path.into();
238 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
239 self.allowed_dirs.push(path);
240 }
241 self
242 }
243
244 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
246 let path = path.into();
247 self.push_unique_excluded(path);
248 self
249 }
250
251 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
253 self.ignore_hidden = ignore_hidden;
254 self
255 }
256
257 pub fn workspace_root(&self) -> &Path {
259 &self.workspace_root
260 }
261
262 pub fn index_dir(&self) -> &Path {
264 &self.index_dir
265 }
266
267 fn push_unique_excluded(&mut self, path: PathBuf) {
268 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
269 self.excluded_dirs.push(path);
270 }
271 }
272}
273
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file, as a (lossily converted) string.
    pub path: String,
    /// Hash of the file's text content, as produced by `calculate_hash`.
    pub hash: String,
    /// Last-modified time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when the file has none.
    pub language: String,
    /// Free-form tags; the indexer in this file always leaves this empty.
    pub tags: Vec<String>,
}
290
/// One matching line found by `SimpleIndexer::search` or `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// For `search`: every regex match on the line. For `grep`: the whole line
    /// as a single element.
    pub matches: Vec<String>,
}
299
/// File indexer that walks a workspace, keeps an in-memory cache of
/// [`FileIndex`] entries, and mirrors that cache into an [`IndexStorage`].
pub struct SimpleIndexer {
    // Paths and inclusion rules.
    config: SimpleIndexerConfig,
    // Indexed entries keyed by file path string.
    index_cache: HashMap<String, FileIndex>,
    // Backend the cache is persisted to.
    storage: Arc<dyn IndexStorage>,
    // Decides which directories are walked and which files are indexed.
    filter: Arc<dyn TraversalFilter>,
}
307
308impl SimpleIndexer {
309 pub fn new(workspace_root: PathBuf) -> Self {
311 Self::with_components(
312 SimpleIndexerConfig::new(workspace_root),
313 Arc::new(MarkdownIndexStorage),
314 Arc::new(ConfigTraversalFilter),
315 )
316 }
317
318 pub fn with_config(config: SimpleIndexerConfig) -> Self {
320 Self::with_components(
321 config,
322 Arc::new(MarkdownIndexStorage),
323 Arc::new(ConfigTraversalFilter),
324 )
325 }
326
327 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
329 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
330 Self::with_config(config)
331 }
332
333 pub fn with_components(
335 config: SimpleIndexerConfig,
336 storage: Arc<dyn IndexStorage>,
337 filter: Arc<dyn TraversalFilter>,
338 ) -> Self {
339 Self {
340 config,
341 index_cache: HashMap::new(),
342 storage,
343 filter,
344 }
345 }
346
347 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
349 Self { storage, ..self }
350 }
351
352 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
354 Self { filter, ..self }
355 }
356
357 pub fn init(&self) -> Result<()> {
359 self.storage.init(self.config.index_dir())
360 }
361
362 pub fn workspace_root(&self) -> &Path {
364 self.config.workspace_root()
365 }
366
367 pub fn index_dir(&self) -> &Path {
369 self.config.index_dir()
370 }
371
372 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
374 let cache_key = file_path.to_string_lossy().into_owned();
375
376 if self.storage.prefers_snapshot_persistence() {
377 let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
378 self.build_file_index(file_path)?
379 } else {
380 None
381 };
382
383 self.apply_snapshot_file_update(cache_key, next_entry)?;
384 return Ok(());
385 }
386
387 if !file_path.exists() || !self.should_process_file_path(file_path) {
388 self.index_cache.remove(cache_key.as_str());
389 self.storage.remove(self.config.index_dir(), file_path)?;
390 return Ok(());
391 }
392
393 if let Some(index) = self.build_file_index(file_path)? {
394 self.storage.persist(self.config.index_dir(), &index)?;
395 self.index_cache.insert(index.path.clone(), index);
396 } else {
397 self.index_cache.remove(cache_key.as_str());
398 self.storage.remove(self.config.index_dir(), file_path)?;
399 }
400
401 Ok(())
402 }
403
404 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
408 let walker = self.build_walker(dir_path);
409
410 let mut entries = Vec::new();
411
412 for entry in walker.filter_map(|e| e.ok()) {
413 let path = entry.path();
414
415 if entry.file_type().is_some_and(|ft| ft.is_file())
417 && let Some(index) = self.build_file_index(path)?
418 {
419 entries.push(index);
420 }
421 }
422
423 if self.storage.prefers_snapshot_persistence() {
424 self.apply_snapshot_directory_update(dir_path, &entries)?;
425 } else {
426 entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
427 self.storage
428 .persist_batch(self.config.index_dir(), &entries)?;
429 }
430
431 self.replace_cached_entries(dir_path, &entries);
432
433 Ok(())
434 }
435
436 pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
439 let walker = self.build_walker(dir_path);
440
441 let mut files = walker
442 .filter_map(|e| e.ok())
443 .filter(|e| {
444 if !e.file_type().is_some_and(|ft| ft.is_file()) {
445 return false;
446 }
447
448 self.should_process_file_path(e.path())
449 })
450 .map(|e| e.path().to_string_lossy().into_owned())
451 .collect::<Vec<_>>();
452 files.sort_unstable();
453 files
454 }
455
456 fn search_files_internal(
459 &self,
460 regex: &Regex,
461 path_filter: Option<&str>,
462 extract_matches: bool,
463 ) -> Vec<SearchResult> {
464 let mut results = Vec::with_capacity(self.index_cache.len());
465
466 for file_path in self.index_cache.keys() {
467 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
468 continue;
469 }
470
471 if let Ok(content) = fs::read_to_string(file_path) {
472 for (line_num, line) in content.lines().enumerate() {
473 if regex.is_match(line) {
474 let matches = if extract_matches {
475 regex
476 .find_iter(line)
477 .map(|m| m.as_str().to_string())
478 .collect()
479 } else {
480 vec![line.to_string()]
481 };
482
483 results.push(SearchResult {
484 file_path: file_path.clone(),
485 line_number: line_num + 1,
486 line_content: line.to_string(),
487 matches,
488 });
489 }
490 }
491 }
492 }
493
494 results.sort_unstable_by(|left, right| {
495 left.file_path
496 .cmp(&right.file_path)
497 .then_with(|| left.line_number.cmp(&right.line_number))
498 });
499 results
500 }
501
502 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
504 let regex = Regex::new(pattern)?;
505 Ok(self.search_files_internal(®ex, path_filter, true))
506 }
507
508 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
510 let regex = Regex::new(pattern)?;
511 let mut results = Vec::with_capacity(self.index_cache.len());
512
513 for file_path in self.index_cache.keys() {
514 if regex.is_match(file_path) {
515 results.push(file_path.clone());
516 }
517 }
518
519 results.sort_unstable();
520 Ok(results)
521 }
522
523 pub fn all_files(&self) -> Vec<String> {
526 let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
527 files.sort_unstable();
528 files
529 }
530
531 pub fn get_file_content(
533 &self,
534 file_path: &str,
535 start_line: Option<usize>,
536 end_line: Option<usize>,
537 ) -> Result<String> {
538 let content = fs::read_to_string(file_path)?;
539 let start = start_line.unwrap_or(1).max(1);
540 let end = end_line.unwrap_or(usize::MAX);
541
542 if start > end {
543 return Ok(String::new());
544 }
545
546 let mut result = String::new();
547 for (line_number, line) in content.lines().enumerate() {
548 let line_number = line_number + 1;
549 if line_number < start {
550 continue;
551 }
552 if line_number > end {
553 break;
554 }
555 writeln!(&mut result, "{line_number}: {line}")?;
556 }
557
558 Ok(result)
559 }
560
561 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
563 let path = Path::new(dir_path);
564 if !path.exists() {
565 return Ok(vec![]);
566 }
567
568 let mut files = Vec::new();
569
570 for entry in fs::read_dir(path)? {
571 let entry = entry?;
572 let file_name = entry.file_name().to_string_lossy().into_owned();
573
574 if !show_hidden && file_name.starts_with('.') {
575 continue;
576 }
577
578 files.push(file_name);
579 }
580
581 files.sort_unstable();
582 Ok(files)
583 }
584
585 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
587 let regex = Regex::new(pattern)?;
588 Ok(self.search_files_internal(®ex, file_pattern, false))
589 }
590
591 fn is_allowed_path(&self, path: &Path) -> bool {
592 self.config
593 .allowed_dirs
594 .iter()
595 .any(|allowed| path.starts_with(allowed))
596 }
597
598 #[inline]
599 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
600 let metadata = fs::metadata(file_path)?;
601 let modified = metadata.modified()?;
602 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
603 }
604
605 #[inline]
606 fn detect_language(&self, file_path: &Path) -> String {
607 file_path
608 .extension()
609 .and_then(|ext| ext.to_str())
610 .unwrap_or("unknown")
611 .to_string()
612 }
613
614 fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
615 if !self.should_process_file_path(file_path) {
616 return Ok(None);
617 }
618
619 let content = match fs::read_to_string(file_path) {
620 Ok(text) => text,
621 Err(err) => {
622 if err.kind() == ErrorKind::InvalidData {
623 return Ok(None);
624 }
625 return Err(err.into());
626 }
627 };
628
629 let index = FileIndex {
630 path: file_path.to_string_lossy().into_owned(),
631 hash: calculate_hash(&content),
632 modified: self.get_modified_time(file_path)?,
633 size: content.len() as u64,
634 language: self.detect_language(file_path),
635 tags: vec![],
636 };
637
638 Ok(Some(index))
639 }
640
641 #[inline]
642 fn is_excluded_path(&self, path: &Path) -> bool {
643 self.config
644 .excluded_dirs
645 .iter()
646 .any(|excluded| path.starts_with(excluded))
647 }
648
649 #[inline]
650 fn should_index_file_path(&self, path: &Path) -> bool {
651 self.filter.should_index_file(path, &self.config)
652 }
653
654 #[inline]
655 fn should_process_file_path(&self, path: &Path) -> bool {
656 if self.is_allowed_path(path) {
657 return self.should_index_file_path(path);
658 }
659
660 !self.is_excluded_path(path) && self.should_index_file_path(path)
661 }
662
663 fn build_walker(&self, dir_path: &Path) -> Walk {
664 let walk_root = dir_path.to_path_buf();
665 let config = self.config.clone();
666 let filter = Arc::clone(&self.filter);
667
668 let mut builder = WalkBuilder::new(dir_path);
669 builder
670 .hidden(false)
671 .git_ignore(true)
672 .git_global(true)
673 .git_exclude(true)
674 .ignore(true)
675 .parents(true);
676 builder.filter_entry(move |entry| {
677 should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
678 });
679 builder.build()
680 }
681
682 fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
683 self.index_cache
684 .retain(|path, _| !Path::new(path).starts_with(dir_path));
685
686 self.index_cache.extend(
687 entries
688 .iter()
689 .cloned()
690 .map(|entry| (entry.path.clone(), entry)),
691 );
692 }
693
694 fn apply_snapshot_file_update(
695 &mut self,
696 cache_key: String,
697 next_entry: Option<FileIndex>,
698 ) -> Result<()> {
699 let previous_entry = match next_entry {
700 Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
701 None => self.index_cache.remove(cache_key.as_str()),
702 };
703
704 if let Err(err) = self.persist_current_snapshot() {
705 match previous_entry {
706 Some(entry) => {
707 self.index_cache.insert(cache_key, entry);
708 }
709 None => {
710 self.index_cache.remove(cache_key.as_str());
711 }
712 }
713 return Err(err);
714 }
715
716 Ok(())
717 }
718
719 fn apply_snapshot_directory_update(
720 &mut self,
721 dir_path: &Path,
722 entries: &[FileIndex],
723 ) -> Result<()> {
724 let previous_entries = self.take_cached_entries(dir_path);
725 self.index_cache.extend(
726 entries
727 .iter()
728 .cloned()
729 .map(|entry| (entry.path.clone(), entry)),
730 );
731
732 if let Err(err) = self.persist_current_snapshot() {
733 self.index_cache
734 .retain(|path, _| !Path::new(path).starts_with(dir_path));
735 self.index_cache.extend(
736 previous_entries
737 .into_iter()
738 .map(|entry| (entry.path.clone(), entry)),
739 );
740 return Err(err);
741 }
742
743 Ok(())
744 }
745
746 fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
747 let keys = self
748 .index_cache
749 .keys()
750 .filter(|path| Path::new(path).starts_with(dir_path))
751 .cloned()
752 .collect::<Vec<_>>();
753
754 keys.into_iter()
755 .filter_map(|path| self.index_cache.remove(path.as_str()))
756 .collect()
757 }
758
759 fn persist_current_snapshot(&self) -> Result<()> {
760 let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
761 snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
762 self.storage
763 .persist_batch_refs(self.config.index_dir(), &snapshot)
764 }
765}
766
767impl Clone for SimpleIndexer {
768 fn clone(&self) -> Self {
769 Self {
770 config: self.config.clone(),
771 index_cache: self.index_cache.clone(),
772 storage: self.storage.clone(),
773 filter: self.filter.clone(),
774 }
775 }
776}
777
778fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
779 if is_allowed_path_or_ancestor(path, config) {
780 return false;
781 }
782
783 if config
784 .excluded_dirs
785 .iter()
786 .any(|excluded| path.starts_with(excluded))
787 {
788 return true;
789 }
790
791 if config.ignore_hidden
792 && path
793 .file_name()
794 .and_then(|name| name.to_str())
795 .is_some_and(|name_str| name_str.starts_with('.'))
796 {
797 return true;
798 }
799
800 false
801}
802
803fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
804 config
805 .allowed_dirs
806 .iter()
807 .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
808}
809
810fn should_visit_entry(
811 entry: &DirEntry,
812 walk_root: &Path,
813 config: &SimpleIndexerConfig,
814 filter: &dyn TraversalFilter,
815) -> bool {
816 if entry.path() == walk_root {
817 return true;
818 }
819
820 if !entry
821 .file_type()
822 .is_some_and(|file_type| file_type.is_dir())
823 {
824 return true;
825 }
826
827 filter.should_descend(entry.path(), config)
828}
829
/// Hashes the UTF-8 bytes of `content` with `vtcode_commons::utils::calculate_sha256`
/// and returns the resulting string.
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
834
835fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
836 writeln!(writer, "## {}", entry.path)?;
837 writeln!(writer)?;
838 write_markdown_fields(writer, entry)?;
839 writeln!(writer)?;
840 Ok(())
841}
842
843fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
844 writeln!(writer, "- **Path**: {}", entry.path)?;
845 writeln!(writer, "- **Hash**: {}", entry.hash)?;
846 writeln!(writer, "- **Modified**: {}", entry.modified)?;
847 writeln!(writer, "- **Size**: {} bytes", entry.size)?;
848 writeln!(writer, "- **Language**: {}", entry.language)?;
849 writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
850 Ok(())
851}
852
853fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
854 for entry in fs::read_dir(index_dir)? {
855 let entry = entry?;
856 let file_name = entry.file_name();
857 let file_name = file_name.to_string_lossy();
858 if is_legacy_markdown_entry_name(file_name.as_ref()) {
859 fs::remove_file(entry.path())?;
860 }
861 }
862 Ok(())
863}
864
/// True when `file_name` is exactly 64 ASCII hex digits followed by `.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    file_name
        .strip_suffix(".md")
        .is_some_and(|stem| stem.len() == 64 && stem.chars().all(|c| c.is_ascii_hexdigit()))
}
872
873#[cfg(test)]
874mod tests {
875 use super::*;
876 use std::fs;
877 use std::sync::{Arc, Mutex};
878 use tempfile::tempdir;
879
880 #[test]
881 fn skips_hidden_directories_by_default() -> Result<()> {
882 let temp = tempdir()?;
883 let workspace = temp.path();
884 let hidden_dir = workspace.join(".private");
885 fs::create_dir_all(&hidden_dir)?;
886 fs::write(hidden_dir.join("secret.txt"), "classified")?;
887
888 let visible_dir = workspace.join("src");
889 fs::create_dir_all(&visible_dir)?;
890 fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;
891
892 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
893 indexer.init()?;
894 indexer.index_directory(workspace)?;
895
896 assert!(indexer.find_files("secret\\.txt$")?.is_empty());
897 assert!(!indexer.find_files("lib\\.rs$")?.is_empty());
898
899 Ok(())
900 }
901
902 #[test]
903 fn can_include_hidden_directories_when_configured() -> Result<()> {
904 let temp = tempdir()?;
905 let workspace = temp.path();
906 let hidden_dir = workspace.join(".cache");
907 fs::create_dir_all(&hidden_dir)?;
908 fs::write(hidden_dir.join("data.log"), "details")?;
909
910 let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
911 let mut indexer = SimpleIndexer::with_config(config);
912 indexer.init()?;
913 indexer.index_directory(workspace)?;
914
915 let results = indexer.find_files("data\\.log$")?;
916 assert_eq!(results.len(), 1);
917
918 Ok(())
919 }
920
921 #[test]
922 fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
923 let temp = tempdir()?;
924 let workspace = temp.path();
925 let allowed_dir = workspace.join(".vtcode").join("external");
926 fs::create_dir_all(&allowed_dir)?;
927 fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;
928
929 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
930 indexer.init()?;
931 indexer.index_directory(workspace)?;
932
933 let results = indexer.find_files("plugin\\.toml$")?;
934 assert_eq!(results.len(), 1);
935
936 Ok(())
937 }
938
939 #[test]
940 fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
941 let temp = tempdir()?;
942 let workspace = temp.path();
943 let file_path = workspace.join("notes.txt");
944 fs::write(&file_path, "remember this")?;
945
946 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
947 indexer.init()?;
948 indexer.index_directory(workspace)?;
949 assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);
950
951 fs::remove_file(&file_path)?;
952 indexer.index_directory(workspace)?;
953
954 assert!(indexer.find_files("notes\\.txt$")?.is_empty());
955 assert!(indexer.all_files().is_empty());
956
957 Ok(())
958 }
959
960 #[test]
961 fn index_file_skips_excluded_paths() -> Result<()> {
962 let temp = tempdir()?;
963 let workspace = temp.path();
964 let index_dir = workspace.join(".vtcode").join("index");
965 fs::create_dir_all(&index_dir)?;
966 let generated_index = index_dir.join("index.md");
967 fs::write(&generated_index, "# generated")?;
968
969 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
970 indexer.init()?;
971 indexer.index_file(&generated_index)?;
972
973 assert!(indexer.all_files().is_empty());
974
975 Ok(())
976 }
977
978 #[test]
979 fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
980 let temp = tempdir()?;
981 let workspace = temp.path();
982 let file_path = workspace.join("notes.txt");
983 fs::write(&file_path, "remember this")?;
984
985 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
986 indexer.init()?;
987 indexer.index_file(&file_path)?;
988 assert!(
989 indexer
990 .find_files("notes\\.txt$")?
991 .iter()
992 .any(|file| file.ends_with("notes.txt"))
993 );
994
995 fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
996 indexer.index_file(&file_path)?;
997
998 assert!(indexer.find_files("notes\\.txt$")?.is_empty());
999
1000 let index_content =
1001 fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
1002 assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));
1003
1004 Ok(())
1005 }
1006
1007 #[test]
1008 fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
1009 let temp = tempdir()?;
1010 let workspace = temp.path();
1011 let first = workspace.join("first.txt");
1012 let second = workspace.join("second.txt");
1013 fs::write(&first, "one")?;
1014 fs::write(&second, "two")?;
1015
1016 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1017 indexer.init()?;
1018 indexer.index_file(&first)?;
1019 indexer.index_file(&second)?;
1020
1021 let index_dir = workspace.join(".vtcode").join("index");
1022 let files = fs::read_dir(&index_dir)?
1023 .filter_map(|entry| entry.ok())
1024 .map(|entry| entry.file_name().to_string_lossy().into_owned())
1025 .collect::<Vec<_>>();
1026 assert_eq!(files, vec!["index.md".to_string()]);
1027
1028 let index_content = fs::read_to_string(index_dir.join("index.md"))?;
1029 assert!(index_content.contains(first.to_string_lossy().as_ref()));
1030 assert!(index_content.contains(second.to_string_lossy().as_ref()));
1031
1032 Ok(())
1033 }
1034
1035 #[test]
1036 fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
1037 let temp = tempdir()?;
1038 let workspace = temp.path();
1039 fs::write(workspace.join("notes.txt"), "remember this")?;
1040
1041 let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
1042 indexer.index_directory(workspace)?;
1043
1044 let index_content =
1045 fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
1046 assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));
1047
1048 Ok(())
1049 }
1050
1051 #[test]
1052 fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
1053 let temp = tempdir()?;
1054 let workspace = temp.path();
1055 let file_path = workspace.join("notes.txt");
1056 fs::write(&file_path, "first\nsecond")?;
1057
1058 let indexer = SimpleIndexer::new(workspace.to_path_buf());
1059 let file_path = file_path.to_string_lossy().into_owned();
1060
1061 assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
1062 assert_eq!(
1063 indexer.get_file_content(&file_path, Some(0), Some(1))?,
1064 "1: first\n"
1065 );
1066 assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");
1067
1068 Ok(())
1069 }
1070
1071 #[test]
1072 fn supports_custom_storage_backends() -> Result<()> {
1073 #[derive(Clone, Default)]
1074 struct MemoryStorage {
1075 records: Arc<Mutex<Vec<FileIndex>>>,
1076 }
1077
1078 impl MemoryStorage {
1079 fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
1080 Self { records }
1081 }
1082 }
1083
1084 impl IndexStorage for MemoryStorage {
1085 fn init(&self, _index_dir: &Path) -> Result<()> {
1086 Ok(())
1087 }
1088
1089 fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
1090 let mut guard = self.records.lock().expect("lock poisoned");
1091 guard.push(entry.clone());
1092 Ok(())
1093 }
1094 }
1095
1096 let temp = tempdir()?;
1097 let workspace = temp.path();
1098 fs::write(workspace.join("notes.txt"), "remember this")?;
1099
1100 let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
1101 let storage = MemoryStorage::new(records.clone());
1102
1103 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1104 let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
1105 indexer.init()?;
1106 indexer.index_directory(workspace)?;
1107
1108 let entries = records.lock().expect("lock poisoned");
1109 assert_eq!(entries.len(), 1);
1110 assert_eq!(
1111 entries[0].path,
1112 workspace.join("notes.txt").to_string_lossy().into_owned()
1113 );
1114
1115 Ok(())
1116 }
1117
1118 #[test]
1119 fn custom_filters_can_skip_files() -> Result<()> {
1120 #[derive(Default)]
1121 struct SkipRustFilter {
1122 inner: ConfigTraversalFilter,
1123 }
1124
1125 impl TraversalFilter for SkipRustFilter {
1126 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1127 self.inner.should_descend(path, config)
1128 }
1129
1130 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1131 if path
1132 .extension()
1133 .and_then(|ext| ext.to_str())
1134 .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
1135 {
1136 return false;
1137 }
1138
1139 self.inner.should_index_file(path, config)
1140 }
1141 }
1142
1143 let temp = tempdir()?;
1144 let workspace = temp.path();
1145 fs::write(workspace.join("lib.rs"), "fn main() {}")?;
1146 fs::write(workspace.join("README.md"), "# Notes")?;
1147
1148 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1149 let mut indexer =
1150 SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
1151 indexer.init()?;
1152 indexer.index_directory(workspace)?;
1153
1154 assert!(indexer.find_files("lib\\.rs$")?.is_empty());
1155 assert!(!indexer.find_files("README\\.md$")?.is_empty());
1156
1157 Ok(())
1158 }
1159
1160 #[test]
1161 fn custom_filters_can_skip_directories() -> Result<()> {
1162 #[derive(Default)]
1163 struct SkipGeneratedFilter {
1164 inner: ConfigTraversalFilter,
1165 }
1166
1167 impl TraversalFilter for SkipGeneratedFilter {
1168 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1169 if path.ends_with("generated") {
1170 return false;
1171 }
1172
1173 self.inner.should_descend(path, config)
1174 }
1175
1176 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
1177 self.inner.should_index_file(path, config)
1178 }
1179 }
1180
1181 let temp = tempdir()?;
1182 let workspace = temp.path();
1183 let generated_dir = workspace.join("generated");
1184 fs::create_dir_all(&generated_dir)?;
1185 fs::write(generated_dir.join("skip.txt"), "ignore me")?;
1186 fs::write(workspace.join("README.md"), "# Notes")?;
1187
1188 let config = SimpleIndexerConfig::new(workspace.to_path_buf());
1189 let indexer = SimpleIndexer::with_config(config)
1190 .with_filter(Arc::new(SkipGeneratedFilter::default()));
1191 let files = indexer.discover_files(workspace);
1192
1193 assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
1194 assert!(files.iter().any(|file| file.ends_with("README.md")));
1195
1196 Ok(())
1197 }
1198
/// Indexes two sibling directories one after the other and checks that the entry from the
/// first run is still reachable afterwards, both via `find_files` and in the `index.md`
/// file under `.vtcode/index`.
#[test]
fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
    let scratch = tempdir()?;
    let root = scratch.path();

    // Fixture: `src/lib.rs` and `docs/guide.md`.
    let src_dir = root.join("src");
    let docs_dir = root.join("docs");
    let lib_file = src_dir.join("lib.rs");
    let guide_file = docs_dir.join("guide.md");
    fs::create_dir_all(&src_dir)?;
    fs::create_dir_all(&docs_dir)?;
    fs::write(&lib_file, "fn main() {}")?;
    fs::write(&guide_file, "# Guide")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(&src_dir)?;
    indexer.index_directory(&docs_dir)?;

    // Both files must be found after the second directory has been indexed.
    let lib_hits = indexer.find_files("lib\\.rs$")?;
    assert!(lib_hits.iter().any(|file| file.ends_with("lib.rs")));
    let guide_hits = indexer.find_files("guide\\.md$")?;
    assert!(guide_hits.iter().any(|file| file.ends_with("guide.md")));

    // The persisted index must mention both full paths as well.
    let index_path = root.join(".vtcode").join("index").join("index.md");
    let index_content = fs::read_to_string(index_path)?;
    let lib_entry = lib_file.to_string_lossy();
    let guide_entry = guide_file.to_string_lossy();
    assert!(index_content.contains(lib_entry.as_ref()));
    assert!(index_content.contains(guide_entry.as_ref()));

    Ok(())
}
1235
/// Indexes a workspace holding two files and checks that `.vtcode/index` ends up containing
/// exactly one file, `index.md`, whose text mentions both indexed paths.
#[test]
fn batch_indexing_writes_single_markdown_file() -> Result<()> {
    let scratch = tempdir()?;
    let root = scratch.path();
    let lib_file = root.join("lib.rs");
    let readme_file = root.join("README.md");
    fs::write(&lib_file, "fn main() {}")?;
    fs::write(&readme_file, "# Notes")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    indexer.init()?;
    indexer.index_directory(root)?;

    // List the index directory; entries that fail to read are skipped, not reported.
    let index_dir = root.join(".vtcode").join("index");
    let names: Vec<String> = fs::read_dir(&index_dir)?
        .flatten()
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(names, vec![String::from("index.md")]);

    let index_content = fs::read_to_string(index_dir.join("index.md"))?;
    let lib_entry = lib_file.to_string_lossy();
    let readme_entry = readme_file.to_string_lossy();
    assert!(index_content.contains(lib_entry.as_ref()));
    assert!(index_content.contains(readme_entry.as_ref()));

    Ok(())
}
1260
/// Plants a `<hash>.md` file (the per-entry naming scheme, built with `calculate_hash`) in the
/// index directory and checks that `index_directory` deletes it, leaving only `index.md`.
#[test]
fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
    let scratch = tempdir()?;
    let root = scratch.path();
    fs::write(root.join("lib.rs"), "fn main() {}")?;

    let mut indexer = SimpleIndexer::new(root.to_path_buf());
    // `init` runs before the write below, which targets the index directory.
    indexer.init()?;

    let index_dir = root.join(".vtcode").join("index");
    let legacy_path = index_dir.join(format!("{}.md", calculate_hash("legacy-path")));
    fs::write(&legacy_path, "# legacy")?;
    assert!(legacy_path.exists());

    indexer.index_directory(root)?;

    // The stale per-entry file is gone and the directory holds just the single index file.
    assert!(!legacy_path.exists());
    let names: Vec<String> = fs::read_dir(&index_dir)?
        .flatten()
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(names, vec![String::from("index.md")]);

    Ok(())
}
1289
/// Indexes one file through a snapshot-preferring storage that overrides only `persist_batch`
/// and checks that exactly one snapshot, holding exactly that file, was recorded.
///
/// NOTE(review): per the test name, the indexer is expected to reach `persist_batch` through
/// the trait's default `persist_batch_refs`; the indexer side is not visible here — confirm.
#[test]
fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
    // In-memory storage that records every batch handed to `persist_batch`.
    #[derive(Clone, Default)]
    struct SnapshotMemoryStorage {
        snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
    }

    impl IndexStorage for SnapshotMemoryStorage {
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
            let mut recorded = self.snapshots.lock().expect("lock poisoned");
            recorded.push(entries.to_vec());
            Ok(())
        }
    }

    let scratch = tempdir()?;
    let root = scratch.path();
    let file_path = root.join("notes.txt");
    fs::write(&file_path, "remember this")?;

    // The test keeps one handle to the shared log; the storage gets the other.
    let snapshots = Arc::new(Mutex::new(Vec::new()));
    let storage = SnapshotMemoryStorage {
        snapshots: Arc::clone(&snapshots),
    };

    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_storage(Arc::new(storage));
    indexer.index_file(&file_path)?;

    let recorded = snapshots.lock().expect("lock poisoned");
    assert_eq!(recorded.len(), 1);
    let only_snapshot = &recorded[0];
    assert_eq!(only_snapshot.len(), 1);
    assert_eq!(
        only_snapshot[0].path,
        file_path.to_string_lossy().into_owned()
    );

    Ok(())
}
1347
/// Uses a storage whose second `persist_batch` call fails and checks that the failed
/// `index_file` call surfaces the error, leaves the first file findable, and does not make
/// the second file findable.
#[test]
fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
    // Snapshot-preferring storage that counts `persist_batch` calls and fails on call number 2.
    #[derive(Clone, Default)]
    struct FlakySnapshotStorage {
        persist_count: Arc<Mutex<usize>>,
    }

    impl IndexStorage for FlakySnapshotStorage {
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
            Ok(())
        }

        fn prefers_snapshot_persistence(&self) -> bool {
            true
        }

        fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
            // Bump the counter and read back the 1-based number of this call.
            let attempt = {
                let mut calls = self.persist_count.lock().expect("lock poisoned");
                *calls += 1;
                *calls
            };
            if attempt != 2 {
                return Ok(());
            }
            anyhow::bail!("simulated snapshot persistence failure")
        }
    }

    let scratch = tempdir()?;
    let root = scratch.path();
    let first = root.join("first.txt");
    let second = root.join("second.txt");
    fs::write(&first, "one")?;
    fs::write(&second, "two")?;

    let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
        .with_storage(Arc::new(FlakySnapshotStorage::default()));

    // First call: persistence succeeds and the file becomes findable.
    indexer.index_file(&first)?;
    let first_found_initially = indexer
        .find_files("first\\.txt$")?
        .iter()
        .any(|path| path.ends_with("first.txt"));
    assert!(first_found_initially);

    // Second call: persistence fails and the error must reach the caller.
    let err = indexer
        .index_file(&second)
        .expect_err("second persist should fail");
    let message = err.to_string();
    assert!(message.contains("simulated snapshot persistence failure"));

    // The earlier entry survives; the failed one is not visible.
    let first_still_found = indexer
        .find_files("first\\.txt$")?
        .iter()
        .any(|path| path.ends_with("first.txt"));
    assert!(first_still_found);
    let second_hits = indexer.find_files("second\\.txt$")?;
    assert!(second_hits.is_empty());

    Ok(())
}
1414}