1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
19pub trait IndexStorage: Send + Sync {
21 fn init(&self, index_dir: &Path) -> Result<()>;
23
24 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
26}
27
28pub trait TraversalFilter: Send + Sync {
30 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
32
33 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
35}
36
/// Storage backend that writes one Markdown file per indexed entry.
#[derive(Clone, Debug, Default)]
pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42 fn init(&self, index_dir: &Path) -> Result<()> {
43 fs::create_dir_all(index_dir)?;
44 Ok(())
45 }
46
47 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48 let file_name = format!("{}.md", calculate_hash(&entry.path));
49 let index_path = index_dir.join(file_name);
50
51 let markdown = format!(
52 "# File Index: {}\n\n\
53 - **Path**: {}\n\
54 - **Hash**: {}\n\
55 - **Modified**: {}\n\
56 - **Size**: {} bytes\n\
57 - **Language**: {}\n\
58 - **Tags**: {}\n\n",
59 entry.path,
60 entry.path,
61 entry.hash,
62 entry.modified,
63 entry.size,
64 entry.language,
65 entry.tags.join(", ")
66 );
67
68 fs::write(index_path, markdown)?;
69 Ok(())
70 }
71}
72
/// Default traversal filter whose decisions are driven by a
/// `SimpleIndexerConfig`.
#[derive(Clone, Debug, Default)]
pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79 !should_skip_dir(path, config)
80 }
81
82 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83 if !path.is_file() {
84 return false;
85 }
86
87 if config.ignore_hidden
89 && path
90 .file_name()
91 .and_then(|n| n.to_str())
92 .is_some_and(|s| s.starts_with('.'))
93 {
94 return false;
95 }
96
97 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
99 let is_sensitive = matches!(
100 file_name,
101 ".env"
102 | ".env.local"
103 | ".env.production"
104 | ".env.development"
105 | ".env.test"
106 | ".git"
107 | ".gitignore"
108 | ".DS_Store"
109 ) || file_name.starts_with(".env.");
110 if is_sensitive {
111 return false;
112 }
113 }
114
115 true
116 }
117}
118
/// Settings that control where the index lives and what gets indexed.
#[derive(Debug, Clone)]
pub struct SimpleIndexerConfig {
    /// Root directory of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory handed to the storage backend for `init` and `persist`.
    index_dir: PathBuf,
    /// When `true`, names starting with `.` are treated as hidden and skipped.
    ignore_hidden: bool,
    /// Path prefixes whose contents are excluded from indexing.
    excluded_dirs: Vec<PathBuf>,
    /// Path prefixes that `should_skip_dir` never skips, even when they sit
    /// below an excluded directory.
    allowed_dirs: Vec<PathBuf>,
}
128
129impl SimpleIndexerConfig {
130 pub fn new(workspace_root: PathBuf) -> Self {
132 let index_dir = workspace_root.join(".vtcode").join("index");
133 let vtcode_dir = workspace_root.join(".vtcode");
134 let external_dir = vtcode_dir.join("external");
135
136 let mut excluded_dirs = vec![
137 index_dir.clone(),
138 vtcode_dir,
139 workspace_root.join("target"),
140 workspace_root.join("node_modules"),
141 ];
142
143 excluded_dirs.dedup();
144
145 Self {
146 workspace_root,
147 index_dir,
148 ignore_hidden: true,
149 excluded_dirs,
150 allowed_dirs: vec![external_dir],
151 }
152 }
153
154 pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
156 let index_dir = index_dir.into();
157 self.index_dir = index_dir.clone();
158 self.push_unique_excluded(index_dir);
159 self
160 }
161
162 pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
164 let path = path.into();
165 if !self.allowed_dirs.iter().any(|existing| existing == &path) {
166 self.allowed_dirs.push(path);
167 }
168 self
169 }
170
171 pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
173 let path = path.into();
174 self.push_unique_excluded(path);
175 self
176 }
177
178 pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
180 self.ignore_hidden = ignore_hidden;
181 self
182 }
183
184 pub fn workspace_root(&self) -> &Path {
186 &self.workspace_root
187 }
188
189 pub fn index_dir(&self) -> &Path {
191 &self.index_dir
192 }
193
194 fn push_unique_excluded(&mut self, path: PathBuf) {
195 if !self.excluded_dirs.iter().any(|existing| existing == &path) {
196 self.excluded_dirs.push(path);
197 }
198 }
199}
200
201#[derive(Debug, Clone, Serialize, Deserialize)]
203pub struct FileIndex {
204 pub path: String,
206 pub hash: String,
208 pub modified: u64,
210 pub size: u64,
212 pub language: String,
214 pub tags: Vec<String>,
216}
217
218#[derive(Debug, Clone, Serialize, Deserialize)]
220pub struct SearchResult {
221 pub file_path: String,
222 pub line_number: usize,
223 pub line_content: String,
224 pub matches: Vec<String>,
225}
226
227pub struct SimpleIndexer {
229 config: SimpleIndexerConfig,
230 index_cache: HashMap<String, FileIndex>,
231 storage: Arc<dyn IndexStorage>,
232 filter: Arc<dyn TraversalFilter>,
233}
234
235impl SimpleIndexer {
236 pub fn new(workspace_root: PathBuf) -> Self {
238 Self::with_components(
239 SimpleIndexerConfig::new(workspace_root),
240 Arc::new(MarkdownIndexStorage),
241 Arc::new(ConfigTraversalFilter),
242 )
243 }
244
245 pub fn with_config(config: SimpleIndexerConfig) -> Self {
247 Self::with_components(
248 config,
249 Arc::new(MarkdownIndexStorage),
250 Arc::new(ConfigTraversalFilter),
251 )
252 }
253
254 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
256 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
257 Self::with_config(config)
258 }
259
260 pub fn with_components(
262 config: SimpleIndexerConfig,
263 storage: Arc<dyn IndexStorage>,
264 filter: Arc<dyn TraversalFilter>,
265 ) -> Self {
266 Self {
267 config,
268 index_cache: HashMap::new(),
269 storage,
270 filter,
271 }
272 }
273
274 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
276 Self { storage, ..self }
277 }
278
279 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
281 Self { filter, ..self }
282 }
283
284 pub fn init(&self) -> Result<()> {
286 self.storage.init(self.config.index_dir())
287 }
288
289 pub fn workspace_root(&self) -> &Path {
291 self.config.workspace_root()
292 }
293
294 pub fn index_dir(&self) -> &Path {
296 self.config.index_dir()
297 }
298
299 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
301 if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
302 return Ok(());
303 }
304
305 let content = match fs::read_to_string(file_path) {
306 Ok(text) => text,
307 Err(err) => {
308 if err.kind() == ErrorKind::InvalidData {
309 return Ok(());
310 }
311 return Err(err.into());
312 }
313 };
314 let hash = calculate_hash(&content);
315 let modified = self.get_modified_time(file_path)?;
316 let size = content.len() as u64;
317 let language = self.detect_language(file_path);
318
319 let index = FileIndex {
320 path: file_path.to_string_lossy().into_owned(),
321 hash,
322 modified,
323 size,
324 language,
325 tags: vec![],
326 };
327
328 self.index_cache
329 .insert(file_path.to_string_lossy().into_owned(), index.clone());
330
331 self.storage.persist(self.config.index_dir(), &index)?;
332
333 Ok(())
334 }
335
336 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
340 let walker = WalkBuilder::new(dir_path)
341 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
348
349 for entry in walker.filter_map(|e| e.ok()) {
350 let path = entry.path();
351
352 if entry.file_type().is_some_and(|ft| ft.is_file()) {
354 let should_skip = self
356 .config
357 .excluded_dirs
358 .iter()
359 .any(|excluded| path.starts_with(excluded));
360
361 if !should_skip && self.filter.should_index_file(path, &self.config) {
362 self.index_file(path)?;
363 }
364 }
365 }
366
367 Ok(())
368 }
369
370 pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
373 let walker = WalkBuilder::new(dir_path)
374 .hidden(true)
375 .git_ignore(true)
376 .git_global(true)
377 .git_exclude(true)
378 .ignore(true)
379 .parents(true)
380 .build();
381
382 walker
383 .filter_map(|e| e.ok())
384 .filter(|e| {
385 if !e.file_type().is_some_and(|ft| ft.is_file()) {
386 return false;
387 }
388 let path = e.path();
389 let should_skip = self
390 .config
391 .excluded_dirs
392 .iter()
393 .any(|excluded| path.starts_with(excluded));
394
395 !should_skip && self.filter.should_index_file(path, &self.config)
396 })
397 .map(|e| e.path().to_string_lossy().into_owned())
398 .collect()
399 }
400
401 fn search_files_internal(
404 &self,
405 regex: &Regex,
406 path_filter: Option<&str>,
407 extract_matches: bool,
408 ) -> Vec<SearchResult> {
409 let mut results = Vec::new();
410
411 for file_path in self.index_cache.keys() {
412 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
413 continue;
414 }
415
416 if let Ok(content) = fs::read_to_string(file_path) {
417 for (line_num, line) in content.lines().enumerate() {
418 if regex.is_match(line) {
419 let matches = if extract_matches {
420 regex
421 .find_iter(line)
422 .map(|m| m.as_str().to_string())
423 .collect()
424 } else {
425 vec![line.to_string()]
426 };
427
428 results.push(SearchResult {
429 file_path: file_path.clone(),
430 line_number: line_num + 1,
431 line_content: line.to_string(),
432 matches,
433 });
434 }
435 }
436 }
437 }
438
439 results
440 }
441
442 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
444 let regex = Regex::new(pattern)?;
445 Ok(self.search_files_internal(®ex, path_filter, true))
446 }
447
448 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
450 let regex = Regex::new(pattern)?;
451 let mut results = Vec::new();
452
453 for file_path in self.index_cache.keys() {
454 if regex.is_match(file_path) {
455 results.push(file_path.clone());
456 }
457 }
458
459 Ok(results)
460 }
461
462 pub fn all_files(&self) -> Vec<String> {
465 self.index_cache.keys().cloned().collect()
466 }
467
468 pub fn get_file_content(
470 &self,
471 file_path: &str,
472 start_line: Option<usize>,
473 end_line: Option<usize>,
474 ) -> Result<String> {
475 let content = fs::read_to_string(file_path)?;
476 let lines: Vec<&str> = content.lines().collect();
477
478 let start = start_line.unwrap_or(1).saturating_sub(1);
479 let end = end_line.unwrap_or(lines.len());
480
481 let selected_lines = &lines[start..end.min(lines.len())];
482
483 let mut result = String::new();
484 for (i, line) in selected_lines.iter().enumerate() {
485 result.push_str(&format!("{}: {}\n", start + i + 1, line));
486 }
487
488 Ok(result)
489 }
490
491 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
493 let path = Path::new(dir_path);
494 if !path.exists() {
495 return Ok(vec![]);
496 }
497
498 let mut files = Vec::new();
499
500 for entry in fs::read_dir(path)? {
501 let entry = entry?;
502 let file_name = entry.file_name().to_string_lossy().into_owned();
503
504 if !show_hidden && file_name.starts_with('.') {
505 continue;
506 }
507
508 files.push(file_name);
509 }
510
511 Ok(files)
512 }
513
514 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
516 let regex = Regex::new(pattern)?;
517 Ok(self.search_files_internal(®ex, file_pattern, false))
518 }
519
520 #[allow(dead_code)]
521 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
522 where
523 F: FnMut(&Path) -> Result<()>,
524 {
525 if !dir_path.exists() {
526 return Ok(());
527 }
528
529 self.walk_directory_internal(dir_path, callback)
530 }
531
532 #[allow(dead_code)]
533 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
534 where
535 F: FnMut(&Path) -> Result<()>,
536 {
537 for entry in fs::read_dir(dir_path)? {
538 let entry = entry?;
539 let path = entry.path();
540
541 if path.is_dir() {
542 if self.is_allowed_dir(&path) {
543 self.walk_directory_internal(&path, callback)?;
544 continue;
545 }
546
547 if !self.filter.should_descend(&path, &self.config) {
548 self.walk_allowed_descendants(&path, callback)?;
549 continue;
550 }
551
552 self.walk_directory_internal(&path, callback)?;
553 } else if path.is_file() {
554 callback(&path)?;
555 }
556 }
557
558 Ok(())
559 }
560
561 #[allow(dead_code)]
562 fn is_allowed_dir(&self, path: &Path) -> bool {
563 self.config
564 .allowed_dirs
565 .iter()
566 .any(|allowed| path.starts_with(allowed))
567 }
568
569 #[allow(dead_code)]
570 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
571 where
572 F: FnMut(&Path) -> Result<()>,
573 {
574 let allowed_dirs = self.config.allowed_dirs.clone();
575 for allowed in allowed_dirs {
576 if allowed.starts_with(dir_path) && allowed.exists() {
577 self.walk_directory_internal(&allowed, callback)?;
578 }
579 }
580 Ok(())
581 }
582
583 #[inline]
584 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
585 let metadata = fs::metadata(file_path)?;
586 let modified = metadata.modified()?;
587 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
588 }
589
590 #[inline]
591 fn detect_language(&self, file_path: &Path) -> String {
592 file_path
593 .extension()
594 .and_then(|ext| ext.to_str())
595 .unwrap_or("unknown")
596 .to_string()
597 }
598}
599
600impl Clone for SimpleIndexer {
601 fn clone(&self) -> Self {
602 Self {
603 config: self.config.clone(),
604 index_cache: self.index_cache.clone(),
605 storage: self.storage.clone(),
606 filter: self.filter.clone(),
607 }
608 }
609}
610
611fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
612 if config
613 .allowed_dirs
614 .iter()
615 .any(|allowed| path.starts_with(allowed))
616 {
617 return false;
618 }
619
620 if config
621 .excluded_dirs
622 .iter()
623 .any(|excluded| path.starts_with(excluded))
624 {
625 return true;
626 }
627
628 if config.ignore_hidden
629 && path
630 .file_name()
631 .and_then(|name| name.to_str())
632 .is_some_and(|name_str| name_str.starts_with('.'))
633 {
634 return true;
635 }
636
637 false
638}
639
/// Returns the lowercase hexadecimal hash of `content` computed with the
/// standard library's `DefaultHasher`.
///
/// NOTE(review): the standard library does not specify `DefaultHasher`'s
/// algorithm across releases, so values stored on disk may not survive a
/// toolchain upgrade. Confirm that is acceptable for persisted index names.
#[inline]
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:x}")
}
649
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// With the default configuration, files below a dot-prefixed directory
    /// are not indexed while files in ordinary directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let root = temp.path();

        let private_dir = root.join(".private");
        fs::create_dir_all(&private_dir)?;
        fs::write(private_dir.join("secret.txt"), "classified")?;

        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir)?;
        fs::write(src_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(root.to_path_buf());
        indexer.init()?;
        indexer.index_directory(root)?;

        let hidden_hits = indexer.find_files("secret\\.txt$")?;
        let visible_hits = indexer.find_files("lib\\.rs$")?;
        assert!(hidden_hits.is_empty());
        assert!(!visible_hits.is_empty());

        Ok(())
    }

    /// Turning `ignore_hidden` off makes files below a dot-prefixed
    /// directory indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let root = temp.path();

        let cache_dir = root.join(".cache");
        fs::create_dir_all(&cache_dir)?;
        fs::write(cache_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(root.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(root)?;

        let hits = indexer.find_files("data\\.log$")?;
        assert_eq!(hits.len(), 1);

        Ok(())
    }

    /// A custom `IndexStorage` receives exactly the entries that were
    /// indexed.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        /// Storage that records entries in a shared in-memory list.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                self.records
                    .lock()
                    .expect("lock poisoned")
                    .push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let root = temp.path();
        let note_path = root.join("notes.txt");
        fs::write(&note_path, "remember this")?;

        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(Arc::clone(&records));

        let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(root)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].path, note_path.to_string_lossy().into_owned());

        Ok(())
    }

    /// A custom `TraversalFilter` can veto files the default filter would
    /// accept.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        /// Rejects `.rs` files and defers everything else to the default
        /// filter.
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                let is_rust = path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"));

                !is_rust && self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let root = temp.path();
        fs::write(root.join("lib.rs"), "fn main() {}")?;
        fs::write(root.join("README.md"), "# Notes")?;

        let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(root)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
}