1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
/// Persistence backend for index entries.
///
/// The indexer passes its configured index directory on every call; an
/// implementation is free to ignore it.
pub trait IndexStorage: Send + Sync {
    /// Prepares the backend for use with `index_dir`.
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Stores the metadata of a single indexed file.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
}
27
/// Decides which directories are traversed and which files are indexed.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the directory at `path` should be walked.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be added to the index.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
36
37#[derive(Debug, Default, Clone)]
39pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42 fn init(&self, index_dir: &Path) -> Result<()> {
43 fs::create_dir_all(index_dir)?;
44 Ok(())
45 }
46
47 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48 let file_name = format!("{}.md", calculate_hash(&entry.path));
49 let index_path = index_dir.join(file_name);
50
51 let markdown = format!(
52 "# File Index: {}\n\n\
53 - **Path**: {}\n\
54 - **Hash**: {}\n\
55 - **Modified**: {}\n\
56 - **Size**: {} bytes\n\
57 - **Language**: {}\n\
58 - **Tags**: {}\n\n",
59 entry.path,
60 entry.path,
61 entry.hash,
62 entry.modified,
63 entry.size,
64 entry.language,
65 entry.tags.join(", ")
66 );
67
68 fs::write(index_path, markdown)?;
69 Ok(())
70 }
71}
72
73#[derive(Debug, Default, Clone)]
75pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79 !should_skip_dir(path, config)
80 }
81
82 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83 if !path.is_file() {
84 return false;
85 }
86
87 if config.ignore_hidden
89 && path
90 .file_name()
91 .and_then(|n| n.to_str())
92 .is_some_and(|s| s.starts_with('.'))
93 {
94 return false;
95 }
96
97 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
99 let is_sensitive = matches!(
100 file_name,
101 ".env"
102 | ".env.local"
103 | ".env.production"
104 | ".env.development"
105 | ".env.test"
106 | ".git"
107 | ".gitignore"
108 | ".DS_Store"
109 ) || file_name.starts_with(".env.");
110 if is_sensitive {
111 return false;
112 }
113 }
114
115 true
116 }
117}
118
/// Settings that control where the index is stored and which directories are traversed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory handed to the storage backend.
    index_dir: PathBuf,
    /// When `true`, entries whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directory prefixes whose contents are never indexed.
    excluded_dirs: Vec<PathBuf>,
    /// Directory prefixes that stay eligible even beneath an excluded or hidden directory.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// The index lives in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` under the root are excluded, `.vtcode/external` is
    /// allowed, and hidden entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];

        // The four paths are pairwise distinct, so no de-duplication is needed.
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Replaces the index directory and excludes the new location from indexing.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let new_dir: PathBuf = index_dir.into();
        self.push_unique_excluded(new_dir.clone());
        self.index_dir = new_dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already present.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate: PathBuf = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Adds `path` to the excluded directories unless it is already present.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether entries whose name starts with `.` are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Directory handed to the storage backend.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to the exclusion list, keeping the list free of duplicates.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
200
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file, converted lossily to a string.
    pub path: String,
    /// Hexadecimal hash of the file's text content.
    pub hash: String,
    /// Last modification time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when there is none.
    pub language: String,
    /// Free-form tags; the indexer in this file always stores an empty list.
    pub tags: Vec<String>,
}
217
/// One matching line produced by a search over the indexed files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match, as stored in the index cache.
    pub file_path: String,
    /// 1-based number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Either every regex match on the line or, for `grep`, the whole line.
    pub matches: Vec<String>,
}
226
/// File indexer combining a configuration, an in-memory cache, a storage
/// backend and a traversal filter.
pub struct SimpleIndexer {
    /// Locations and traversal rules.
    config: SimpleIndexerConfig,
    /// Indexed entries keyed by the file path as a string.
    index_cache: HashMap<String, FileIndex>,
    /// Backend that persists each entry.
    storage: Arc<dyn IndexStorage>,
    /// Policy deciding which directories and files are eligible.
    filter: Arc<dyn TraversalFilter>,
}
234
235impl SimpleIndexer {
236 pub fn new(workspace_root: PathBuf) -> Self {
238 Self::with_components(
239 SimpleIndexerConfig::new(workspace_root),
240 Arc::new(MarkdownIndexStorage),
241 Arc::new(ConfigTraversalFilter),
242 )
243 }
244
245 pub fn with_config(config: SimpleIndexerConfig) -> Self {
247 Self::with_components(
248 config,
249 Arc::new(MarkdownIndexStorage),
250 Arc::new(ConfigTraversalFilter),
251 )
252 }
253
254 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
256 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
257 Self::with_config(config)
258 }
259
260 pub fn with_components(
262 config: SimpleIndexerConfig,
263 storage: Arc<dyn IndexStorage>,
264 filter: Arc<dyn TraversalFilter>,
265 ) -> Self {
266 Self {
267 config,
268 index_cache: HashMap::new(),
269 storage,
270 filter,
271 }
272 }
273
274 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
276 Self { storage, ..self }
277 }
278
279 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
281 Self { filter, ..self }
282 }
283
284 pub fn init(&self) -> Result<()> {
286 self.storage.init(self.config.index_dir())
287 }
288
289 pub fn workspace_root(&self) -> &Path {
291 self.config.workspace_root()
292 }
293
294 pub fn index_dir(&self) -> &Path {
296 self.config.index_dir()
297 }
298
299 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
301 if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
302 return Ok(());
303 }
304
305 let content = match fs::read_to_string(file_path) {
306 Ok(text) => text,
307 Err(err) => {
308 if err.kind() == ErrorKind::InvalidData {
309 return Ok(());
310 }
311 return Err(err.into());
312 }
313 };
314 let hash = calculate_hash(&content);
315 let modified = self.get_modified_time(file_path)?;
316 let size = content.len() as u64;
317 let language = self.detect_language(file_path);
318
319 let index = FileIndex {
320 path: file_path.to_string_lossy().into_owned(),
321 hash,
322 modified,
323 size,
324 language,
325 tags: vec![],
326 };
327
328 self.index_cache
329 .insert(file_path.to_string_lossy().into_owned(), index.clone());
330
331 self.storage.persist(self.config.index_dir(), &index)?;
332
333 Ok(())
334 }
335
336 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
340 let walker = WalkBuilder::new(dir_path)
341 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
348
349 for entry in walker.filter_map(|e| e.ok()) {
350 let path = entry.path();
351
352 if entry.file_type().is_some_and(|ft| ft.is_file()) {
354 let should_skip = self
356 .config
357 .excluded_dirs
358 .iter()
359 .any(|excluded| path.starts_with(excluded));
360
361 if !should_skip && self.filter.should_index_file(path, &self.config) {
362 self.index_file(path)?;
363 }
364 }
365 }
366
367 Ok(())
368 }
369
370 fn search_files_internal(
373 &self,
374 regex: &Regex,
375 path_filter: Option<&str>,
376 extract_matches: bool,
377 ) -> Vec<SearchResult> {
378 let mut results = Vec::new();
379
380 for file_path in self.index_cache.keys() {
381 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
382 continue;
383 }
384
385 if let Ok(content) = fs::read_to_string(file_path) {
386 for (line_num, line) in content.lines().enumerate() {
387 if regex.is_match(line) {
388 let matches = if extract_matches {
389 regex
390 .find_iter(line)
391 .map(|m| m.as_str().to_string())
392 .collect()
393 } else {
394 vec![line.to_string()]
395 };
396
397 results.push(SearchResult {
398 file_path: file_path.clone(),
399 line_number: line_num + 1,
400 line_content: line.to_string(),
401 matches,
402 });
403 }
404 }
405 }
406 }
407
408 results
409 }
410
411 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
413 let regex = Regex::new(pattern)?;
414 Ok(self.search_files_internal(®ex, path_filter, true))
415 }
416
417 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
419 let regex = Regex::new(pattern)?;
420 let mut results = Vec::new();
421
422 for file_path in self.index_cache.keys() {
423 if regex.is_match(file_path) {
424 results.push(file_path.clone());
425 }
426 }
427
428 Ok(results)
429 }
430
431 pub fn all_files(&self) -> Vec<String> {
434 self.index_cache.keys().cloned().collect()
435 }
436
437 pub fn get_file_content(
439 &self,
440 file_path: &str,
441 start_line: Option<usize>,
442 end_line: Option<usize>,
443 ) -> Result<String> {
444 let content = fs::read_to_string(file_path)?;
445 let lines: Vec<&str> = content.lines().collect();
446
447 let start = start_line.unwrap_or(1).saturating_sub(1);
448 let end = end_line.unwrap_or(lines.len());
449
450 let selected_lines = &lines[start..end.min(lines.len())];
451
452 let mut result = String::new();
453 for (i, line) in selected_lines.iter().enumerate() {
454 result.push_str(&format!("{}: {}\n", start + i + 1, line));
455 }
456
457 Ok(result)
458 }
459
460 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
462 let path = Path::new(dir_path);
463 if !path.exists() {
464 return Ok(vec![]);
465 }
466
467 let mut files = Vec::new();
468
469 for entry in fs::read_dir(path)? {
470 let entry = entry?;
471 let file_name = entry.file_name().to_string_lossy().into_owned();
472
473 if !show_hidden && file_name.starts_with('.') {
474 continue;
475 }
476
477 files.push(file_name);
478 }
479
480 Ok(files)
481 }
482
483 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
485 let regex = Regex::new(pattern)?;
486 Ok(self.search_files_internal(®ex, file_pattern, false))
487 }
488
489 #[allow(dead_code)]
490 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
491 where
492 F: FnMut(&Path) -> Result<()>,
493 {
494 if !dir_path.exists() {
495 return Ok(());
496 }
497
498 self.walk_directory_internal(dir_path, callback)
499 }
500
501 #[allow(dead_code)]
502 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
503 where
504 F: FnMut(&Path) -> Result<()>,
505 {
506 for entry in fs::read_dir(dir_path)? {
507 let entry = entry?;
508 let path = entry.path();
509
510 if path.is_dir() {
511 if self.is_allowed_dir(&path) {
512 self.walk_directory_internal(&path, callback)?;
513 continue;
514 }
515
516 if !self.filter.should_descend(&path, &self.config) {
517 self.walk_allowed_descendants(&path, callback)?;
518 continue;
519 }
520
521 self.walk_directory_internal(&path, callback)?;
522 } else if path.is_file() {
523 callback(&path)?;
524 }
525 }
526
527 Ok(())
528 }
529
530 #[allow(dead_code)]
531 fn is_allowed_dir(&self, path: &Path) -> bool {
532 self.config
533 .allowed_dirs
534 .iter()
535 .any(|allowed| path.starts_with(allowed))
536 }
537
538 #[allow(dead_code)]
539 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
540 where
541 F: FnMut(&Path) -> Result<()>,
542 {
543 let allowed_dirs = self.config.allowed_dirs.clone();
544 for allowed in allowed_dirs {
545 if allowed.starts_with(dir_path) && allowed.exists() {
546 self.walk_directory_internal(&allowed, callback)?;
547 }
548 }
549 Ok(())
550 }
551
552 #[inline]
553 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
554 let metadata = fs::metadata(file_path)?;
555 let modified = metadata.modified()?;
556 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
557 }
558
559 #[inline]
560 fn detect_language(&self, file_path: &Path) -> String {
561 file_path
562 .extension()
563 .and_then(|ext| ext.to_str())
564 .unwrap_or("unknown")
565 .to_string()
566 }
567}
568
569impl Clone for SimpleIndexer {
570 fn clone(&self) -> Self {
571 Self {
572 config: self.config.clone(),
573 index_cache: self.index_cache.clone(),
574 storage: self.storage.clone(),
575 filter: self.filter.clone(),
576 }
577 }
578}
579
580fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
581 if config
582 .allowed_dirs
583 .iter()
584 .any(|allowed| path.starts_with(allowed))
585 {
586 return false;
587 }
588
589 if config
590 .excluded_dirs
591 .iter()
592 .any(|excluded| path.starts_with(excluded))
593 {
594 return true;
595 }
596
597 if config.ignore_hidden
598 && path
599 .file_name()
600 .and_then(|name| name.to_str())
601 .is_some_and(|name_str| name_str.starts_with('.'))
602 {
603 return true;
604 }
605
606 false
607}
608
/// Returns the lowercase hexadecimal form of the 64-bit `DefaultHasher`
/// digest of `content`.
///
/// The digest is not cryptographic. The standard library does not specify
/// `DefaultHasher`'s algorithm across releases, so values may differ between
/// toolchain versions.
#[inline]
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest: u64 = state.finish();
    format!("{:x}", digest)
}
618
619#[cfg(test)]
620mod tests {
621 use super::*;
622 use std::fs;
623 use std::sync::{Arc, Mutex};
624 use tempfile::tempdir;
625
/// With the default configuration, files inside a dot-directory are not
/// indexed while files in ordinary directories are.
#[test]
fn skips_hidden_directories_by_default() -> Result<()> {
    let temp = tempdir()?;
    let workspace = temp.path();
    // A hidden directory holding a file that must stay out of the index.
    let hidden_dir = workspace.join(".private");
    fs::create_dir_all(&hidden_dir)?;
    fs::write(hidden_dir.join("secret.txt"), "classified")?;

    // A regular directory holding a file that must be indexed.
    let visible_dir = workspace.join("src");
    fs::create_dir_all(&visible_dir)?;
    fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

    let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
    indexer.init()?;
    indexer.index_directory(workspace)?;

    assert!(indexer.find_files("secret\\.txt$")?.is_empty());
    assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

    Ok(())
}
647
/// With `ignore_hidden(false)`, a file inside a dot-directory is indexed.
/// This requires the directory walk itself to honour that setting.
#[test]
fn can_include_hidden_directories_when_configured() -> Result<()> {
    let temp = tempdir()?;
    let workspace = temp.path();
    let hidden_dir = workspace.join(".cache");
    fs::create_dir_all(&hidden_dir)?;
    fs::write(hidden_dir.join("data.log"), "details")?;

    // Opt in to hidden entries.
    let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
    let mut indexer = SimpleIndexer::with_config(config);
    indexer.init()?;
    indexer.index_directory(workspace)?;

    let results = indexer.find_files("data\\.log$")?;
    assert_eq!(results.len(), 1);

    Ok(())
}
666
/// A custom `IndexStorage` receives exactly one entry for the single
/// indexed file, carrying that file's path.
#[test]
fn supports_custom_storage_backends() -> Result<()> {
    // In-memory backend that records every persisted entry in a shared vector.
    #[derive(Clone, Default)]
    struct MemoryStorage {
        records: Arc<Mutex<Vec<FileIndex>>>,
    }

    impl MemoryStorage {
        fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
            Self { records }
        }
    }

    impl IndexStorage for MemoryStorage {
        // Nothing to prepare: the index directory is never touched.
        fn init(&self, _index_dir: &Path) -> Result<()> {
            Ok(())
        }

        fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
            let mut guard = self.records.lock().expect("lock poisoned");
            guard.push(entry.clone());
            Ok(())
        }
    }

    let temp = tempdir()?;
    let workspace = temp.path();
    fs::write(workspace.join("notes.txt"), "remember this")?;

    // The test keeps its own handle to the vector so it can inspect it afterwards.
    let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
    let storage = MemoryStorage::new(records.clone());

    let config = SimpleIndexerConfig::new(workspace.to_path_buf());
    let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
    indexer.init()?;
    indexer.index_directory(workspace)?;

    let entries = records.lock().expect("lock poisoned");
    assert_eq!(entries.len(), 1);
    assert_eq!(
        entries[0].path,
        workspace.join("notes.txt").to_string_lossy().into_owned()
    );

    Ok(())
}
713
/// A custom `TraversalFilter` can reject files the default filter would accept.
#[test]
fn custom_filters_can_skip_files() -> Result<()> {
    // Rejects `.rs` files (case-insensitively) and defers to the default
    // filter for everything else.
    #[derive(Default)]
    struct SkipRustFilter {
        inner: ConfigTraversalFilter,
    }

    impl TraversalFilter for SkipRustFilter {
        fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            self.inner.should_descend(path, config)
        }

        fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
            if path
                .extension()
                .and_then(|ext| ext.to_str())
                .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
            {
                return false;
            }

            self.inner.should_index_file(path, config)
        }
    }

    let temp = tempdir()?;
    let workspace = temp.path();
    fs::write(workspace.join("lib.rs"), "fn main() {}")?;
    fs::write(workspace.join("README.md"), "# Notes")?;

    let config = SimpleIndexerConfig::new(workspace.to_path_buf());
    let mut indexer =
        SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
    indexer.init()?;
    indexer.index_directory(workspace)?;

    assert!(indexer.find_files("lib\\.rs$")?.is_empty());
    assert!(!indexer.find_files("README\\.md$")?.is_empty());

    Ok(())
}
755}