1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
19pub trait IndexStorage: Send + Sync {
21 fn init(&self, index_dir: &Path) -> Result<()>;
23
24 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
26}
27
28pub trait TraversalFilter: Send + Sync {
30 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
32
33 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
35}
36
37#[derive(Debug, Default, Clone)]
39pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42 fn init(&self, index_dir: &Path) -> Result<()> {
43 fs::create_dir_all(index_dir)?;
44 Ok(())
45 }
46
47 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48 let file_name = format!("{}.md", calculate_hash(&entry.path));
49 let index_path = index_dir.join(file_name);
50
51 let markdown = format!(
52 "# File Index: {}\n\n\
53 - **Path**: {}\n\
54 - **Hash**: {}\n\
55 - **Modified**: {}\n\
56 - **Size**: {} bytes\n\
57 - **Language**: {}\n\
58 - **Tags**: {}\n\n",
59 entry.path,
60 entry.path,
61 entry.hash,
62 entry.modified,
63 entry.size,
64 entry.language,
65 entry.tags.join(", ")
66 );
67
68 fs::write(index_path, markdown)?;
69 Ok(())
70 }
71}
72
73#[derive(Debug, Default, Clone)]
75pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79 !should_skip_dir(path, config)
80 }
81
82 fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83 if !path.is_file() {
84 return false;
85 }
86
87 if config.ignore_hidden {
89 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
90 if file_name.starts_with('.') {
91 return false;
92 }
93 }
94 }
95
96 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
98 let sensitive_files = [
99 ".env",
100 ".env.local",
101 ".env.production",
102 ".env.development",
103 ".env.test",
104 ".git",
105 ".gitignore",
106 ".DS_Store",
107 ];
108
109 if sensitive_files
110 .iter()
111 .any(|s| file_name == *s || file_name.starts_with(".env."))
112 {
113 return false;
114 }
115 }
116
117 true
118 }
119}
120
/// Settings controlling where the index is stored and which paths are traversed.
#[derive(Debug, Clone)]
pub struct SimpleIndexerConfig {
    /// Root directory of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory handed to the storage backend.
    index_dir: PathBuf,
    /// Whether entries whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directories whose contents are excluded from indexing.
    excluded_dirs: Vec<PathBuf>,
    /// Directories that stay eligible even when located under an excluded one.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// The index lives in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` are excluded, `.vtcode/external` is allowed, and hidden
    /// entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];

        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Replaces the index directory and also excludes it from indexing.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let dir = index_dir.into();
        self.push_unique_excluded(dir.clone());
        self.index_dir = dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already listed.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Adds `path` to the excluded directories unless it is already listed.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether entries whose name starts with `.` are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root directory of the workspace.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Directory handed to the storage backend.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to `excluded_dirs` only when it is not present yet.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if self.excluded_dirs.contains(&path) {
            return;
        }
        self.excluded_dirs.push(path);
    }
}
202
203#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct FileIndex {
206 pub path: String,
208 pub hash: String,
210 pub modified: u64,
212 pub size: u64,
214 pub language: String,
216 pub tags: Vec<String>,
218}
219
220#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct SearchResult {
223 pub file_path: String,
224 pub line_number: usize,
225 pub line_content: String,
226 pub matches: Vec<String>,
227}
228
229pub struct SimpleIndexer {
231 config: SimpleIndexerConfig,
232 index_cache: HashMap<String, FileIndex>,
233 storage: Arc<dyn IndexStorage>,
234 filter: Arc<dyn TraversalFilter>,
235}
236
237impl SimpleIndexer {
238 pub fn new(workspace_root: PathBuf) -> Self {
240 Self::with_components(
241 SimpleIndexerConfig::new(workspace_root),
242 Arc::new(MarkdownIndexStorage),
243 Arc::new(ConfigTraversalFilter),
244 )
245 }
246
247 pub fn with_config(config: SimpleIndexerConfig) -> Self {
249 Self::with_components(
250 config,
251 Arc::new(MarkdownIndexStorage),
252 Arc::new(ConfigTraversalFilter),
253 )
254 }
255
256 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
258 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
259 Self::with_config(config)
260 }
261
262 pub fn with_components(
264 config: SimpleIndexerConfig,
265 storage: Arc<dyn IndexStorage>,
266 filter: Arc<dyn TraversalFilter>,
267 ) -> Self {
268 Self {
269 config,
270 index_cache: HashMap::new(),
271 storage,
272 filter,
273 }
274 }
275
276 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
278 Self { storage, ..self }
279 }
280
281 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
283 Self { filter, ..self }
284 }
285
286 pub fn init(&self) -> Result<()> {
288 self.storage.init(self.config.index_dir())
289 }
290
291 pub fn workspace_root(&self) -> &Path {
293 self.config.workspace_root()
294 }
295
296 pub fn index_dir(&self) -> &Path {
298 self.config.index_dir()
299 }
300
301 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
303 if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
304 return Ok(());
305 }
306
307 let content = match fs::read_to_string(file_path) {
308 Ok(text) => text,
309 Err(err) => {
310 if err.kind() == ErrorKind::InvalidData {
311 return Ok(());
312 }
313 return Err(err.into());
314 }
315 };
316 let hash = calculate_hash(&content);
317 let modified = self.get_modified_time(file_path)?;
318 let size = content.len() as u64;
319 let language = self.detect_language(file_path);
320
321 let index = FileIndex {
322 path: file_path.to_string_lossy().to_string(),
323 hash,
324 modified,
325 size,
326 language,
327 tags: vec![],
328 };
329
330 self.index_cache
331 .insert(file_path.to_string_lossy().to_string(), index.clone());
332
333 self.storage.persist(self.config.index_dir(), &index)?;
334
335 Ok(())
336 }
337
338 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
342 let walker = WalkBuilder::new(dir_path)
343 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
350
351 for entry in walker.filter_map(|e| e.ok()) {
352 let path = entry.path();
353
354 if entry.file_type().map_or(false, |ft| ft.is_file()) {
356 let should_skip = self
358 .config
359 .excluded_dirs
360 .iter()
361 .any(|excluded| path.starts_with(excluded));
362
363 if !should_skip && self.filter.should_index_file(path, &self.config) {
364 self.index_file(path)?;
365 }
366 }
367 }
368
369 Ok(())
370 }
371
372 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
374 let regex = Regex::new(pattern)?;
375
376 let mut results = Vec::new();
377
378 for file_path in self.index_cache.keys() {
380 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
381 continue;
382 }
383
384 if let Ok(content) = fs::read_to_string(file_path) {
385 for (line_num, line) in content.lines().enumerate() {
386 if regex.is_match(line) {
387 let matches: Vec<String> = regex
388 .find_iter(line)
389 .map(|m| m.as_str().to_string())
390 .collect();
391
392 results.push(SearchResult {
393 file_path: file_path.clone(),
394 line_number: line_num + 1,
395 line_content: line.to_string(),
396 matches,
397 });
398 }
399 }
400 }
401 }
402
403 Ok(results)
404 }
405
406 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
408 let regex = Regex::new(pattern)?;
409 let mut results = Vec::new();
410
411 for file_path in self.index_cache.keys() {
412 if regex.is_match(file_path) {
413 results.push(file_path.clone());
414 }
415 }
416
417 Ok(results)
418 }
419
420 pub fn all_files(&self) -> Vec<String> {
423 self.index_cache.keys().cloned().collect()
424 }
425
426 pub fn get_file_content(
428 &self,
429 file_path: &str,
430 start_line: Option<usize>,
431 end_line: Option<usize>,
432 ) -> Result<String> {
433 let content = fs::read_to_string(file_path)?;
434 let lines: Vec<&str> = content.lines().collect();
435
436 let start = start_line.unwrap_or(1).saturating_sub(1);
437 let end = end_line.unwrap_or(lines.len());
438
439 let selected_lines = &lines[start..end.min(lines.len())];
440
441 let mut result = String::new();
442 for (i, line) in selected_lines.iter().enumerate() {
443 result.push_str(&format!("{}: {}\n", start + i + 1, line));
444 }
445
446 Ok(result)
447 }
448
449 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
451 let path = Path::new(dir_path);
452 if !path.exists() {
453 return Ok(vec![]);
454 }
455
456 let mut files = Vec::new();
457
458 for entry in fs::read_dir(path)? {
459 let entry = entry?;
460 let file_name = entry.file_name().to_string_lossy().to_string();
461
462 if !show_hidden && file_name.starts_with('.') {
463 continue;
464 }
465
466 files.push(file_name);
467 }
468
469 Ok(files)
470 }
471
472 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
474 let regex = Regex::new(pattern)?;
475 let mut results = Vec::new();
476
477 for file_path in self.index_cache.keys() {
478 if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
479 continue;
480 }
481
482 if let Ok(content) = fs::read_to_string(file_path) {
483 for (line_num, line) in content.lines().enumerate() {
484 if regex.is_match(line) {
485 results.push(SearchResult {
486 file_path: file_path.clone(),
487 line_number: line_num + 1,
488 line_content: line.to_string(),
489 matches: vec![line.to_string()],
490 });
491 }
492 }
493 }
494 }
495
496 Ok(results)
497 }
498
499 #[allow(dead_code)]
500 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
501 where
502 F: FnMut(&Path) -> Result<()>,
503 {
504 if !dir_path.exists() {
505 return Ok(());
506 }
507
508 self.walk_directory_internal(dir_path, callback)
509 }
510
511 #[allow(dead_code)]
512 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
513 where
514 F: FnMut(&Path) -> Result<()>,
515 {
516 for entry in fs::read_dir(dir_path)? {
517 let entry = entry?;
518 let path = entry.path();
519
520 if path.is_dir() {
521 if self.is_allowed_dir(&path) {
522 self.walk_directory_internal(&path, callback)?;
523 continue;
524 }
525
526 if !self.filter.should_descend(&path, &self.config) {
527 self.walk_allowed_descendants(&path, callback)?;
528 continue;
529 }
530
531 self.walk_directory_internal(&path, callback)?;
532 } else if path.is_file() {
533 callback(&path)?;
534 }
535 }
536
537 Ok(())
538 }
539
540 #[allow(dead_code)]
541 fn is_allowed_dir(&self, path: &Path) -> bool {
542 self.config
543 .allowed_dirs
544 .iter()
545 .any(|allowed| path.starts_with(allowed))
546 }
547
548 #[allow(dead_code)]
549 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
550 where
551 F: FnMut(&Path) -> Result<()>,
552 {
553 let allowed_dirs = self.config.allowed_dirs.clone();
554 for allowed in allowed_dirs {
555 if allowed.starts_with(dir_path) && allowed.exists() {
556 self.walk_directory_internal(&allowed, callback)?;
557 }
558 }
559 Ok(())
560 }
561
562 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
563 let metadata = fs::metadata(file_path)?;
564 let modified = metadata.modified()?;
565 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
566 }
567
568 fn detect_language(&self, file_path: &Path) -> String {
569 file_path
570 .extension()
571 .and_then(|ext| ext.to_str())
572 .unwrap_or("unknown")
573 .to_string()
574 }
575}
576
577impl Clone for SimpleIndexer {
578 fn clone(&self) -> Self {
579 Self {
580 config: self.config.clone(),
581 index_cache: self.index_cache.clone(),
582 storage: self.storage.clone(),
583 filter: self.filter.clone(),
584 }
585 }
586}
587
588fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
589 if config
590 .allowed_dirs
591 .iter()
592 .any(|allowed| path.starts_with(allowed))
593 {
594 return false;
595 }
596
597 if config
598 .excluded_dirs
599 .iter()
600 .any(|excluded| path.starts_with(excluded))
601 {
602 return true;
603 }
604
605 if config.ignore_hidden
606 && path
607 .file_name()
608 .and_then(|name| name.to_str())
609 .is_some_and(|name_str| name_str.starts_with('.'))
610 {
611 return true;
612 }
613
614 false
615}
616
/// Hashes `content` with the standard library's `DefaultHasher` and returns the
/// 64-bit result as lowercase hexadecimal (no zero padding).
// NOTE(review): the std docs do not guarantee `DefaultHasher`'s algorithm across
// Rust releases — confirm that is acceptable for values persisted to disk.
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{:x}", digest)
}
625
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// With the default configuration, files inside dot-directories are not indexed.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let private_dir = root.join(".private");
        fs::create_dir_all(&private_dir)?;
        fs::write(private_dir.join("secret.txt"), "classified")?;

        let source_dir = root.join("src");
        fs::create_dir_all(&source_dir)?;
        fs::write(source_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(root.to_path_buf());
        indexer.init()?;
        indexer.index_directory(root)?;

        let hidden_hits = indexer.find_files("secret\\.txt$")?;
        let visible_hits = indexer.find_files("lib\\.rs$")?;
        assert!(hidden_hits.is_empty());
        assert!(!visible_hits.is_empty());

        Ok(())
    }

    /// Turning `ignore_hidden` off makes files inside dot-directories indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let cache_dir = root.join(".cache");
        fs::create_dir_all(&cache_dir)?;
        fs::write(cache_dir.join("data.log"), "details")?;

        let mut indexer = SimpleIndexer::with_config(
            SimpleIndexerConfig::new(root.to_path_buf()).ignore_hidden(false),
        );
        indexer.init()?;
        indexer.index_directory(root)?;

        let hits = indexer.find_files("data\\.log$")?;
        assert_eq!(hits.len(), 1);

        Ok(())
    }

    /// A custom `IndexStorage` receives every persisted entry.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        /// Storage that records entries in memory instead of on disk.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                self.records
                    .lock()
                    .expect("lock poisoned")
                    .push(entry.clone());
                Ok(())
            }
        }

        let sandbox = tempdir()?;
        let root = sandbox.path();
        fs::write(root.join("notes.txt"), "remember this")?;

        let shared: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let backend = MemoryStorage::new(Arc::clone(&shared));

        let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_storage(Arc::new(backend));
        indexer.init()?;
        indexer.index_directory(root)?;

        let persisted = shared.lock().expect("lock poisoned");
        assert_eq!(persisted.len(), 1);
        assert_eq!(
            persisted[0].path,
            root.join("notes.txt").to_string_lossy().to_string()
        );

        Ok(())
    }

    /// A custom `TraversalFilter` can veto files the default filter would accept.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        /// Rejects `.rs` files and defers everything else to the default filter.
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                let is_rust = path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"));

                !is_rust && self.inner.should_index_file(path, config)
            }
        }

        let sandbox = tempdir()?;
        let root = sandbox.path();
        fs::write(root.join("lib.rs"), "fn main() {}")?;
        fs::write(root.join("README.md"), "# Notes")?;

        let mut indexer = SimpleIndexer::with_config(SimpleIndexerConfig::new(root.to_path_buf()))
            .with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(root)?;

        let rust_hits = indexer.find_files("lib\\.rs$")?;
        let readme_hits = indexer.find_files("README\\.md$")?;
        assert!(rust_hits.is_empty());
        assert!(!readme_hits.is_empty());

        Ok(())
    }
}