1use anyhow::Result;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::fs;
13use std::io::ErrorKind;
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::time::SystemTime;
17
/// Persistence backend for index entries.
///
/// Implementations must be `Send + Sync`; `SimpleIndexer` holds its backend as
/// `Arc<dyn IndexStorage>`.
pub trait IndexStorage: Send + Sync {
    /// Prepares the backend for use with `index_dir`.
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Stores a single `entry`; `index_dir` is the index directory the caller is configured with.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
}
26
/// Policy that decides which directories are walked and which files are indexed.
///
/// Implementations must be `Send + Sync`; `SimpleIndexer` holds its filter as
/// `Arc<dyn TraversalFilter>`.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the walker should recurse into the directory at `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
35
36#[derive(Debug, Default, Clone)]
38pub struct MarkdownIndexStorage;
39
40impl IndexStorage for MarkdownIndexStorage {
41 fn init(&self, index_dir: &Path) -> Result<()> {
42 fs::create_dir_all(index_dir)?;
43 Ok(())
44 }
45
46 fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
47 let file_name = format!("{}.md", calculate_hash(&entry.path));
48 let index_path = index_dir.join(file_name);
49
50 let markdown = format!(
51 "# File Index: {}\n\n\
52 - **Path**: {}\n\
53 - **Hash**: {}\n\
54 - **Modified**: {}\n\
55 - **Size**: {} bytes\n\
56 - **Language**: {}\n\
57 - **Tags**: {}\n\n",
58 entry.path,
59 entry.path,
60 entry.hash,
61 entry.modified,
62 entry.size,
63 entry.language,
64 entry.tags.join(", ")
65 );
66
67 fs::write(index_path, markdown)?;
68 Ok(())
69 }
70}
71
72#[derive(Debug, Default, Clone)]
74pub struct ConfigTraversalFilter;
75
76impl TraversalFilter for ConfigTraversalFilter {
77 fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
78 !should_skip_dir(path, config)
79 }
80
81 fn should_index_file(&self, path: &Path, _config: &SimpleIndexerConfig) -> bool {
82 path.is_file()
83 }
84}
85
/// Settings controlling where the index is written and which directories are traversed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory passed to the storage backend.
    index_dir: PathBuf,
    /// When `true`, directories whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directory trees that are not descended into.
    excluded_dirs: Vec<PathBuf>,
    /// Directory trees that are traversed even when an exclusion or the hidden rule would apply.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// Defaults: index under `.vtcode/index`; `.vtcode`, `target` and `node_modules` excluded;
    /// `.vtcode/external` explicitly allowed; hidden directories ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let external_dir = vtcode_dir.join("external");
        let target_dir = workspace_root.join("target");
        let node_modules_dir = workspace_root.join("node_modules");

        // These four paths are pairwise distinct by construction, so no de-duplication is needed.
        Self {
            excluded_dirs: vec![index_dir.clone(), vtcode_dir, target_dir, node_modules_dir],
            allowed_dirs: vec![external_dir],
            ignore_hidden: true,
            index_dir,
            workspace_root,
        }
    }

    /// Replaces the index directory and makes sure the new location is excluded from traversal.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let dir: PathBuf = index_dir.into();
        self.push_unique_excluded(dir.clone());
        self.index_dir = dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already listed.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate: PathBuf = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Adds `path` to the excluded directories unless it is already listed.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether dot-prefixed directories are skipped during traversal.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Directory passed to the storage backend.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to `excluded_dirs` only when an equal path is not present yet.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if self.excluded_dirs.contains(&path) {
            return;
        }
        self.excluded_dirs.push(path);
    }
}
167
/// Metadata recorded for one indexed file.
///
/// Field descriptions reflect how `SimpleIndexer::index_file` populates the entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// File path, converted lossily to a string.
    pub path: String,
    /// Hex hash of the file's text content.
    pub hash: String,
    /// Last-modified time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when there is none.
    pub language: String,
    /// Free-form tags; the indexer itself records an empty list.
    pub tags: Vec<String>,
}
184
/// One matching line produced by `SimpleIndexer::search` or `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match, as stored in the index cache.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched text: `search` stores each regex match, `grep` stores the whole line.
    pub matches: Vec<String>,
}
193
/// File indexer that keeps an in-memory cache of indexed files and delegates persistence and
/// traversal decisions to pluggable components.
pub struct SimpleIndexer {
    /// Workspace, index-directory and traversal settings.
    config: SimpleIndexerConfig,
    /// Indexed entries keyed by the file's path string.
    index_cache: HashMap<String, FileIndex>,
    /// Backend that persists each indexed entry.
    storage: Arc<dyn IndexStorage>,
    /// Policy deciding which directories are descended into and which files are indexed.
    filter: Arc<dyn TraversalFilter>,
}
201
202impl SimpleIndexer {
203 pub fn new(workspace_root: PathBuf) -> Self {
205 Self::with_components(
206 SimpleIndexerConfig::new(workspace_root),
207 Arc::new(MarkdownIndexStorage),
208 Arc::new(ConfigTraversalFilter),
209 )
210 }
211
212 pub fn with_config(config: SimpleIndexerConfig) -> Self {
214 Self::with_components(
215 config,
216 Arc::new(MarkdownIndexStorage),
217 Arc::new(ConfigTraversalFilter),
218 )
219 }
220
221 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
223 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
224 Self::with_config(config)
225 }
226
227 pub fn with_components(
229 config: SimpleIndexerConfig,
230 storage: Arc<dyn IndexStorage>,
231 filter: Arc<dyn TraversalFilter>,
232 ) -> Self {
233 Self {
234 config,
235 index_cache: HashMap::new(),
236 storage,
237 filter,
238 }
239 }
240
241 pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
243 Self { storage, ..self }
244 }
245
246 pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
248 Self { filter, ..self }
249 }
250
251 pub fn init(&self) -> Result<()> {
253 self.storage.init(self.config.index_dir())
254 }
255
256 pub fn workspace_root(&self) -> &Path {
258 self.config.workspace_root()
259 }
260
261 pub fn index_dir(&self) -> &Path {
263 self.config.index_dir()
264 }
265
266 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
268 if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
269 return Ok(());
270 }
271
272 let content = match fs::read_to_string(file_path) {
273 Ok(text) => text,
274 Err(err) => {
275 if err.kind() == ErrorKind::InvalidData {
276 return Ok(());
277 }
278 return Err(err.into());
279 }
280 };
281 let hash = calculate_hash(&content);
282 let modified = self.get_modified_time(file_path)?;
283 let size = content.len() as u64;
284 let language = self.detect_language(file_path);
285
286 let index = FileIndex {
287 path: file_path.to_string_lossy().to_string(),
288 hash,
289 modified,
290 size,
291 language,
292 tags: vec![],
293 };
294
295 self.index_cache
296 .insert(file_path.to_string_lossy().to_string(), index.clone());
297
298 self.storage.persist(self.config.index_dir(), &index)?;
299
300 Ok(())
301 }
302
303 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
305 let mut file_paths = Vec::new();
306
307 self.walk_directory(dir_path, &mut |file_path| {
309 file_paths.push(file_path.to_path_buf());
310 Ok(())
311 })?;
312
313 for file_path in file_paths {
315 self.index_file(&file_path)?;
316 }
317
318 Ok(())
319 }
320
321 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
323 let regex = Regex::new(pattern)?;
324
325 let mut results = Vec::new();
326
327 for file_path in self.index_cache.keys() {
329 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
330 continue;
331 }
332
333 if let Ok(content) = fs::read_to_string(file_path) {
334 for (line_num, line) in content.lines().enumerate() {
335 if regex.is_match(line) {
336 let matches: Vec<String> = regex
337 .find_iter(line)
338 .map(|m| m.as_str().to_string())
339 .collect();
340
341 results.push(SearchResult {
342 file_path: file_path.clone(),
343 line_number: line_num + 1,
344 line_content: line.to_string(),
345 matches,
346 });
347 }
348 }
349 }
350 }
351
352 Ok(results)
353 }
354
355 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
357 let regex = Regex::new(pattern)?;
358 let mut results = Vec::new();
359
360 for file_path in self.index_cache.keys() {
361 if regex.is_match(file_path) {
362 results.push(file_path.clone());
363 }
364 }
365
366 Ok(results)
367 }
368
369 pub fn get_file_content(
371 &self,
372 file_path: &str,
373 start_line: Option<usize>,
374 end_line: Option<usize>,
375 ) -> Result<String> {
376 let content = fs::read_to_string(file_path)?;
377 let lines: Vec<&str> = content.lines().collect();
378
379 let start = start_line.unwrap_or(1).saturating_sub(1);
380 let end = end_line.unwrap_or(lines.len());
381
382 let selected_lines = &lines[start..end.min(lines.len())];
383
384 let mut result = String::new();
385 for (i, line) in selected_lines.iter().enumerate() {
386 result.push_str(&format!("{}: {}\n", start + i + 1, line));
387 }
388
389 Ok(result)
390 }
391
392 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
394 let path = Path::new(dir_path);
395 if !path.exists() {
396 return Ok(vec![]);
397 }
398
399 let mut files = Vec::new();
400
401 for entry in fs::read_dir(path)? {
402 let entry = entry?;
403 let file_name = entry.file_name().to_string_lossy().to_string();
404
405 if !show_hidden && file_name.starts_with('.') {
406 continue;
407 }
408
409 files.push(file_name);
410 }
411
412 Ok(files)
413 }
414
415 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
417 let regex = Regex::new(pattern)?;
418 let mut results = Vec::new();
419
420 for file_path in self.index_cache.keys() {
421 if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
422 continue;
423 }
424
425 if let Ok(content) = fs::read_to_string(file_path) {
426 for (line_num, line) in content.lines().enumerate() {
427 if regex.is_match(line) {
428 results.push(SearchResult {
429 file_path: file_path.clone(),
430 line_number: line_num + 1,
431 line_content: line.to_string(),
432 matches: vec![line.to_string()],
433 });
434 }
435 }
436 }
437 }
438
439 Ok(results)
440 }
441
442 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
443 where
444 F: FnMut(&Path) -> Result<()>,
445 {
446 if !dir_path.exists() {
447 return Ok(());
448 }
449
450 self.walk_directory_internal(dir_path, callback)
451 }
452
453 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
454 where
455 F: FnMut(&Path) -> Result<()>,
456 {
457 for entry in fs::read_dir(dir_path)? {
458 let entry = entry?;
459 let path = entry.path();
460
461 if path.is_dir() {
462 if self.is_allowed_dir(&path) {
463 self.walk_directory_internal(&path, callback)?;
464 continue;
465 }
466
467 if !self.filter.should_descend(&path, &self.config) {
468 self.walk_allowed_descendants(&path, callback)?;
469 continue;
470 }
471
472 self.walk_directory_internal(&path, callback)?;
473 } else if path.is_file() {
474 callback(&path)?;
475 }
476 }
477
478 Ok(())
479 }
480
481 fn is_allowed_dir(&self, path: &Path) -> bool {
482 self.config
483 .allowed_dirs
484 .iter()
485 .any(|allowed| path.starts_with(allowed))
486 }
487
488 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
489 where
490 F: FnMut(&Path) -> Result<()>,
491 {
492 let allowed_dirs = self.config.allowed_dirs.clone();
493 for allowed in allowed_dirs {
494 if allowed.starts_with(dir_path) && allowed.exists() {
495 self.walk_directory_internal(&allowed, callback)?;
496 }
497 }
498 Ok(())
499 }
500
501 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
502 let metadata = fs::metadata(file_path)?;
503 let modified = metadata.modified()?;
504 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
505 }
506
507 fn detect_language(&self, file_path: &Path) -> String {
508 file_path
509 .extension()
510 .and_then(|ext| ext.to_str())
511 .unwrap_or("unknown")
512 .to_string()
513 }
514}
515
516impl Clone for SimpleIndexer {
517 fn clone(&self) -> Self {
518 Self {
519 config: self.config.clone(),
520 index_cache: self.index_cache.clone(),
521 storage: self.storage.clone(),
522 filter: self.filter.clone(),
523 }
524 }
525}
526
527fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
528 if config
529 .allowed_dirs
530 .iter()
531 .any(|allowed| path.starts_with(allowed))
532 {
533 return false;
534 }
535
536 if config
537 .excluded_dirs
538 .iter()
539 .any(|excluded| path.starts_with(excluded))
540 {
541 return true;
542 }
543
544 if config.ignore_hidden
545 && path
546 .file_name()
547 .and_then(|name| name.to_str())
548 .is_some_and(|name_str| name_str.starts_with('.'))
549 {
550 return true;
551 }
552
553 false
554}
555
/// Hashes `content` with the standard library's `DefaultHasher` and returns the 64-bit result
/// as lowercase hexadecimal without padding.
///
/// Note: the standard library does not specify `DefaultHasher`'s algorithm and allows it to
/// change between releases, so values should not be assumed stable across toolchains.
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    content.hash(&mut state);
    let digest = state.finish();
    format!("{digest:x}")
}
564
/// Integration-style tests that exercise the indexer against temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// With the default configuration, files inside dot-prefixed directories are not indexed.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        // Only the file under the visible directory may show up in the cache.
        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }

    /// Turning `ignore_hidden` off makes files in dot-prefixed directories indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    /// A custom `IndexStorage` receives every indexed entry instead of the Markdown backend.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // In-memory backend that records persisted entries in a shared vector.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        // The test keeps its own handle to the vector so it can inspect what was persisted.
        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().to_string()
        );

        Ok(())
    }

    /// A custom `TraversalFilter` can reject individual files while delegating everything else.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        // Wraps the default filter and additionally rejects `.rs` files (case-insensitive).
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
}