Skip to main content

vtcode_indexer/
lib.rs

//! Workspace-friendly file indexer extracted from VT Code.
//!
//! `vtcode-indexer` offers a lightweight alternative to heavyweight
//! search/indexing stacks. It recursively walks a workspace, computes
//! hashes, and stores per-file metadata in Markdown-friendly summaries
//! so changes remain easy to audit in git.
7
8use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fs;
14use std::io::{BufWriter, ErrorKind, Write};
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
19/// Persistence backend for [`SimpleIndexer`].
20pub trait IndexStorage: Send + Sync {
21    /// Prepare any directories or resources required for persistence.
22    fn init(&self, index_dir: &Path) -> Result<()>;
23
24    /// Persist an indexed file entry.
25    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
26
27    /// Persist a batch of indexed file entries.
28    ///
29    /// Defaults to calling [`IndexStorage::persist`] for each entry, keeping
30    /// existing custom storage backends compatible.
31    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
32        for entry in entries {
33            self.persist(index_dir, entry)?;
34        }
35        Ok(())
36    }
37}
38
/// Directory traversal filter hook for [`SimpleIndexer`].
///
/// Both methods receive the indexer's [`SimpleIndexerConfig`] so an
/// implementation can base its decision on the configured directories and
/// hidden-file policy.
pub trait TraversalFilter: Send + Sync {
    /// Determine if the indexer should descend into the provided directory.
    ///
    /// Returns `true` to walk into `path`, `false` to skip it.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Determine if the indexer should process the provided file.
    ///
    /// Returns `true` to index `path`, `false` to leave it out.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
47
48/// Markdown-backed [`IndexStorage`] implementation.
49#[derive(Debug, Default, Clone)]
50pub struct MarkdownIndexStorage;
51
52impl IndexStorage for MarkdownIndexStorage {
53    fn init(&self, index_dir: &Path) -> Result<()> {
54        fs::create_dir_all(index_dir)?;
55        Ok(())
56    }
57
58    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
59        let file_name = format!("{}.md", calculate_hash(&entry.path));
60        let index_path = index_dir.join(file_name);
61        let file = fs::File::create(index_path)?;
62        let mut writer = BufWriter::new(file);
63        writeln!(writer, "# File Index: {}", entry.path)?;
64        writeln!(writer)?;
65        write_markdown_fields(&mut writer, entry)?;
66        writer.flush()?;
67        Ok(())
68    }
69
70    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
71        let temp_path = index_dir.join(".index.md.tmp");
72        let final_path = index_dir.join("index.md");
73        let file = fs::File::create(&temp_path)?;
74        let mut writer = BufWriter::new(file);
75
76        writeln!(writer, "# Workspace File Index")?;
77        writeln!(writer)?;
78        writeln!(writer, "- **Entries**: {}", entries.len())?;
79        writeln!(writer)?;
80
81        for entry in entries {
82            write_markdown_entry(&mut writer, entry)?;
83        }
84
85        writer.flush()?;
86        fs::rename(temp_path, final_path)?;
87        cleanup_legacy_markdown_entries(index_dir)?;
88        Ok(())
89    }
90}
91
92/// Default traversal filter powered by [`SimpleIndexerConfig`].
93#[derive(Debug, Default, Clone)]
94pub struct ConfigTraversalFilter;
95
96impl TraversalFilter for ConfigTraversalFilter {
97    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
98        !should_skip_dir(path, config)
99    }
100
101    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
102        if !path.is_file() {
103            return false;
104        }
105
106        // Skip hidden files when configured.
107        if config.ignore_hidden
108            && path
109                .file_name()
110                .and_then(|n| n.to_str())
111                .is_some_and(|s| s.starts_with('.'))
112        {
113            return false;
114        }
115
116        // Always skip known sensitive files regardless of config.
117        if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
118            let is_sensitive = matches!(
119                file_name,
120                ".env"
121                    | ".env.local"
122                    | ".env.production"
123                    | ".env.development"
124                    | ".env.test"
125                    | ".git"
126                    | ".gitignore"
127                    | ".DS_Store"
128            ) || file_name.starts_with(".env.");
129            if is_sensitive {
130                return false;
131            }
132        }
133
134        true
135    }
136}
137
/// Settings that control what [`SimpleIndexer`] walks and where it keeps its index.
#[derive(Debug, Clone)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory that receives the persisted index.
    index_dir: PathBuf,
    /// Whether dot-prefixed entries are ignored.
    ignore_hidden: bool,
    /// Directories that are skipped during traversal.
    excluded_dirs: Vec<PathBuf>,
    /// Directories that stay eligible even when hidden or under an excluded parent.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Creates a configuration with VT Code's legacy layout as defaults.
    ///
    /// The index lives in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` under the root are excluded; `.vtcode/external` is
    /// allowed; hidden entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Points the index at `index_dir` and excludes that directory from traversal.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let dir: PathBuf = index_dir.into();
        self.push_unique_excluded(dir.clone());
        self.index_dir = dir;
        self
    }

    /// Registers a directory that should be indexed even if hidden or inside an
    /// excluded parent. Adding the same path twice has no further effect.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate: PathBuf = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Registers another directory to skip during traversal.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether hidden entries (names starting with `.`) are ignored.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Returns the workspace root.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Returns the directory used for the persisted index.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to the exclusion list unless it is already present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
219
220/// Simple file index entry.
221#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct FileIndex {
223    /// File path.
224    pub path: String,
225    /// File content hash for change detection.
226    pub hash: String,
227    /// Last modified timestamp.
228    pub modified: u64,
229    /// File size.
230    pub size: u64,
231    /// Language/extension.
232    pub language: String,
233    /// Simple tags.
234    pub tags: Vec<String>,
235}
236
237/// Simple search result.
238#[derive(Debug, Clone, Serialize, Deserialize)]
239pub struct SearchResult {
240    pub file_path: String,
241    pub line_number: usize,
242    pub line_content: String,
243    pub matches: Vec<String>,
244}
245
/// Simple file indexer.
///
/// Holds an in-memory cache of indexed files and delegates persistence and
/// file/directory filtering to pluggable components.
pub struct SimpleIndexer {
    /// Workspace root, index directory and traversal settings.
    config: SimpleIndexerConfig,
    /// Indexed entries keyed by file path string.
    index_cache: HashMap<String, FileIndex>,
    /// Backend that persists index entries.
    storage: Arc<dyn IndexStorage>,
    /// Hook deciding which files and directories are indexed.
    filter: Arc<dyn TraversalFilter>,
}
253
254impl SimpleIndexer {
255    /// Create a new simple indexer with default VT Code paths.
256    pub fn new(workspace_root: PathBuf) -> Self {
257        Self::with_components(
258            SimpleIndexerConfig::new(workspace_root),
259            Arc::new(MarkdownIndexStorage),
260            Arc::new(ConfigTraversalFilter),
261        )
262    }
263
264    /// Create a simple indexer with the provided configuration.
265    pub fn with_config(config: SimpleIndexerConfig) -> Self {
266        Self::with_components(
267            config,
268            Arc::new(MarkdownIndexStorage),
269            Arc::new(ConfigTraversalFilter),
270        )
271    }
272
273    /// Create a new simple indexer using a custom index directory.
274    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
275        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
276        Self::with_config(config)
277    }
278
279    /// Create an indexer with explicit storage and traversal filter implementations.
280    pub fn with_components(
281        config: SimpleIndexerConfig,
282        storage: Arc<dyn IndexStorage>,
283        filter: Arc<dyn TraversalFilter>,
284    ) -> Self {
285        Self {
286            config,
287            index_cache: HashMap::new(),
288            storage,
289            filter,
290        }
291    }
292
293    /// Replace the storage backend used to persist index entries.
294    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
295        Self { storage, ..self }
296    }
297
298    /// Replace the traversal filter used to decide which files and directories are indexed.
299    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
300        Self { filter, ..self }
301    }
302
303    /// Initialize the index directory.
304    pub fn init(&self) -> Result<()> {
305        self.storage.init(self.config.index_dir())
306    }
307
308    /// Get the workspace root path.
309    pub fn workspace_root(&self) -> &Path {
310        self.config.workspace_root()
311    }
312
313    /// Get the index directory used for persisted metadata.
314    pub fn index_dir(&self) -> &Path {
315        self.config.index_dir()
316    }
317
318    /// Index a single file.
319    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
320        if !file_path.exists() {
321            return Ok(());
322        }
323
324        if let Some(index) = self.build_file_index(file_path)? {
325            self.index_cache.insert(index.path.clone(), index.clone());
326            self.storage.persist(self.config.index_dir(), &index)?;
327        }
328
329        Ok(())
330    }
331
332    /// Index all files in directory recursively.
333    /// Respects .gitignore, .ignore, and other ignore files.
334    /// SECURITY: Always skips hidden files and sensitive data (.env, .git, etc.)
335    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
336        let walker = self.build_walker(dir_path);
337
338        let mut entries = Vec::new();
339
340        for entry in walker.filter_map(|e| e.ok()) {
341            let path = entry.path();
342
343            // Only index files, not directories
344            if entry.file_type().is_some_and(|ft| ft.is_file())
345                && !self.is_excluded_path(path)
346                && let Some(index) = self.build_file_index(path)?
347            {
348                self.index_cache.insert(index.path.clone(), index.clone());
349                entries.push(index);
350            }
351        }
352
353        entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
354        self.storage
355            .persist_batch(self.config.index_dir(), &entries)?;
356
357        Ok(())
358    }
359
360    /// Discover all files in directory recursively without indexing them.
361    /// This is much faster than `index_directory` as it avoids hashing and persistence.
362    pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
363        let walker = self.build_walker(dir_path);
364
365        walker
366            .filter_map(|e| e.ok())
367            .filter(|e| {
368                if !e.file_type().is_some_and(|ft| ft.is_file()) {
369                    return false;
370                }
371                let path = e.path();
372
373                !self.is_excluded_path(path) && self.should_index_file_path(path)
374            })
375            .map(|e| e.path().to_string_lossy().into_owned())
376            .collect()
377    }
378
379    /// Internal helper for regex-based file content search.
380    /// Used by both `search()` and `grep()` to avoid code duplication.
381    fn search_files_internal(
382        &self,
383        regex: &Regex,
384        path_filter: Option<&str>,
385        extract_matches: bool,
386    ) -> Vec<SearchResult> {
387        let mut results = Vec::new();
388
389        for file_path in self.index_cache.keys() {
390            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
391                continue;
392            }
393
394            if let Ok(content) = fs::read_to_string(file_path) {
395                for (line_num, line) in content.lines().enumerate() {
396                    if regex.is_match(line) {
397                        let matches = if extract_matches {
398                            regex
399                                .find_iter(line)
400                                .map(|m| m.as_str().to_string())
401                                .collect()
402                        } else {
403                            vec![line.to_string()]
404                        };
405
406                        results.push(SearchResult {
407                            file_path: file_path.clone(),
408                            line_number: line_num + 1,
409                            line_content: line.to_string(),
410                            matches,
411                        });
412                    }
413                }
414            }
415        }
416
417        results
418    }
419
420    /// Search files using regex pattern.
421    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
422        let regex = Regex::new(pattern)?;
423        Ok(self.search_files_internal(&regex, path_filter, true))
424    }
425
426    /// Find files by name pattern.
427    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
428        let regex = Regex::new(pattern)?;
429        let mut results = Vec::new();
430
431        for file_path in self.index_cache.keys() {
432            if regex.is_match(file_path) {
433                results.push(file_path.clone());
434            }
435        }
436
437        Ok(results)
438    }
439
440    /// Get all indexed files without pattern matching.
441    /// This is more efficient than using find_files(".*").
442    pub fn all_files(&self) -> Vec<String> {
443        self.index_cache.keys().cloned().collect()
444    }
445
446    /// Get file content with line numbers.
447    pub fn get_file_content(
448        &self,
449        file_path: &str,
450        start_line: Option<usize>,
451        end_line: Option<usize>,
452    ) -> Result<String> {
453        let content = fs::read_to_string(file_path)?;
454        let lines: Vec<&str> = content.lines().collect();
455
456        let start = start_line.unwrap_or(1).saturating_sub(1);
457        let end = end_line.unwrap_or(lines.len());
458
459        let selected_lines = &lines[start..end.min(lines.len())];
460
461        let mut result = String::new();
462        for (i, line) in selected_lines.iter().enumerate() {
463            result.push_str(&format!("{}: {}\n", start + i + 1, line));
464        }
465
466        Ok(result)
467    }
468
469    /// List files in directory (like ls).
470    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
471        let path = Path::new(dir_path);
472        if !path.exists() {
473            return Ok(vec![]);
474        }
475
476        let mut files = Vec::new();
477
478        for entry in fs::read_dir(path)? {
479            let entry = entry?;
480            let file_name = entry.file_name().to_string_lossy().into_owned();
481
482            if !show_hidden && file_name.starts_with('.') {
483                continue;
484            }
485
486            files.push(file_name);
487        }
488
489        Ok(files)
490    }
491
492    /// Grep-like search (like grep command).
493    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
494        let regex = Regex::new(pattern)?;
495        Ok(self.search_files_internal(&regex, file_pattern, false))
496    }
497
498    #[allow(dead_code)]
499    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
500    where
501        F: FnMut(&Path) -> Result<()>,
502    {
503        if !dir_path.exists() {
504            return Ok(());
505        }
506
507        self.walk_directory_internal(dir_path, callback)
508    }
509
510    #[allow(dead_code)]
511    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
512    where
513        F: FnMut(&Path) -> Result<()>,
514    {
515        for entry in fs::read_dir(dir_path)? {
516            let entry = entry?;
517            let path = entry.path();
518
519            if path.is_dir() {
520                if self.is_allowed_dir(&path) {
521                    self.walk_directory_internal(&path, callback)?;
522                    continue;
523                }
524
525                if !self.filter.should_descend(&path, &self.config) {
526                    self.walk_allowed_descendants(&path, callback)?;
527                    continue;
528                }
529
530                self.walk_directory_internal(&path, callback)?;
531            } else if path.is_file() {
532                callback(&path)?;
533            }
534        }
535
536        Ok(())
537    }
538
539    #[allow(dead_code)]
540    fn is_allowed_dir(&self, path: &Path) -> bool {
541        self.config
542            .allowed_dirs
543            .iter()
544            .any(|allowed| path.starts_with(allowed))
545    }
546
547    #[allow(dead_code)]
548    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
549    where
550        F: FnMut(&Path) -> Result<()>,
551    {
552        let allowed_dirs = self.config.allowed_dirs.clone();
553        for allowed in allowed_dirs {
554            if allowed.starts_with(dir_path) && allowed.exists() {
555                self.walk_directory_internal(&allowed, callback)?;
556            }
557        }
558        Ok(())
559    }
560
561    #[inline]
562    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
563        let metadata = fs::metadata(file_path)?;
564        let modified = metadata.modified()?;
565        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
566    }
567
568    #[inline]
569    fn detect_language(&self, file_path: &Path) -> String {
570        file_path
571            .extension()
572            .and_then(|ext| ext.to_str())
573            .unwrap_or("unknown")
574            .to_string()
575    }
576
577    fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
578        if !self.should_index_file_path(file_path) {
579            return Ok(None);
580        }
581
582        let content = match fs::read_to_string(file_path) {
583            Ok(text) => text,
584            Err(err) => {
585                if err.kind() == ErrorKind::InvalidData {
586                    return Ok(None);
587                }
588                return Err(err.into());
589            }
590        };
591
592        let index = FileIndex {
593            path: file_path.to_string_lossy().into_owned(),
594            hash: calculate_hash(&content),
595            modified: self.get_modified_time(file_path)?,
596            size: content.len() as u64,
597            language: self.detect_language(file_path),
598            tags: vec![],
599        };
600
601        Ok(Some(index))
602    }
603
604    #[inline]
605    fn is_excluded_path(&self, path: &Path) -> bool {
606        self.config
607            .excluded_dirs
608            .iter()
609            .any(|excluded| path.starts_with(excluded))
610    }
611
612    #[inline]
613    fn should_index_file_path(&self, path: &Path) -> bool {
614        self.filter.should_index_file(path, &self.config)
615    }
616
617    fn build_walker(&self, dir_path: &Path) -> Walk {
618        WalkBuilder::new(dir_path)
619            .hidden(self.config.ignore_hidden)
620            .git_ignore(true)
621            .git_global(true)
622            .git_exclude(true)
623            .ignore(true)
624            .parents(true)
625            .build()
626    }
627}
628
629impl Clone for SimpleIndexer {
630    fn clone(&self) -> Self {
631        Self {
632            config: self.config.clone(),
633            index_cache: self.index_cache.clone(),
634            storage: self.storage.clone(),
635            filter: self.filter.clone(),
636        }
637    }
638}
639
640fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
641    if config
642        .allowed_dirs
643        .iter()
644        .any(|allowed| path.starts_with(allowed))
645    {
646        return false;
647    }
648
649    if config
650        .excluded_dirs
651        .iter()
652        .any(|excluded| path.starts_with(excluded))
653    {
654        return true;
655    }
656
657    if config.ignore_hidden
658        && path
659            .file_name()
660            .and_then(|name| name.to_str())
661            .is_some_and(|name_str| name_str.starts_with('.'))
662    {
663        return true;
664    }
665
666    false
667}
668
669#[inline]
670fn calculate_hash(content: &str) -> String {
671    vtcode_commons::utils::calculate_sha256(content.as_bytes())
672}
673
674fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
675    writeln!(writer, "## {}", entry.path)?;
676    writeln!(writer)?;
677    write_markdown_fields(writer, entry)?;
678    writeln!(writer)?;
679    Ok(())
680}
681
682fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
683    writeln!(writer, "- **Path**: {}", entry.path)?;
684    writeln!(writer, "- **Hash**: {}", entry.hash)?;
685    writeln!(writer, "- **Modified**: {}", entry.modified)?;
686    writeln!(writer, "- **Size**: {} bytes", entry.size)?;
687    writeln!(writer, "- **Language**: {}", entry.language)?;
688    writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
689    Ok(())
690}
691
692fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
693    for entry in fs::read_dir(index_dir)? {
694        let entry = entry?;
695        let file_name = entry.file_name();
696        let file_name = file_name.to_string_lossy();
697        if is_legacy_markdown_entry_name(file_name.as_ref()) {
698            fs::remove_file(entry.path())?;
699        }
700    }
701    Ok(())
702}
703
/// Reports whether `file_name` is exactly 64 ASCII hex digits followed by `.md`,
/// the naming scheme of per-file index entries.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    file_name
        .strip_suffix(".md")
        .is_some_and(|stem| stem.len() == 64 && stem.bytes().all(|b| b.is_ascii_hexdigit()))
}
711
712#[cfg(test)]
713mod tests {
714    use super::*;
715    use std::fs;
716    use std::sync::{Arc, Mutex};
717    use tempfile::tempdir;
718
719    #[test]
720    fn skips_hidden_directories_by_default() -> Result<()> {
721        let temp = tempdir()?;
722        let workspace = temp.path();
723        let hidden_dir = workspace.join(".private");
724        fs::create_dir_all(&hidden_dir)?;
725        fs::write(hidden_dir.join("secret.txt"), "classified")?;
726
727        let visible_dir = workspace.join("src");
728        fs::create_dir_all(&visible_dir)?;
729        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;
730
731        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
732        indexer.init()?;
733        indexer.index_directory(workspace)?;
734
735        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
736        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());
737
738        Ok(())
739    }
740
741    #[test]
742    fn can_include_hidden_directories_when_configured() -> Result<()> {
743        let temp = tempdir()?;
744        let workspace = temp.path();
745        let hidden_dir = workspace.join(".cache");
746        fs::create_dir_all(&hidden_dir)?;
747        fs::write(hidden_dir.join("data.log"), "details")?;
748
749        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
750        let mut indexer = SimpleIndexer::with_config(config);
751        indexer.init()?;
752        indexer.index_directory(workspace)?;
753
754        let results = indexer.find_files("data\\.log$")?;
755        assert_eq!(results.len(), 1);
756
757        Ok(())
758    }
759
760    #[test]
761    fn supports_custom_storage_backends() -> Result<()> {
762        #[derive(Clone, Default)]
763        struct MemoryStorage {
764            records: Arc<Mutex<Vec<FileIndex>>>,
765        }
766
767        impl MemoryStorage {
768            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
769                Self { records }
770            }
771        }
772
773        impl IndexStorage for MemoryStorage {
774            fn init(&self, _index_dir: &Path) -> Result<()> {
775                Ok(())
776            }
777
778            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
779                let mut guard = self.records.lock().expect("lock poisoned");
780                guard.push(entry.clone());
781                Ok(())
782            }
783        }
784
785        let temp = tempdir()?;
786        let workspace = temp.path();
787        fs::write(workspace.join("notes.txt"), "remember this")?;
788
789        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
790        let storage = MemoryStorage::new(records.clone());
791
792        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
793        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
794        indexer.init()?;
795        indexer.index_directory(workspace)?;
796
797        let entries = records.lock().expect("lock poisoned");
798        assert_eq!(entries.len(), 1);
799        assert_eq!(
800            entries[0].path,
801            workspace.join("notes.txt").to_string_lossy().into_owned()
802        );
803
804        Ok(())
805    }
806
807    #[test]
808    fn custom_filters_can_skip_files() -> Result<()> {
809        #[derive(Default)]
810        struct SkipRustFilter {
811            inner: ConfigTraversalFilter,
812        }
813
814        impl TraversalFilter for SkipRustFilter {
815            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
816                self.inner.should_descend(path, config)
817            }
818
819            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
820                if path
821                    .extension()
822                    .and_then(|ext| ext.to_str())
823                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
824                {
825                    return false;
826                }
827
828                self.inner.should_index_file(path, config)
829            }
830        }
831
832        let temp = tempdir()?;
833        let workspace = temp.path();
834        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
835        fs::write(workspace.join("README.md"), "# Notes")?;
836
837        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
838        let mut indexer =
839            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
840        indexer.init()?;
841        indexer.index_directory(workspace)?;
842
843        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
844        assert!(!indexer.find_files("README\\.md$")?.is_empty());
845
846        Ok(())
847    }
848
849    #[test]
850    fn batch_indexing_writes_single_markdown_file() -> Result<()> {
851        let temp = tempdir()?;
852        let workspace = temp.path();
853        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
854        fs::write(workspace.join("README.md"), "# Notes")?;
855
856        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
857        indexer.init()?;
858        indexer.index_directory(workspace)?;
859
860        let index_dir = workspace.join(".vtcode").join("index");
861        let files = fs::read_dir(&index_dir)?
862            .filter_map(|entry| entry.ok())
863            .map(|entry| entry.file_name().to_string_lossy().into_owned())
864            .collect::<Vec<_>>();
865        assert_eq!(files, vec!["index.md".to_string()]);
866
867        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
868        assert!(index_content.contains(workspace.join("lib.rs").to_string_lossy().as_ref()));
869        assert!(index_content.contains(workspace.join("README.md").to_string_lossy().as_ref()));
870
871        Ok(())
872    }
873
874    #[test]
875    fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
876        let temp = tempdir()?;
877        let workspace = temp.path();
878        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
879
880        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
881        indexer.init()?;
882
883        let legacy_file_name = format!("{}.md", calculate_hash("legacy-path"));
884        let legacy_file_path = workspace
885            .join(".vtcode")
886            .join("index")
887            .join(&legacy_file_name);
888        fs::write(&legacy_file_path, "# legacy")?;
889        assert!(legacy_file_path.exists());
890
891        indexer.index_directory(workspace)?;
892
893        assert!(!legacy_file_path.exists());
894        let files = fs::read_dir(workspace.join(".vtcode").join("index"))?
895            .filter_map(|entry| entry.ok())
896            .map(|entry| entry.file_name().to_string_lossy().into_owned())
897            .collect::<Vec<_>>();
898        assert_eq!(files, vec!["index.md".to_string()]);
899
900        Ok(())
901    }
902}