Skip to main content

vtcode_indexer/
lib.rs

1//! Workspace-friendly file indexer extracted from VT Code.
2//!
3//! `vtcode-indexer` offers a lightweight alternative to heavyweight
4//! search/indexing stacks. It recursively walks a workspace, computes
5//! hashes, and stores per-file metadata in Markdown-friendly summaries
6//! so changes remain easy to audit in git.
7
8use anyhow::Result;
9use hashbrown::HashMap;
10use ignore::{DirEntry, Walk, WalkBuilder};
11use regex::Regex;
12use serde::{Deserialize, Serialize};
13use std::fmt::Write as FmtWrite;
14use std::fs;
15use std::io::{BufWriter, ErrorKind, Write};
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18use std::time::SystemTime;
19
20/// Persistence backend for [`SimpleIndexer`].
21pub trait IndexStorage: Send + Sync {
22    /// Prepare any directories or resources required for persistence.
23    fn init(&self, index_dir: &Path) -> Result<()>;
24
25    /// Persist an indexed file entry.
26    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
27
28    /// Whether this backend expects full-snapshot persistence.
29    ///
30    /// Snapshot-aware backends receive the complete in-memory index on each
31    /// update so on-disk state stays consistent across single-file and
32    /// directory indexing flows.
33    fn prefers_snapshot_persistence(&self) -> bool {
34        false
35    }
36
37    /// Remove a previously persisted file entry.
38    ///
39    /// Defaults to a no-op to keep existing custom storage backends compatible.
40    fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
41        Ok(())
42    }
43
44    /// Persist a batch of indexed file entries.
45    ///
46    /// Defaults to calling [`IndexStorage::persist`] for each entry, keeping
47    /// existing custom storage backends compatible.
48    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
49        for entry in entries {
50            self.persist(index_dir, entry)?;
51        }
52        Ok(())
53    }
54
55    /// Persist a batch of indexed file entries borrowed from the in-memory cache.
56    ///
57    /// Defaults to cloning the borrowed entries and delegating to
58    /// [`IndexStorage::persist_batch`] so existing custom storage backends remain
59    /// compatible.
60    fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
61        let owned = entries
62            .iter()
63            .map(|entry| (*entry).clone())
64            .collect::<Vec<_>>();
65        self.persist_batch(index_dir, &owned)
66    }
67}
68
/// Directory traversal filter hook for [`SimpleIndexer`].
///
/// Implementations decide, per directory and per file, what the indexer may
/// touch. Returning `false` from [`TraversalFilter::should_descend`] prunes
/// the directory (it is passed to the walker's entry filter), while
/// [`TraversalFilter::should_index_file`] vetoes individual files.
pub trait TraversalFilter: Send + Sync {
    /// Determine if the indexer should descend into the provided directory.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Determine if the indexer should process the provided file.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
77
/// Markdown-backed [`IndexStorage`] implementation.
///
/// Batch updates are written as a single `index.md` snapshot (this backend
/// reports `prefers_snapshot_persistence() == true`); stray per-file
/// `<sha256>.md` entries from the legacy layout are cleaned up afterwards.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
81
82impl IndexStorage for MarkdownIndexStorage {
83    fn init(&self, index_dir: &Path) -> Result<()> {
84        fs::create_dir_all(index_dir)?;
85        Ok(())
86    }
87
88    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
89        fs::create_dir_all(index_dir)?;
90        let file_name = format!("{}.md", calculate_hash(&entry.path));
91        let index_path = index_dir.join(file_name);
92        let file = fs::File::create(index_path)?;
93        let mut writer = BufWriter::new(file);
94        writeln!(writer, "# File Index: {}", entry.path)?;
95        writeln!(writer)?;
96        write_markdown_fields(&mut writer, entry)?;
97        writer.flush()?;
98        Ok(())
99    }
100
101    fn prefers_snapshot_persistence(&self) -> bool {
102        true
103    }
104
105    fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
106        let file_name = format!(
107            "{}.md",
108            calculate_hash(file_path.to_string_lossy().as_ref())
109        );
110        let index_path = index_dir.join(file_name);
111        match fs::remove_file(index_path) {
112            Ok(()) => Ok(()),
113            Err(err) if err.kind() == ErrorKind::NotFound => Ok(()),
114            Err(err) => Err(err.into()),
115        }
116    }
117
118    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
119        persist_markdown_snapshot(index_dir, entries.iter())
120    }
121
122    fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
123        persist_markdown_snapshot(index_dir, entries.iter().copied())
124    }
125}
126
127fn persist_markdown_snapshot<'a>(
128    index_dir: &Path,
129    entries: impl IntoIterator<Item = &'a FileIndex>,
130) -> Result<()> {
131    let entries = entries.into_iter().collect::<Vec<_>>();
132
133    fs::create_dir_all(index_dir)?;
134    let temp_path = index_dir.join(".index.md.tmp");
135    let final_path = index_dir.join("index.md");
136    let file = fs::File::create(&temp_path)?;
137    let mut writer = BufWriter::new(file);
138
139    writeln!(writer, "# Workspace File Index")?;
140    writeln!(writer)?;
141    writeln!(writer, "- **Entries**: {}", entries.len())?;
142    writeln!(writer)?;
143
144    for entry in entries {
145        write_markdown_entry(&mut writer, entry)?;
146    }
147
148    writer.flush()?;
149    fs::rename(temp_path, final_path)?;
150    cleanup_legacy_markdown_entries(index_dir)?;
151    Ok(())
152}
153
154/// Default traversal filter powered by [`SimpleIndexerConfig`].
155#[derive(Debug, Default, Clone)]
156pub struct ConfigTraversalFilter;
157
158impl TraversalFilter for ConfigTraversalFilter {
159    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
160        !should_skip_dir(path, config)
161    }
162
163    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
164        if !path.is_file() {
165            return false;
166        }
167
168        // Skip hidden files when configured.
169        if config.ignore_hidden
170            && path
171                .file_name()
172                .and_then(|n| n.to_str())
173                .is_some_and(|s| s.starts_with('.'))
174        {
175            return false;
176        }
177
178        // Always skip known sensitive files regardless of config.
179        if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
180            let is_sensitive = matches!(
181                file_name,
182                ".env"
183                    | ".env.local"
184                    | ".env.production"
185                    | ".env.development"
186                    | ".env.test"
187                    | ".git"
188                    | ".gitignore"
189                    | ".DS_Store"
190            ) || file_name.starts_with(".env.");
191            if is_sensitive {
192                return false;
193            }
194        }
195
196        true
197    }
198}
199
/// Configuration for [`SimpleIndexer`].
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    workspace_root: PathBuf,
    index_dir: PathBuf,
    ignore_hidden: bool,
    excluded_dirs: Vec<PathBuf>,
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds a configuration using VT Code's legacy layout as defaults.
    ///
    /// Defaults: metadata lives in `<root>/.vtcode/index`, hidden entries are
    /// ignored, the `.vtcode`, `target`, and `node_modules` directories are
    /// excluded, and `<root>/.vtcode/external` is allow-listed.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let external_dir = vtcode_dir.join("external");

        // These four paths are pairwise distinct by construction, so no
        // dedup pass is needed. (The previous `Vec::dedup` call here was a
        // no-op anyway: `dedup` only removes *consecutive* duplicates, which
        // this unsorted list never contains.)
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs: vec![external_dir],
        }
    }

    /// Updates the index directory used for persisted metadata.
    ///
    /// The new directory is also added to the exclusion list so the indexer
    /// never walks its own output.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let index_dir = index_dir.into();
        self.index_dir = index_dir.clone();
        self.push_unique_excluded(index_dir);
        self
    }

    /// Adds an allowed directory that should be indexed even if hidden or inside an excluded parent.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        if !self.allowed_dirs.contains(&path) {
            self.allowed_dirs.push(path);
        }
        self
    }

    /// Adds an additional excluded directory to skip during traversal.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Toggles whether hidden directories (prefix `.`) are ignored.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Workspace root accessor.
    pub fn workspace_root(&self) -> &Path {
        &self.workspace_root
    }

    /// Index directory accessor.
    pub fn index_dir(&self) -> &Path {
        &self.index_dir
    }

    /// Appends `path` to `excluded_dirs` unless it is already present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
281
/// Simple file index entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// File path (lossy UTF-8 rendering of the on-disk path).
    pub path: String,
    /// SHA-256 hash of the file content, used for change detection.
    pub hash: String,
    /// Last modified timestamp, in seconds since the Unix epoch.
    pub modified: u64,
    /// File size: byte length of the decoded UTF-8 content.
    pub size: u64,
    /// Language/extension guess ("unknown" when the path has no extension).
    pub language: String,
    /// Simple tags (currently always empty when built by the indexer).
    pub tags: Vec<String>,
}
298
/// Simple search result.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched substrings (for `search`) or the whole line (for `grep`).
    pub matches: Vec<String>,
}
307
/// Simple file indexer.
pub struct SimpleIndexer {
    /// Traversal and layout configuration.
    config: SimpleIndexerConfig,
    /// In-memory index keyed by file path string.
    index_cache: HashMap<String, FileIndex>,
    /// Persistence backend for index entries.
    storage: Arc<dyn IndexStorage>,
    /// Hook deciding which files and directories are visited.
    filter: Arc<dyn TraversalFilter>,
}
315
316impl SimpleIndexer {
317    /// Create a new simple indexer with default VT Code paths.
318    pub fn new(workspace_root: PathBuf) -> Self {
319        Self::with_components(
320            SimpleIndexerConfig::new(workspace_root),
321            Arc::new(MarkdownIndexStorage),
322            Arc::new(ConfigTraversalFilter),
323        )
324    }
325
326    /// Create a simple indexer with the provided configuration.
327    pub fn with_config(config: SimpleIndexerConfig) -> Self {
328        Self::with_components(
329            config,
330            Arc::new(MarkdownIndexStorage),
331            Arc::new(ConfigTraversalFilter),
332        )
333    }
334
335    /// Create a new simple indexer using a custom index directory.
336    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
337        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
338        Self::with_config(config)
339    }
340
341    /// Create an indexer with explicit storage and traversal filter implementations.
342    pub fn with_components(
343        config: SimpleIndexerConfig,
344        storage: Arc<dyn IndexStorage>,
345        filter: Arc<dyn TraversalFilter>,
346    ) -> Self {
347        Self {
348            config,
349            index_cache: HashMap::new(),
350            storage,
351            filter,
352        }
353    }
354
355    /// Replace the storage backend used to persist index entries.
356    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
357        Self { storage, ..self }
358    }
359
360    /// Replace the traversal filter used to decide which files and directories are indexed.
361    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
362        Self { filter, ..self }
363    }
364
    /// Initialize the index directory.
    ///
    /// Delegates to the storage backend so it can create whatever
    /// directories or resources it needs before entries are persisted.
    pub fn init(&self) -> Result<()> {
        self.storage.init(self.config.index_dir())
    }
369
    /// Get the workspace root path.
    ///
    /// Forwards to [`SimpleIndexerConfig::workspace_root`].
    pub fn workspace_root(&self) -> &Path {
        self.config.workspace_root()
    }

    /// Get the index directory used for persisted metadata.
    ///
    /// Forwards to [`SimpleIndexerConfig::index_dir`].
    pub fn index_dir(&self) -> &Path {
        self.config.index_dir()
    }
379
    /// Index a single file.
    ///
    /// Files that no longer exist or that fail the traversal filter are
    /// removed from both the in-memory cache and persistent storage, so this
    /// method doubles as the "un-index" path for a file.
    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
        let cache_key = file_path.to_string_lossy().into_owned();

        if self.storage.prefers_snapshot_persistence() {
            // Snapshot backends always receive the full index: compute the
            // next state for this file (`None` means delete) and re-persist.
            let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
                self.build_file_index(file_path)?
            } else {
                None
            };

            self.apply_snapshot_file_update(cache_key, next_entry)?;
            return Ok(());
        }

        if !file_path.exists() || !self.should_process_file_path(file_path) {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
            return Ok(());
        }

        // `build_file_index` returns None for non-UTF-8 content; treat that
        // the same as a filtered-out file and drop any stale entry.
        if let Some(index) = self.build_file_index(file_path)? {
            self.storage.persist(self.config.index_dir(), &index)?;
            self.index_cache.insert(index.path.clone(), index);
        } else {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
        }

        Ok(())
    }
411
    /// Index all files in directory recursively.
    /// Respects .gitignore, .ignore, and other ignore files.
    /// SECURITY: Always skips hidden files and sensitive data (.env, .git, etc.)
    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
        let walker = self.build_walker(dir_path);

        let mut entries = Vec::new();

        // Walk errors are silently skipped (only Ok entries survive).
        for entry in walker.filter_map(|e| e.ok()) {
            let path = entry.path();

            // Only index files, not directories
            if entry.file_type().is_some_and(|ft| ft.is_file())
                && let Some(index) = self.build_file_index(path)?
            {
                entries.push(index);
            }
        }

        if self.storage.prefers_snapshot_persistence() {
            // Snapshot backends get the whole index, with cache rollback on
            // persistence failure.
            self.apply_snapshot_directory_update(dir_path, &entries)?;
        } else {
            // Sort for deterministic persistence order.
            entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
            self.storage
                .persist_batch(self.config.index_dir(), &entries)?;
        }

        self.replace_cached_entries(dir_path, &entries);

        Ok(())
    }
443
444    /// Discover all files in directory recursively without indexing them.
445    /// This is much faster than `index_directory` as it avoids hashing and persistence.
446    pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
447        let walker = self.build_walker(dir_path);
448
449        let mut files = walker
450            .filter_map(|e| e.ok())
451            .filter(|e| {
452                if !e.file_type().is_some_and(|ft| ft.is_file()) {
453                    return false;
454                }
455
456                self.should_process_file_path(e.path())
457            })
458            .map(|e| e.path().to_string_lossy().into_owned())
459            .collect::<Vec<_>>();
460        files.sort_unstable();
461        files
462    }
463
464    /// Internal helper for regex-based file content search.
465    /// Used by both `search()` and `grep()` to avoid code duplication.
466    fn search_files_internal(
467        &self,
468        regex: &Regex,
469        path_filter: Option<&str>,
470        extract_matches: bool,
471    ) -> Vec<SearchResult> {
472        let mut results = Vec::new();
473
474        for file_path in self.index_cache.keys() {
475            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
476                continue;
477            }
478
479            if let Ok(content) = fs::read_to_string(file_path) {
480                for (line_num, line) in content.lines().enumerate() {
481                    if regex.is_match(line) {
482                        let matches = if extract_matches {
483                            regex
484                                .find_iter(line)
485                                .map(|m| m.as_str().to_string())
486                                .collect()
487                        } else {
488                            vec![line.to_string()]
489                        };
490
491                        results.push(SearchResult {
492                            file_path: file_path.clone(),
493                            line_number: line_num + 1,
494                            line_content: line.to_string(),
495                            matches,
496                        });
497                    }
498                }
499            }
500        }
501
502        results.sort_unstable_by(|left, right| {
503            left.file_path
504                .cmp(&right.file_path)
505                .then_with(|| left.line_number.cmp(&right.line_number))
506        });
507        results
508    }
509
510    /// Search files using regex pattern.
511    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
512        let regex = Regex::new(pattern)?;
513        Ok(self.search_files_internal(&regex, path_filter, true))
514    }
515
516    /// Find files by name pattern.
517    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
518        let regex = Regex::new(pattern)?;
519        let mut results = Vec::new();
520
521        for file_path in self.index_cache.keys() {
522            if regex.is_match(file_path) {
523                results.push(file_path.clone());
524            }
525        }
526
527        results.sort_unstable();
528        Ok(results)
529    }
530
531    /// Get all indexed files without pattern matching.
532    /// This is more efficient than using find_files(".*").
533    pub fn all_files(&self) -> Vec<String> {
534        let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
535        files.sort_unstable();
536        files
537    }
538
539    /// Get file content with line numbers.
540    pub fn get_file_content(
541        &self,
542        file_path: &str,
543        start_line: Option<usize>,
544        end_line: Option<usize>,
545    ) -> Result<String> {
546        let content = fs::read_to_string(file_path)?;
547        let start = start_line.unwrap_or(1).max(1);
548        let end = end_line.unwrap_or(usize::MAX);
549
550        if start > end {
551            return Ok(String::new());
552        }
553
554        let mut result = String::new();
555        for (line_number, line) in content.lines().enumerate() {
556            let line_number = line_number + 1;
557            if line_number < start {
558                continue;
559            }
560            if line_number > end {
561                break;
562            }
563            writeln!(&mut result, "{line_number}: {line}")?;
564        }
565
566        Ok(result)
567    }
568
569    /// List files in directory (like ls).
570    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
571        let path = Path::new(dir_path);
572        if !path.exists() {
573            return Ok(vec![]);
574        }
575
576        let mut files = Vec::new();
577
578        for entry in fs::read_dir(path)? {
579            let entry = entry?;
580            let file_name = entry.file_name().to_string_lossy().into_owned();
581
582            if !show_hidden && file_name.starts_with('.') {
583                continue;
584            }
585
586            files.push(file_name);
587        }
588
589        files.sort_unstable();
590        Ok(files)
591    }
592
593    /// Grep-like search (like grep command).
594    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
595        let regex = Regex::new(pattern)?;
596        Ok(self.search_files_internal(&regex, file_pattern, false))
597    }
598
599    #[allow(dead_code)]
600    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
601    where
602        F: FnMut(&Path) -> Result<()>,
603    {
604        if !dir_path.exists() {
605            return Ok(());
606        }
607
608        self.walk_directory_internal(dir_path, callback)
609    }
610
    /// Recursive fallback walker (unused by the `ignore`-crate based flow).
    #[allow(dead_code)]
    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        for entry in fs::read_dir(dir_path)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Allow-listed directories are always descended into, even
                // when the filter below would reject them.
                if self.is_allowed_path(&path) {
                    self.walk_directory_internal(&path, callback)?;
                    continue;
                }

                if !self.filter.should_descend(&path, &self.config) {
                    // The directory itself is skipped, but allow-listed
                    // descendants nested inside it must still be visited.
                    self.walk_allowed_descendants(&path, callback)?;
                    continue;
                }

                self.walk_directory_internal(&path, callback)?;
            } else if path.is_file() {
                callback(&path)?;
            }
        }

        Ok(())
    }
639
640    #[allow(dead_code)]
641    fn is_allowed_path(&self, path: &Path) -> bool {
642        self.config
643            .allowed_dirs
644            .iter()
645            .any(|allowed| path.starts_with(allowed))
646    }
647
    /// Visit allow-listed directories nested under an otherwise-skipped directory.
    #[allow(dead_code)]
    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        // Clone to release the immutable borrow of `self.config` before the
        // recursive `&mut self` calls below.
        let allowed_dirs = self.config.allowed_dirs.clone();
        for allowed in allowed_dirs {
            if allowed.starts_with(dir_path) && allowed.exists() {
                self.walk_directory_internal(&allowed, callback)?;
            }
        }
        Ok(())
    }
661
662    #[inline]
663    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
664        let metadata = fs::metadata(file_path)?;
665        let modified = metadata.modified()?;
666        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
667    }
668
669    #[inline]
670    fn detect_language(&self, file_path: &Path) -> String {
671        file_path
672            .extension()
673            .and_then(|ext| ext.to_str())
674            .unwrap_or("unknown")
675            .to_string()
676    }
677
    /// Build an index entry for `file_path`.
    ///
    /// Returns `Ok(None)` when the file is filtered out or its content is not
    /// valid UTF-8; other I/O failures propagate as errors.
    fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
        if !self.should_process_file_path(file_path) {
            return Ok(None);
        }

        let content = match fs::read_to_string(file_path) {
            Ok(text) => text,
            Err(err) => {
                // Binary/non-UTF-8 files are skipped, not treated as failures.
                if err.kind() == ErrorKind::InvalidData {
                    return Ok(None);
                }
                return Err(err.into());
            }
        };

        let index = FileIndex {
            path: file_path.to_string_lossy().into_owned(),
            hash: calculate_hash(&content),
            modified: self.get_modified_time(file_path)?,
            // Byte length of the UTF-8 content just read.
            size: content.len() as u64,
            language: self.detect_language(file_path),
            tags: vec![],
        };

        Ok(Some(index))
    }
704
705    #[inline]
706    fn is_excluded_path(&self, path: &Path) -> bool {
707        self.config
708            .excluded_dirs
709            .iter()
710            .any(|excluded| path.starts_with(excluded))
711    }
712
    /// Delegate the per-file decision to the configured traversal filter.
    #[inline]
    fn should_index_file_path(&self, path: &Path) -> bool {
        self.filter.should_index_file(path, &self.config)
    }
717
718    #[inline]
719    fn should_process_file_path(&self, path: &Path) -> bool {
720        if self.is_allowed_path(path) {
721            return self.should_index_file_path(path);
722        }
723
724        !self.is_excluded_path(path) && self.should_index_file_path(path)
725    }
726
    /// Build an `ignore`-crate walker rooted at `dir_path`.
    ///
    /// Hidden-entry handling is delegated to the traversal filter (hence
    /// `.hidden(false)`), while gitignore-style files are honored natively.
    fn build_walker(&self, dir_path: &Path) -> Walk {
        // Owned copies are captured because `filter_entry` takes a `move`
        // closure that outlives this borrow of `self`.
        let walk_root = dir_path.to_path_buf();
        let config = self.config.clone();
        let filter = Arc::clone(&self.filter);

        let mut builder = WalkBuilder::new(dir_path);
        builder
            .hidden(false)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .ignore(true)
            .parents(true);
        builder.filter_entry(move |entry| {
            should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
        });
        builder.build()
    }
745
746    fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
747        self.index_cache
748            .retain(|path, _| !Path::new(path).starts_with(dir_path));
749
750        self.index_cache.extend(
751            entries
752                .iter()
753                .cloned()
754                .map(|entry| (entry.path.clone(), entry)),
755        );
756    }
757
    /// Apply a single-file change to the cache, then persist a full snapshot.
    ///
    /// If persistence fails, the cache mutation is rolled back so memory and
    /// disk stay consistent.
    fn apply_snapshot_file_update(
        &mut self,
        cache_key: String,
        next_entry: Option<FileIndex>,
    ) -> Result<()> {
        // Remember what the cache held before, so the change can be undone.
        let previous_entry = match next_entry {
            Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
            None => self.index_cache.remove(cache_key.as_str()),
        };

        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: restore the previous entry, or remove the new one.
            match previous_entry {
                Some(entry) => {
                    self.index_cache.insert(cache_key, entry);
                }
                None => {
                    self.index_cache.remove(cache_key.as_str());
                }
            }
            return Err(err);
        }

        Ok(())
    }
782
    /// Replace all cached entries under `dir_path`, then persist a snapshot.
    ///
    /// On persistence failure the previous entries are restored so the cache
    /// keeps matching what is on disk.
    fn apply_snapshot_directory_update(
        &mut self,
        dir_path: &Path,
        entries: &[FileIndex],
    ) -> Result<()> {
        let previous_entries = self.take_cached_entries(dir_path);
        self.index_cache.extend(
            entries
                .iter()
                .cloned()
                .map(|entry| (entry.path.clone(), entry)),
        );

        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: drop the new entries and reinstate the old ones.
            self.index_cache
                .retain(|path, _| !Path::new(path).starts_with(dir_path));
            self.index_cache.extend(
                previous_entries
                    .into_iter()
                    .map(|entry| (entry.path.clone(), entry)),
            );
            return Err(err);
        }

        Ok(())
    }
809
810    fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
811        let keys = self
812            .index_cache
813            .keys()
814            .filter(|path| Path::new(path).starts_with(dir_path))
815            .cloned()
816            .collect::<Vec<_>>();
817
818        keys.into_iter()
819            .filter_map(|path| self.index_cache.remove(path.as_str()))
820            .collect()
821    }
822
823    fn persist_current_snapshot(&self) -> Result<()> {
824        let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
825        snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
826        self.storage
827            .persist_batch_refs(self.config.index_dir(), &snapshot)
828    }
829}
830
831impl Clone for SimpleIndexer {
832    fn clone(&self) -> Self {
833        Self {
834            config: self.config.clone(),
835            index_cache: self.index_cache.clone(),
836            storage: self.storage.clone(),
837            filter: self.filter.clone(),
838        }
839    }
840}
841
842fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
843    if is_allowed_path_or_ancestor(path, config) {
844        return false;
845    }
846
847    if config
848        .excluded_dirs
849        .iter()
850        .any(|excluded| path.starts_with(excluded))
851    {
852        return true;
853    }
854
855    if config.ignore_hidden
856        && path
857            .file_name()
858            .and_then(|name| name.to_str())
859            .is_some_and(|name_str| name_str.starts_with('.'))
860    {
861        return true;
862    }
863
864    false
865}
866
867fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
868    config
869        .allowed_dirs
870        .iter()
871        .any(|allowed| path.starts_with(allowed) || allowed.starts_with(path))
872}
873
874fn should_visit_entry(
875    entry: &DirEntry,
876    walk_root: &Path,
877    config: &SimpleIndexerConfig,
878    filter: &dyn TraversalFilter,
879) -> bool {
880    if entry.path() == walk_root {
881        return true;
882    }
883
884    if !entry
885        .file_type()
886        .is_some_and(|file_type| file_type.is_dir())
887    {
888        return true;
889    }
890
891    filter.should_descend(entry.path(), config)
892}
893
/// SHA-256 digest of `content`, rendered by `vtcode_commons` (the legacy
/// entry-name check expects a 64-char hex string, so this is presumably hex).
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
898
899fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
900    writeln!(writer, "## {}", entry.path)?;
901    writeln!(writer)?;
902    write_markdown_fields(writer, entry)?;
903    writeln!(writer)?;
904    Ok(())
905}
906
907fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
908    writeln!(writer, "- **Path**: {}", entry.path)?;
909    writeln!(writer, "- **Hash**: {}", entry.hash)?;
910    writeln!(writer, "- **Modified**: {}", entry.modified)?;
911    writeln!(writer, "- **Size**: {} bytes", entry.size)?;
912    writeln!(writer, "- **Language**: {}", entry.language)?;
913    writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
914    Ok(())
915}
916
917fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
918    for entry in fs::read_dir(index_dir)? {
919        let entry = entry?;
920        let file_name = entry.file_name();
921        let file_name = file_name.to_string_lossy();
922        if is_legacy_markdown_entry_name(file_name.as_ref()) {
923            fs::remove_file(entry.path())?;
924        }
925    }
926    Ok(())
927}
928
/// True for file names shaped like `<64-hex-char-sha256>.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    match file_name.strip_suffix(".md") {
        Some(stem) => stem.len() == 64 && stem.chars().all(|c| c.is_ascii_hexdigit()),
        None => false,
    }
}
936
#[cfg(test)]
mod tests {
    //! Integration-style tests that exercise [`SimpleIndexer`] against real
    //! temporary workspaces created with `tempfile`, covering traversal
    //! filtering, cache pruning, Markdown snapshot persistence, and custom
    //! storage/filter backends.

    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    // Dot-prefixed directories are excluded by the default configuration.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        // Hidden content is absent from the index; visible content is present.
        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }

    // `ignore_hidden(false)` opts hidden directories back into traversal.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    // An allow-listed subdirectory is still indexed even when its parent
    // (here `.vtcode`) is hidden/excluded.
    #[test]
    fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let allowed_dir = workspace.join(".vtcode").join("external");
        fs::create_dir_all(&allowed_dir)?;
        fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("plugin\\.toml$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    // Re-running `index_directory` drops cache entries for files deleted
    // from disk since the previous run.
    #[test]
    fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);

        fs::remove_file(&file_path)?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());
        assert!(indexer.all_files().is_empty());

        Ok(())
    }

    // The indexer's own generated output (inside `.vtcode/index`) must never
    // be indexed, even when passed to `index_file` directly.
    #[test]
    fn index_file_skips_excluded_paths() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let index_dir = workspace.join(".vtcode").join("index");
        fs::create_dir_all(&index_dir)?;
        let generated_index = index_dir.join("index.md");
        fs::write(&generated_index, "# generated")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&generated_index)?;

        assert!(indexer.all_files().is_empty());

        Ok(())
    }

    // A file that becomes unreadable is evicted from both the in-memory
    // cache and the persisted Markdown snapshot.
    #[test]
    fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&file_path)?;
        assert!(
            indexer
                .find_files("notes\\.txt$")?
                .iter()
                .any(|file| file.ends_with("notes.txt"))
        );

        // Overwrite with invalid UTF-8 so reading the file as text fails.
        fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
        indexer.index_file(&file_path)?;

        assert!(indexer.find_files("notes\\.txt$")?.is_empty());

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));

        Ok(())
    }

    // Successive `index_file` calls accumulate into a single `index.md`
    // snapshot rather than one file per entry.
    #[test]
    fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&first)?;
        indexer.index_file(&second)?;

        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(first.to_string_lossy().as_ref()));
        assert!(index_content.contains(second.to_string_lossy().as_ref()));

        Ok(())
    }

    // `index_directory` bootstraps the index directory itself; an explicit
    // `init()` call is not required first.
    #[test]
    fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.index_directory(workspace)?;

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));

        Ok(())
    }

    // Out-of-range and inverted line ranges are clamped to empty output
    // instead of panicking.
    #[test]
    fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "first\nsecond")?;

        let indexer = SimpleIndexer::new(workspace.to_path_buf());
        let file_path = file_path.to_string_lossy().into_owned();

        // Start past EOF -> empty; valid window -> numbered line; start > end -> empty.
        assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
        assert_eq!(
            indexer.get_file_content(&file_path, Some(0), Some(1))?,
            "1: first\n"
        );
        assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");

        Ok(())
    }

    // A user-supplied `IndexStorage` receives every persisted entry.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // Minimal in-memory backend that records each persisted entry.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );

        Ok(())
    }

    // A custom `TraversalFilter` can veto individual files via
    // `should_index_file` while delegating everything else to the default.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                // Reject `.rs` files; defer the rest to the default filter.
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }

    // A custom `TraversalFilter` can prune whole directories via
    // `should_descend`, observed through `discover_files`.
    #[test]
    fn custom_filters_can_skip_directories() -> Result<()> {
        #[derive(Default)]
        struct SkipGeneratedFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipGeneratedFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                // Never descend into `generated` directories.
                if path.ends_with("generated") {
                    return false;
                }

                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        let generated_dir = workspace.join("generated");
        fs::create_dir_all(&generated_dir)?;
        fs::write(generated_dir.join("skip.txt"), "ignore me")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let indexer = SimpleIndexer::with_config(config)
            .with_filter(Arc::new(SkipGeneratedFilter::default()));
        let files = indexer.discover_files(workspace);

        assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
        assert!(files.iter().any(|file| file.ends_with("README.md")));

        Ok(())
    }

    // Indexing a second directory must not discard entries cached from the
    // first, in memory or in the Markdown snapshot.
    #[test]
    fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let src_dir = workspace.join("src");
        let docs_dir = workspace.join("docs");
        fs::create_dir_all(&src_dir)?;
        fs::create_dir_all(&docs_dir)?;
        fs::write(src_dir.join("lib.rs"), "fn main() {}")?;
        fs::write(docs_dir.join("guide.md"), "# Guide")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(&src_dir)?;
        indexer.index_directory(&docs_dir)?;

        assert!(
            indexer
                .find_files("lib\\.rs$")?
                .iter()
                .any(|file| file.ends_with("lib.rs"))
        );
        assert!(
            indexer
                .find_files("guide\\.md$")?
                .iter()
                .any(|file| file.ends_with("guide.md"))
        );

        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(src_dir.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(docs_dir.join("guide.md").to_string_lossy().as_ref()));

        Ok(())
    }

    // Batch indexing of a directory writes exactly one `index.md` file
    // containing all entries.
    #[test]
    fn batch_indexing_writes_single_markdown_file() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(workspace.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(workspace.join("README.md").to_string_lossy().as_ref()));

        Ok(())
    }

    // Batch indexing deletes stale `<sha256>.md` files left over from the
    // older one-file-per-entry persistence format.
    #[test]
    fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;

        // Plant a legacy entry named after a SHA-256 digest.
        let legacy_file_name = format!("{}.md", calculate_hash("legacy-path"));
        let legacy_file_path = workspace
            .join(".vtcode")
            .join("index")
            .join(&legacy_file_name);
        fs::write(&legacy_file_path, "# legacy")?;
        assert!(legacy_file_path.exists());

        indexer.index_directory(workspace)?;

        assert!(!legacy_file_path.exists());
        let files = fs::read_dir(workspace.join(".vtcode").join("index"))?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);

        Ok(())
    }

    // A backend that opts into snapshot persistence receives the full
    // in-memory index through the default `persist_batch` path.
    #[test]
    fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
        // Records every snapshot handed to `persist_batch`.
        #[derive(Clone, Default)]
        struct SnapshotMemoryStorage {
            snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
        }

        impl SnapshotMemoryStorage {
            fn new(snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>) -> Self {
                Self { snapshots }
            }
        }

        impl IndexStorage for SnapshotMemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
                Ok(())
            }

            fn prefers_snapshot_persistence(&self) -> bool {
                true
            }

            fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
                self.snapshots
                    .lock()
                    .expect("lock poisoned")
                    .push(entries.to_vec());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;

        let snapshots = Arc::new(Mutex::new(Vec::new()));
        let storage = SnapshotMemoryStorage::new(snapshots.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.index_file(&file_path)?;

        let snapshots = snapshots.lock().expect("lock poisoned");
        assert_eq!(snapshots.len(), 1);
        assert_eq!(snapshots[0].len(), 1);
        assert_eq!(
            snapshots[0][0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );

        Ok(())
    }

    // If snapshot persistence fails, `index_file` must roll the in-memory
    // cache back so it stays consistent with what was last persisted.
    #[test]
    fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
        // Succeeds on the first persist, fails on the second.
        #[derive(Clone, Default)]
        struct FlakySnapshotStorage {
            persist_count: Arc<Mutex<usize>>,
        }

        impl IndexStorage for FlakySnapshotStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
                Ok(())
            }

            fn prefers_snapshot_persistence(&self) -> bool {
                true
            }

            fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
                let mut count = self.persist_count.lock().expect("lock poisoned");
                *count += 1;
                // Only the second snapshot write fails.
                if *count == 2 {
                    anyhow::bail!("simulated snapshot persistence failure");
                }
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let storage = Arc::new(FlakySnapshotStorage::default());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(storage);

        indexer.index_file(&first)?;
        assert!(
            indexer
                .find_files("first\\.txt$")?
                .iter()
                .any(|path| path.ends_with("first.txt"))
        );

        let err = indexer
            .index_file(&second)
            .expect_err("second persist should fail");
        assert!(
            err.to_string()
                .contains("simulated snapshot persistence failure")
        );
        // The first entry survives; the failed second entry was rolled back.
        assert!(
            indexer
                .find_files("first\\.txt$")?
                .iter()
                .any(|path| path.ends_with("first.txt"))
        );
        assert!(indexer.find_files("second\\.txt$")?.is_empty());

        Ok(())
    }
}