use anyhow::Result;
use hashbrown::HashMap;
use ignore::{DirEntry, Walk, WalkBuilder};
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fmt::Write as FmtWrite;
use std::fs;
use std::io::{BufWriter, ErrorKind, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
/// Persistence backend for the indexer.
///
/// Implementations either persist one file per entry (the default method
/// behavior) or, by returning `true` from
/// [`IndexStorage::prefers_snapshot_persistence`], receive whole-index batches
/// through the `persist_batch*` methods.
pub trait IndexStorage: Send + Sync {
    /// Prepare the backing store rooted at `index_dir`.
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Persist a single index entry.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;

    /// True when the backend wants full-snapshot batches instead of
    /// per-file persistence.
    fn prefers_snapshot_persistence(&self) -> bool {
        false
    }

    /// Remove any persisted state for `file_path`; a no-op by default.
    fn remove(&self, _index_dir: &Path, _file_path: &Path) -> Result<()> {
        Ok(())
    }

    /// Persist every entry in `entries`, stopping at the first failure.
    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
        entries
            .iter()
            .try_for_each(|entry| self.persist(index_dir, entry))
    }

    /// Reference-based variant of [`IndexStorage::persist_batch`]; the default
    /// clones into an owned buffer and delegates.
    fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
        let owned: Vec<FileIndex> = entries.iter().map(|&entry| entry.clone()).collect();
        self.persist_batch(index_dir, &owned)
    }
}
/// Decides which directories are descended into and which files get indexed.
pub trait TraversalFilter: Send + Sync {
    /// Return `true` to recurse into the directory at `path`.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
    /// Return `true` if the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
/// Storage backend that renders the index as Markdown on disk.
#[derive(Debug, Default, Clone)]
pub struct MarkdownIndexStorage;
impl IndexStorage for MarkdownIndexStorage {
    /// Ensure the index directory exists.
    fn init(&self, index_dir: &Path) -> Result<()> {
        fs::create_dir_all(index_dir)?;
        Ok(())
    }

    /// Write one entry as a standalone Markdown file named after the hash of
    /// the entry's path.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
        fs::create_dir_all(index_dir)?;
        let index_path = index_dir.join(format!("{}.md", calculate_hash(&entry.path)));
        let mut writer = BufWriter::new(fs::File::create(index_path)?);
        writeln!(writer, "# File Index: {}", entry.path)?;
        writeln!(writer)?;
        write_markdown_fields(&mut writer, entry)?;
        writer.flush()?;
        Ok(())
    }

    /// This backend rewrites the whole index as a single snapshot file.
    fn prefers_snapshot_persistence(&self) -> bool {
        true
    }

    /// Delete the per-file Markdown entry; a missing file is not an error.
    fn remove(&self, index_dir: &Path, file_path: &Path) -> Result<()> {
        let hashed = calculate_hash(file_path.to_string_lossy().as_ref());
        let index_path = index_dir.join(format!("{hashed}.md"));
        if let Err(err) = fs::remove_file(index_path) {
            if err.kind() != ErrorKind::NotFound {
                return Err(err.into());
            }
        }
        Ok(())
    }

    fn persist_batch(&self, index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
        persist_markdown_snapshot(index_dir, entries.iter())
    }

    fn persist_batch_refs(&self, index_dir: &Path, entries: &[&FileIndex]) -> Result<()> {
        persist_markdown_snapshot(index_dir, entries.iter().copied())
    }
}
/// Render all `entries` into a single `index.md`, writing through a temp file
/// and renaming so readers never observe a partial snapshot, then sweep any
/// legacy per-file entries left behind by the old format.
fn persist_markdown_snapshot<'a>(
    index_dir: &Path,
    entries: impl IntoIterator<Item = &'a FileIndex>,
) -> Result<()> {
    let collected: Vec<&FileIndex> = entries.into_iter().collect();
    fs::create_dir_all(index_dir)?;
    let staging_path = index_dir.join(".index.md.tmp");
    let final_path = index_dir.join("index.md");
    let mut writer = BufWriter::new(fs::File::create(&staging_path)?);
    writeln!(writer, "# Workspace File Index")?;
    writeln!(writer)?;
    writeln!(writer, "- **Entries**: {}", collected.len())?;
    writeln!(writer)?;
    for entry in collected {
        write_markdown_entry(&mut writer, entry)?;
    }
    writer.flush()?;
    // Atomic-on-most-platforms replace of the published snapshot.
    fs::rename(staging_path, final_path)?;
    cleanup_legacy_markdown_entries(index_dir)?;
    Ok(())
}
/// Default traversal filter driven entirely by `SimpleIndexerConfig`.
#[derive(Debug, Default, Clone)]
pub struct ConfigTraversalFilter;
impl TraversalFilter for ConfigTraversalFilter {
    /// Descend unless the config-based skip rules reject the directory.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
        !should_skip_dir(path, config)
    }

    /// Index regular files only, honoring the hidden-file setting and always
    /// refusing well-known sensitive file names.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
        if !path.is_file() {
            return false;
        }
        let file_name = path.file_name().and_then(|n| n.to_str());
        if config.ignore_hidden && file_name.is_some_and(|name| name.starts_with('.')) {
            return false;
        }
        match file_name {
            Some(name) => {
                // Secrets and VCS metadata are never indexed, even when
                // hidden files are otherwise allowed.
                let sensitive = name.starts_with(".env.")
                    || matches!(
                        name,
                        ".env"
                            | ".env.local"
                            | ".env.production"
                            | ".env.development"
                            | ".env.test"
                            | ".git"
                            | ".gitignore"
                            | ".DS_Store"
                    );
                !sensitive
            }
            None => true,
        }
    }
}
/// Configuration for the simple workspace indexer: root paths, hidden-file
/// handling, and traversal allow/deny lists.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory where index artifacts are written.
    index_dir: PathBuf,
    /// When true, dot-prefixed files and directories are skipped.
    ignore_hidden: bool,
    /// Directories that are never traversed or indexed.
    excluded_dirs: Vec<PathBuf>,
    /// Directories indexed even when nested inside hidden/excluded parents.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Build the default configuration for `workspace_root`.
    ///
    /// Defaults: the index lives in `.vtcode/index`; `.vtcode`, `target`, and
    /// `node_modules` are excluded; `.vtcode/external` is allow-listed; hidden
    /// entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let external_dir = vtcode_dir.join("external");
        // The four paths are distinct by construction, so no dedup pass is
        // needed here.  (The previous `Vec::dedup` call was a no-op anyway:
        // it only removes *consecutive* equal elements.)
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];
        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs: vec![external_dir],
        }
    }

    /// Redirect index output to `index_dir` and exclude it from traversal.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let index_dir = index_dir.into();
        self.index_dir = index_dir.clone();
        self.push_unique_excluded(index_dir);
        self
    }

    /// Allow `path` to be indexed even under hidden/excluded parents.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let path = path.into();
        if !self.allowed_dirs.contains(&path) {
            self.allowed_dirs.push(path);
        }
        self
    }

    /// Exclude `path` from traversal and indexing.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Toggle skipping of dot-prefixed files and directories.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        &self.workspace_root
    }

    /// Directory where index artifacts are written.
    pub fn index_dir(&self) -> &Path {
        &self.index_dir
    }

    /// Append `path` to the exclusion list unless already present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
/// Metadata recorded for a single indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Workspace path of the file (lossy UTF-8 rendering of the OS path).
    pub path: String,
    /// Hex digest of the file content (SHA-256 via `calculate_hash`).
    pub hash: String,
    /// Last modification time in seconds since the Unix epoch.
    pub modified: u64,
    /// Content length in bytes.
    pub size: u64,
    /// Language label derived from the file extension ("unknown" if none).
    pub language: String,
    /// Free-form tags; currently always empty when built by the indexer.
    pub tags: Vec<String>,
}
/// One matching line produced by `search` or `grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match.
    pub file_path: String,
    /// 1-based line number of the match.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// For `search`: each regex match on the line; for `grep`: the whole line.
    pub matches: Vec<String>,
}
/// Lightweight workspace indexer combining a traversal filter, an in-memory
/// cache of `FileIndex` entries, and a pluggable storage backend.
pub struct SimpleIndexer {
    config: SimpleIndexerConfig,
    /// In-memory index keyed by the file's path string.
    index_cache: HashMap<String, FileIndex>,
    storage: Arc<dyn IndexStorage>,
    filter: Arc<dyn TraversalFilter>,
}
impl SimpleIndexer {
pub fn new(workspace_root: PathBuf) -> Self {
Self::with_components(
SimpleIndexerConfig::new(workspace_root),
Arc::new(MarkdownIndexStorage),
Arc::new(ConfigTraversalFilter),
)
}
pub fn with_config(config: SimpleIndexerConfig) -> Self {
Self::with_components(
config,
Arc::new(MarkdownIndexStorage),
Arc::new(ConfigTraversalFilter),
)
}
pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
Self::with_config(config)
}
pub fn with_components(
config: SimpleIndexerConfig,
storage: Arc<dyn IndexStorage>,
filter: Arc<dyn TraversalFilter>,
) -> Self {
Self {
config,
index_cache: HashMap::new(),
storage,
filter,
}
}
pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
Self { storage, ..self }
}
pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
Self { filter, ..self }
}
pub fn init(&self) -> Result<()> {
self.storage.init(self.config.index_dir())
}
    /// Root directory of the indexed workspace.
    pub fn workspace_root(&self) -> &Path {
        self.config.workspace_root()
    }
    /// Directory where index artifacts are persisted.
    pub fn index_dir(&self) -> &Path {
        self.config.index_dir()
    }
    /// Index (or re-index) a single file, keeping the storage backend and the
    /// in-memory cache in sync.  A file that no longer exists, is filtered
    /// out, or cannot be read as text is removed from both.
    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
        let cache_key = file_path.to_string_lossy().into_owned();
        if self.storage.prefers_snapshot_persistence() {
            // Snapshot backends rewrite the whole index: compute the new entry
            // (None means "delete") and let the snapshot helper handle
            // persistence plus rollback on failure.
            let next_entry = if file_path.exists() && self.should_process_file_path(file_path) {
                self.build_file_index(file_path)?
            } else {
                None
            };
            self.apply_snapshot_file_update(cache_key, next_entry)?;
            return Ok(());
        }
        // Per-file backends: remove stale state for vanished/filtered files.
        if !file_path.exists() || !self.should_process_file_path(file_path) {
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
            return Ok(());
        }
        if let Some(index) = self.build_file_index(file_path)? {
            self.storage.persist(self.config.index_dir(), &index)?;
            self.index_cache.insert(index.path.clone(), index);
        } else {
            // build_file_index returned None (e.g. binary content): treat as
            // a removal so no stale entry lingers.
            self.index_cache.remove(cache_key.as_str());
            self.storage.remove(self.config.index_dir(), file_path)?;
        }
        Ok(())
    }
pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
let walker = self.build_walker(dir_path);
let mut entries = Vec::new();
for entry in walker.filter_map(|e| e.ok()) {
let path = entry.path();
if entry.file_type().is_some_and(|ft| ft.is_file())
&& let Some(index) = self.build_file_index(path)?
{
entries.push(index);
}
}
if self.storage.prefers_snapshot_persistence() {
self.apply_snapshot_directory_update(dir_path, &entries)?;
} else {
entries.sort_unstable_by(|left, right| left.path.cmp(&right.path));
self.storage
.persist_batch(self.config.index_dir(), &entries)?;
}
self.replace_cached_entries(dir_path, &entries);
Ok(())
}
pub fn discover_files(&self, dir_path: &Path) -> Vec<String> {
let walker = self.build_walker(dir_path);
let mut files = walker
.filter_map(|e| e.ok())
.filter(|e| {
if !e.file_type().is_some_and(|ft| ft.is_file()) {
return false;
}
self.should_process_file_path(e.path())
})
.map(|e| e.path().to_string_lossy().into_owned())
.collect::<Vec<_>>();
files.sort_unstable();
files
}
fn search_files_internal(
&self,
regex: &Regex,
path_filter: Option<&str>,
extract_matches: bool,
) -> Vec<SearchResult> {
let mut results = Vec::new();
for file_path in self.index_cache.keys() {
if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
continue;
}
if let Ok(content) = fs::read_to_string(file_path) {
for (line_num, line) in content.lines().enumerate() {
if regex.is_match(line) {
let matches = if extract_matches {
regex
.find_iter(line)
.map(|m| m.as_str().to_string())
.collect()
} else {
vec![line.to_string()]
};
results.push(SearchResult {
file_path: file_path.clone(),
line_number: line_num + 1,
line_content: line.to_string(),
matches,
});
}
}
}
}
results.sort_unstable_by(|left, right| {
left.file_path
.cmp(&right.file_path)
.then_with(|| left.line_number.cmp(&right.line_number))
});
results
}
pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
let regex = Regex::new(pattern)?;
Ok(self.search_files_internal(®ex, path_filter, true))
}
pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
let regex = Regex::new(pattern)?;
let mut results = Vec::new();
for file_path in self.index_cache.keys() {
if regex.is_match(file_path) {
results.push(file_path.clone());
}
}
results.sort_unstable();
Ok(results)
}
pub fn all_files(&self) -> Vec<String> {
let mut files = self.index_cache.keys().cloned().collect::<Vec<_>>();
files.sort_unstable();
files
}
pub fn get_file_content(
&self,
file_path: &str,
start_line: Option<usize>,
end_line: Option<usize>,
) -> Result<String> {
let content = fs::read_to_string(file_path)?;
let start = start_line.unwrap_or(1).max(1);
let end = end_line.unwrap_or(usize::MAX);
if start > end {
return Ok(String::new());
}
let mut result = String::new();
for (line_number, line) in content.lines().enumerate() {
let line_number = line_number + 1;
if line_number < start {
continue;
}
if line_number > end {
break;
}
writeln!(&mut result, "{line_number}: {line}")?;
}
Ok(result)
}
pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
let path = Path::new(dir_path);
if !path.exists() {
return Ok(vec![]);
}
let mut files = Vec::new();
for entry in fs::read_dir(path)? {
let entry = entry?;
let file_name = entry.file_name().to_string_lossy().into_owned();
if !show_hidden && file_name.starts_with('.') {
continue;
}
files.push(file_name);
}
files.sort_unstable();
Ok(files)
}
pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
let regex = Regex::new(pattern)?;
Ok(self.search_files_internal(®ex, file_pattern, false))
}
    /// Manual recursive traversal entry point (currently unused; the active
    /// traversal path goes through `build_walker`).
    #[allow(dead_code)]
    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        if !dir_path.exists() {
            return Ok(());
        }
        self.walk_directory_internal(dir_path, callback)
    }
    /// Recursive worker for `walk_directory`: invokes `callback` on every
    /// file, giving allow-list overrides precedence over the descend filter.
    #[allow(dead_code)]
    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        for entry in fs::read_dir(dir_path)? {
            let entry = entry?;
            let path = entry.path();
            if path.is_dir() {
                // Allow-listed directories are always traversed.
                if self.is_allowed_path(&path) {
                    self.walk_directory_internal(&path, callback)?;
                    continue;
                }
                // Skipped directory: still visit allow-listed dirs nested
                // somewhere beneath it.
                if !self.filter.should_descend(&path, &self.config) {
                    self.walk_allowed_descendants(&path, callback)?;
                    continue;
                }
                self.walk_directory_internal(&path, callback)?;
            } else if path.is_file() {
                callback(&path)?;
            }
        }
        Ok(())
    }
    /// True if `path` lies inside one of the configured allow-listed dirs.
    #[allow(dead_code)]
    fn is_allowed_path(&self, path: &Path) -> bool {
        self.config
            .allowed_dirs
            .iter()
            .any(|allowed| path.starts_with(allowed))
    }
    /// Traverse only the allow-listed directories that live under `dir_path`.
    #[allow(dead_code)]
    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        // Clone so the recursion's `&mut self` doesn't conflict with borrowing
        // `self.config`.
        let allowed_dirs = self.config.allowed_dirs.clone();
        for allowed in allowed_dirs {
            if allowed.starts_with(dir_path) && allowed.exists() {
                self.walk_directory_internal(&allowed, callback)?;
            }
        }
        Ok(())
    }
#[inline]
fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
let metadata = fs::metadata(file_path)?;
let modified = metadata.modified()?;
Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
}
#[inline]
fn detect_language(&self, file_path: &Path) -> String {
file_path
.extension()
.and_then(|ext| ext.to_str())
.unwrap_or("unknown")
.to_string()
}
fn build_file_index(&self, file_path: &Path) -> Result<Option<FileIndex>> {
if !self.should_process_file_path(file_path) {
return Ok(None);
}
let content = match fs::read_to_string(file_path) {
Ok(text) => text,
Err(err) => {
if err.kind() == ErrorKind::InvalidData {
return Ok(None);
}
return Err(err.into());
}
};
let index = FileIndex {
path: file_path.to_string_lossy().into_owned(),
hash: calculate_hash(&content),
modified: self.get_modified_time(file_path)?,
size: content.len() as u64,
language: self.detect_language(file_path),
tags: vec![],
};
Ok(Some(index))
}
#[inline]
fn is_excluded_path(&self, path: &Path) -> bool {
self.config
.excluded_dirs
.iter()
.any(|excluded| path.starts_with(excluded))
}
#[inline]
fn should_index_file_path(&self, path: &Path) -> bool {
self.filter.should_index_file(path, &self.config)
}
#[inline]
fn should_process_file_path(&self, path: &Path) -> bool {
if self.is_allowed_path(path) {
return self.should_index_file_path(path);
}
!self.is_excluded_path(path) && self.should_index_file_path(path)
}
fn build_walker(&self, dir_path: &Path) -> Walk {
let walk_root = dir_path.to_path_buf();
let config = self.config.clone();
let filter = Arc::clone(&self.filter);
let mut builder = WalkBuilder::new(dir_path);
builder
.hidden(false)
.git_ignore(true)
.git_global(true)
.git_exclude(true)
.ignore(true)
.parents(true);
builder.filter_entry(move |entry| {
should_visit_entry(entry, walk_root.as_path(), &config, filter.as_ref())
});
builder.build()
}
fn replace_cached_entries(&mut self, dir_path: &Path, entries: &[FileIndex]) {
self.index_cache
.retain(|path, _| !Path::new(path).starts_with(dir_path));
self.index_cache.extend(
entries
.iter()
.cloned()
.map(|entry| (entry.path.clone(), entry)),
);
}
    /// Apply a single-file change to the cache (insert for `Some`, delete for
    /// `None`), then persist a full snapshot.  If persistence fails, the cache
    /// mutation is rolled back so cache and on-disk snapshot stay consistent.
    fn apply_snapshot_file_update(
        &mut self,
        cache_key: String,
        next_entry: Option<FileIndex>,
    ) -> Result<()> {
        // Remember the displaced entry (if any) so the change can be undone.
        let previous_entry = match next_entry {
            Some(entry) => self.index_cache.insert(cache_key.clone(), entry),
            None => self.index_cache.remove(cache_key.as_str()),
        };
        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: restore the previous entry, or remove the new one if
            // nothing was there before.
            match previous_entry {
                Some(entry) => {
                    self.index_cache.insert(cache_key, entry);
                }
                None => {
                    self.index_cache.remove(cache_key.as_str());
                }
            }
            return Err(err);
        }
        Ok(())
    }
    /// Swap in the freshly indexed `entries` for the `dir_path` subtree and
    /// persist a full snapshot.  On persistence failure the subtree's previous
    /// cache entries are restored and the error is propagated.
    fn apply_snapshot_directory_update(
        &mut self,
        dir_path: &Path,
        entries: &[FileIndex],
    ) -> Result<()> {
        // Pull out the subtree's current entries so they can be restored.
        let previous_entries = self.take_cached_entries(dir_path);
        self.index_cache.extend(
            entries
                .iter()
                .cloned()
                .map(|entry| (entry.path.clone(), entry)),
        );
        if let Err(err) = self.persist_current_snapshot() {
            // Roll back: drop the new subtree entries, reinstall the old ones.
            self.index_cache
                .retain(|path, _| !Path::new(path).starts_with(dir_path));
            self.index_cache.extend(
                previous_entries
                    .into_iter()
                    .map(|entry| (entry.path.clone(), entry)),
            );
            return Err(err);
        }
        Ok(())
    }
fn take_cached_entries(&mut self, dir_path: &Path) -> Vec<FileIndex> {
let keys = self
.index_cache
.keys()
.filter(|path| Path::new(path).starts_with(dir_path))
.cloned()
.collect::<Vec<_>>();
keys.into_iter()
.filter_map(|path| self.index_cache.remove(path.as_str()))
.collect()
}
fn persist_current_snapshot(&self) -> Result<()> {
let mut snapshot = self.index_cache.values().collect::<Vec<_>>();
snapshot.sort_unstable_by(|left, right| left.path.cmp(&right.path));
self.storage
.persist_batch_refs(self.config.index_dir(), &snapshot)
}
}
impl Clone for SimpleIndexer {
fn clone(&self) -> Self {
Self {
config: self.config.clone(),
index_cache: self.index_cache.clone(),
storage: self.storage.clone(),
filter: self.filter.clone(),
}
}
}
/// Directory skip rule: allow-listed paths (and their ancestors) always win;
/// otherwise excluded directories are skipped, and hidden (dot-prefixed)
/// directories are skipped when the config says so.
fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
    if is_allowed_path_or_ancestor(path, config) {
        return false;
    }
    let excluded = config
        .excluded_dirs
        .iter()
        .any(|excluded| path.starts_with(excluded));
    if excluded {
        return true;
    }
    let hidden = path
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name_str| name_str.starts_with('.'));
    config.ignore_hidden && hidden
}
/// True when `path` is inside an allow-listed directory, or is an ancestor of
/// one (ancestors must be visited so traversal can reach the allowed dir).
fn is_allowed_path_or_ancestor(path: &Path, config: &SimpleIndexerConfig) -> bool {
    for allowed in &config.allowed_dirs {
        if path.starts_with(allowed) || allowed.starts_with(path) {
            return true;
        }
    }
    false
}
/// Walker predicate: the root is always visited, non-directories always pass
/// through (file-level filtering happens later), and directories are pruned
/// via the traversal filter.
fn should_visit_entry(
    entry: &DirEntry,
    walk_root: &Path,
    config: &SimpleIndexerConfig,
    filter: &dyn TraversalFilter,
) -> bool {
    let path = entry.path();
    if path == walk_root {
        return true;
    }
    let is_dir = entry.file_type().is_some_and(|file_type| file_type.is_dir());
    if !is_dir {
        return true;
    }
    filter.should_descend(path, config)
}
/// Hash `content` with SHA-256 (hex string) via the shared vtcode helper.
#[inline]
fn calculate_hash(content: &str) -> String {
    vtcode_commons::utils::calculate_sha256(content.as_bytes())
}
/// Write one file's snapshot section: a `##` heading with the path, a blank
/// line, the field list, and a trailing blank line separating entries.
fn write_markdown_entry(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
    writeln!(writer, "## {}", entry.path)?;
    writeln!(writer)?;
    write_markdown_fields(writer, entry)?;
    writeln!(writer)?;
    Ok(())
}
/// Write the Markdown bullet list of a `FileIndex`'s fields.
fn write_markdown_fields(writer: &mut impl Write, entry: &FileIndex) -> std::io::Result<()> {
    writeln!(writer, "- **Path**: {}", entry.path)?;
    writeln!(writer, "- **Hash**: {}", entry.hash)?;
    writeln!(writer, "- **Modified**: {}", entry.modified)?;
    writeln!(writer, "- **Size**: {} bytes", entry.size)?;
    writeln!(writer, "- **Language**: {}", entry.language)?;
    writeln!(writer, "- **Tags**: {}", entry.tags.join(", "))?;
    Ok(())
}
/// Delete leftover per-file entries from the legacy storage layout
/// (64-hex-digit `<hash>.md` files) after a snapshot write.
fn cleanup_legacy_markdown_entries(index_dir: &Path) -> Result<()> {
    for entry in fs::read_dir(index_dir)? {
        let entry = entry?;
        let name = entry.file_name();
        if is_legacy_markdown_entry_name(name.to_string_lossy().as_ref()) {
            fs::remove_file(entry.path())?;
        }
    }
    Ok(())
}
/// True for file names of the legacy per-file layout: exactly 64 hex digits
/// (a SHA-256 digest) followed by `.md`.
#[inline]
fn is_legacy_markdown_entry_name(file_name: &str) -> bool {
    match file_name.strip_suffix(".md") {
        Some(stem) => stem.len() == 64 && stem.bytes().all(|byte| byte.is_ascii_hexdigit()),
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    // Default config: dot-directories are not traversed.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;
        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());
        Ok(())
    }

    // `ignore_hidden(false)` opts hidden directories back in.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;
        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;
        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);
        Ok(())
    }

    // Allow-listed dirs (.vtcode/external) win over hidden/excluded parents.
    #[test]
    fn indexes_allowed_directories_inside_hidden_excluded_parents() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let allowed_dir = workspace.join(".vtcode").join("external");
        fs::create_dir_all(&allowed_dir)?;
        fs::write(allowed_dir.join("plugin.toml"), "name = 'demo'")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        let results = indexer.find_files("plugin\\.toml$")?;
        assert_eq!(results.len(), 1);
        Ok(())
    }

    // Re-running index_directory drops cache entries for deleted files.
    #[test]
    fn reindexing_prunes_deleted_files_from_cache() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert_eq!(indexer.find_files("notes\\.txt$")?.len(), 1);
        fs::remove_file(&file_path)?;
        indexer.index_directory(workspace)?;
        assert!(indexer.find_files("notes\\.txt$")?.is_empty());
        assert!(indexer.all_files().is_empty());
        Ok(())
    }

    // Files inside excluded dirs (e.g. the index itself) are never indexed.
    #[test]
    fn index_file_skips_excluded_paths() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let index_dir = workspace.join(".vtcode").join("index");
        fs::create_dir_all(&index_dir)?;
        let generated_index = index_dir.join("index.md");
        fs::write(&generated_index, "# generated")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&generated_index)?;
        assert!(indexer.all_files().is_empty());
        Ok(())
    }

    // A file that becomes binary (invalid UTF-8) is dropped from cache and
    // from the persisted snapshot.
    #[test]
    fn index_file_removes_stale_entry_when_file_becomes_unreadable() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&file_path)?;
        assert!(
            indexer
                .find_files("notes\\.txt$")?
                .iter()
                .any(|file| file.ends_with("notes.txt"))
        );
        fs::write(&file_path, [0xFF, 0xFE, 0xFD])?;
        indexer.index_file(&file_path)?;
        assert!(indexer.find_files("notes\\.txt$")?.is_empty());
        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(!index_content.contains(file_path.to_string_lossy().as_ref()));
        Ok(())
    }

    // Repeated single-file indexing keeps exactly one snapshot file current.
    #[test]
    fn index_file_maintains_markdown_snapshot_across_updates() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_file(&first)?;
        indexer.index_file(&second)?;
        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);
        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(first.to_string_lossy().as_ref()));
        assert!(index_content.contains(second.to_string_lossy().as_ref()));
        Ok(())
    }

    // The snapshot writer creates the index directory on demand.
    #[test]
    fn index_directory_writes_markdown_snapshot_without_manual_init() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.index_directory(workspace)?;
        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(workspace.join("notes.txt").to_string_lossy().as_ref()));
        Ok(())
    }

    // Out-of-range and inverted line ranges return empty output, never panic.
    #[test]
    fn get_file_content_clamps_ranges_without_panicking() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "first\nsecond")?;
        let indexer = SimpleIndexer::new(workspace.to_path_buf());
        let file_path = file_path.to_string_lossy().into_owned();
        assert_eq!(indexer.get_file_content(&file_path, Some(5), None)?, "");
        assert_eq!(
            indexer.get_file_content(&file_path, Some(0), Some(1))?,
            "1: first\n"
        );
        assert_eq!(indexer.get_file_content(&file_path, Some(2), Some(1))?, "");
        Ok(())
    }

    // A custom IndexStorage receives the persisted entries.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }
        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }
        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }
            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;
        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());
        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;
        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );
        Ok(())
    }

    // A custom TraversalFilter can veto individual files.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }
        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }
            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }
                self.inner.should_index_file(path, config)
            }
        }
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;
        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;
        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());
        Ok(())
    }

    // A custom TraversalFilter can veto whole directories.
    #[test]
    fn custom_filters_can_skip_directories() -> Result<()> {
        #[derive(Default)]
        struct SkipGeneratedFilter {
            inner: ConfigTraversalFilter,
        }
        impl TraversalFilter for SkipGeneratedFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path.ends_with("generated") {
                    return false;
                }
                self.inner.should_descend(path, config)
            }
            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_index_file(path, config)
            }
        }
        let temp = tempdir()?;
        let workspace = temp.path();
        let generated_dir = workspace.join("generated");
        fs::create_dir_all(&generated_dir)?;
        fs::write(generated_dir.join("skip.txt"), "ignore me")?;
        fs::write(workspace.join("README.md"), "# Notes")?;
        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let indexer = SimpleIndexer::with_config(config)
            .with_filter(Arc::new(SkipGeneratedFilter::default()));
        let files = indexer.discover_files(workspace);
        assert!(!files.iter().any(|file| file.ends_with("skip.txt")));
        assert!(files.iter().any(|file| file.ends_with("README.md")));
        Ok(())
    }

    // Indexing a second directory must not evict the first one's entries.
    #[test]
    fn indexing_multiple_directories_preserves_existing_cache_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let src_dir = workspace.join("src");
        let docs_dir = workspace.join("docs");
        fs::create_dir_all(&src_dir)?;
        fs::create_dir_all(&docs_dir)?;
        fs::write(src_dir.join("lib.rs"), "fn main() {}")?;
        fs::write(docs_dir.join("guide.md"), "# Guide")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(&src_dir)?;
        indexer.index_directory(&docs_dir)?;
        assert!(
            indexer
                .find_files("lib\\.rs$")?
                .iter()
                .any(|file| file.ends_with("lib.rs"))
        );
        assert!(
            indexer
                .find_files("guide\\.md$")?
                .iter()
                .any(|file| file.ends_with("guide.md"))
        );
        let index_content =
            fs::read_to_string(workspace.join(".vtcode").join("index").join("index.md"))?;
        assert!(index_content.contains(src_dir.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(docs_dir.join("guide.md").to_string_lossy().as_ref()));
        Ok(())
    }

    // Snapshot persistence collapses the whole batch into a single index.md.
    #[test]
    fn batch_indexing_writes_single_markdown_file() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;
        let index_dir = workspace.join(".vtcode").join("index");
        let files = fs::read_dir(&index_dir)?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);
        let index_content = fs::read_to_string(index_dir.join("index.md"))?;
        assert!(index_content.contains(workspace.join("lib.rs").to_string_lossy().as_ref()));
        assert!(index_content.contains(workspace.join("README.md").to_string_lossy().as_ref()));
        Ok(())
    }

    // Old per-file hashed entries are cleaned up after a snapshot write.
    #[test]
    fn batch_indexing_removes_legacy_hashed_entries() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        let legacy_file_name = format!("{}.md", calculate_hash("legacy-path"));
        let legacy_file_path = workspace
            .join(".vtcode")
            .join("index")
            .join(&legacy_file_name);
        fs::write(&legacy_file_path, "# legacy")?;
        assert!(legacy_file_path.exists());
        indexer.index_directory(workspace)?;
        assert!(!legacy_file_path.exists());
        let files = fs::read_dir(workspace.join(".vtcode").join("index"))?
            .filter_map(|entry| entry.ok())
            .map(|entry| entry.file_name().to_string_lossy().into_owned())
            .collect::<Vec<_>>();
        assert_eq!(files, vec!["index.md".to_string()]);
        Ok(())
    }

    // Storages overriding only persist_batch still receive ref-batches via
    // the default persist_batch_refs implementation.
    #[test]
    fn snapshot_storage_uses_default_ref_batch_persistence() -> Result<()> {
        #[derive(Clone, Default)]
        struct SnapshotMemoryStorage {
            snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>,
        }
        impl SnapshotMemoryStorage {
            fn new(snapshots: Arc<Mutex<Vec<Vec<FileIndex>>>>) -> Self {
                Self { snapshots }
            }
        }
        impl IndexStorage for SnapshotMemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }
            fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
                Ok(())
            }
            fn prefers_snapshot_persistence(&self) -> bool {
                true
            }
            fn persist_batch(&self, _index_dir: &Path, entries: &[FileIndex]) -> Result<()> {
                self.snapshots
                    .lock()
                    .expect("lock poisoned")
                    .push(entries.to_vec());
                Ok(())
            }
        }
        let temp = tempdir()?;
        let workspace = temp.path();
        let file_path = workspace.join("notes.txt");
        fs::write(&file_path, "remember this")?;
        let snapshots = Arc::new(Mutex::new(Vec::new()));
        let storage = SnapshotMemoryStorage::new(snapshots.clone());
        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.index_file(&file_path)?;
        let snapshots = snapshots.lock().expect("lock poisoned");
        assert_eq!(snapshots.len(), 1);
        assert_eq!(snapshots[0].len(), 1);
        assert_eq!(
            snapshots[0][0].path,
            workspace.join("notes.txt").to_string_lossy().into_owned()
        );
        Ok(())
    }

    // On snapshot persist failure the cache must roll back to its prior state.
    #[test]
    fn snapshot_index_file_rolls_back_cache_when_persist_fails() -> Result<()> {
        #[derive(Clone, Default)]
        struct FlakySnapshotStorage {
            persist_count: Arc<Mutex<usize>>,
        }
        impl IndexStorage for FlakySnapshotStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }
            fn persist(&self, _index_dir: &Path, _entry: &FileIndex) -> Result<()> {
                Ok(())
            }
            fn prefers_snapshot_persistence(&self) -> bool {
                true
            }
            fn persist_batch(&self, _index_dir: &Path, _entries: &[FileIndex]) -> Result<()> {
                let mut count = self.persist_count.lock().expect("lock poisoned");
                *count += 1;
                // Fail exactly on the second persist to exercise rollback.
                if *count == 2 {
                    anyhow::bail!("simulated snapshot persistence failure");
                }
                Ok(())
            }
        }
        let temp = tempdir()?;
        let workspace = temp.path();
        let first = workspace.join("first.txt");
        let second = workspace.join("second.txt");
        fs::write(&first, "one")?;
        fs::write(&second, "two")?;
        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let storage = Arc::new(FlakySnapshotStorage::default());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(storage);
        indexer.index_file(&first)?;
        assert!(
            indexer
                .find_files("first\\.txt$")?
                .iter()
                .any(|path| path.ends_with("first.txt"))
        );
        let err = indexer
            .index_file(&second)
            .expect_err("second persist should fail");
        assert!(
            err.to_string()
                .contains("simulated snapshot persistence failure")
        );
        assert!(
            indexer
                .find_files("first\\.txt$")?
                .iter()
                .any(|path| path.ends_with("first.txt"))
        );
        assert!(indexer.find_files("second\\.txt$")?.is_empty());
        Ok(())
    }
}