vtcode_indexer/
lib.rs

1//! Workspace-friendly file indexer extracted from VTCode.
2//!
3//! `vtcode-indexer` offers a lightweight alternative to heavyweight
4//! search/indexing stacks. It recursively walks a workspace, computes
5//! hashes, and stores per-file metadata in Markdown-friendly summaries
6//! so changes remain easy to audit in git.
7
8use anyhow::Result;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::fs;
13use std::io::ErrorKind;
14use std::path::{Path, PathBuf};
15use std::time::SystemTime;
16
/// Settings that control where [`SimpleIndexer`] stores its metadata and
/// which directories it visits while walking a workspace.
#[derive(Debug, Clone)]
pub struct SimpleIndexerConfig {
    // Root of the workspace being indexed.
    workspace_root: PathBuf,
    // Directory that receives the persisted per-file summaries.
    index_dir: PathBuf,
    // When true, directories whose name starts with `.` are skipped.
    ignore_hidden: bool,
    // Directories (and everything below them) that are skipped.
    excluded_dirs: Vec<PathBuf>,
    // Directories that are walked even when hidden or below an excluded one.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Creates a configuration with VTCode's legacy defaults.
    ///
    /// The index lives in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` (plus the index directory itself) are excluded;
    /// `<root>/.vtcode/external` is explicitly allowed; hidden directories
    /// are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];

        let mut excluded_dirs = Vec::from([
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ]);
        // Only collapses adjacent duplicates; kept as a cheap safeguard.
        excluded_dirs.dedup();

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Replaces the directory used for persisted metadata.
    ///
    /// The new directory is also added to the excluded set so the indexer
    /// does not index its own output.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let new_dir: PathBuf = index_dir.into();
        self.push_unique_excluded(new_dir.clone());
        self.index_dir = new_dir;
        self
    }

    /// Registers a directory that should be indexed even if it is hidden or
    /// located inside an excluded parent. Duplicates are ignored.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate: PathBuf = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Registers an extra directory to skip during traversal. Duplicates are
    /// ignored.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether hidden directories (names starting with `.`) are skipped.
    pub fn ignore_hidden(self, ignore_hidden: bool) -> Self {
        Self {
            ignore_hidden,
            ..self
        }
    }

    /// Returns the workspace root.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Returns the directory where index metadata is persisted.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to the excluded list unless an equal path is present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
98
99/// Simple file index entry.
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct FileIndex {
102    /// File path.
103    pub path: String,
104    /// File content hash for change detection.
105    pub hash: String,
106    /// Last modified timestamp.
107    pub modified: u64,
108    /// File size.
109    pub size: u64,
110    /// Language/extension.
111    pub language: String,
112    /// Simple tags.
113    pub tags: Vec<String>,
114}
115
116/// Simple search result.
117#[derive(Debug, Clone, Serialize, Deserialize)]
118pub struct SearchResult {
119    pub file_path: String,
120    pub line_number: usize,
121    pub line_content: String,
122    pub matches: Vec<String>,
123}
124
125/// Simple file indexer.
126#[derive(Clone)]
127pub struct SimpleIndexer {
128    config: SimpleIndexerConfig,
129    index_cache: HashMap<String, FileIndex>,
130}
131
132impl SimpleIndexer {
133    /// Create a new simple indexer with default VTCode paths.
134    pub fn new(workspace_root: PathBuf) -> Self {
135        Self::with_config(SimpleIndexerConfig::new(workspace_root))
136    }
137
138    /// Create a simple indexer with the provided configuration.
139    pub fn with_config(config: SimpleIndexerConfig) -> Self {
140        Self {
141            config,
142            index_cache: HashMap::new(),
143        }
144    }
145
146    /// Create a new simple indexer using a custom index directory.
147    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
148        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
149        Self::with_config(config)
150    }
151
152    /// Initialize the index directory.
153    pub fn init(&self) -> Result<()> {
154        fs::create_dir_all(self.config.index_dir())?;
155        Ok(())
156    }
157
158    /// Get the workspace root path.
159    pub fn workspace_root(&self) -> &Path {
160        self.config.workspace_root()
161    }
162
163    /// Get the index directory used for persisted metadata.
164    pub fn index_dir(&self) -> &Path {
165        self.config.index_dir()
166    }
167
168    /// Index a single file.
169    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
170        if !file_path.exists() || !file_path.is_file() {
171            return Ok(());
172        }
173
174        let content = match fs::read_to_string(file_path) {
175            Ok(text) => text,
176            Err(err) => {
177                if err.kind() == ErrorKind::InvalidData {
178                    return Ok(());
179                }
180                return Err(err.into());
181            }
182        };
183        let hash = self.calculate_hash(&content);
184        let modified = self.get_modified_time(file_path)?;
185        let size = content.len() as u64;
186        let language = self.detect_language(file_path);
187
188        let index = FileIndex {
189            path: file_path.to_string_lossy().to_string(),
190            hash,
191            modified,
192            size,
193            language,
194            tags: vec![],
195        };
196
197        self.index_cache
198            .insert(file_path.to_string_lossy().to_string(), index.clone());
199
200        // Save to markdown file.
201        self.save_index_to_markdown(&index)?;
202
203        Ok(())
204    }
205
206    /// Index all files in directory recursively.
207    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
208        let mut file_paths = Vec::new();
209
210        // First pass: collect all file paths.
211        self.walk_directory(dir_path, &mut |file_path| {
212            file_paths.push(file_path.to_path_buf());
213            Ok(())
214        })?;
215
216        // Second pass: index each file.
217        for file_path in file_paths {
218            self.index_file(&file_path)?;
219        }
220
221        Ok(())
222    }
223
224    /// Search files using regex pattern.
225    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
226        let regex = Regex::new(pattern)?;
227
228        let mut results = Vec::new();
229
230        // Search through indexed files.
231        for file_path in self.index_cache.keys() {
232            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
233                continue;
234            }
235
236            if let Ok(content) = fs::read_to_string(file_path) {
237                for (line_num, line) in content.lines().enumerate() {
238                    if regex.is_match(line) {
239                        let matches: Vec<String> = regex
240                            .find_iter(line)
241                            .map(|m| m.as_str().to_string())
242                            .collect();
243
244                        results.push(SearchResult {
245                            file_path: file_path.clone(),
246                            line_number: line_num + 1,
247                            line_content: line.to_string(),
248                            matches,
249                        });
250                    }
251                }
252            }
253        }
254
255        Ok(results)
256    }
257
258    /// Find files by name pattern.
259    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
260        let regex = Regex::new(pattern)?;
261        let mut results = Vec::new();
262
263        for file_path in self.index_cache.keys() {
264            if regex.is_match(file_path) {
265                results.push(file_path.clone());
266            }
267        }
268
269        Ok(results)
270    }
271
272    /// Get file content with line numbers.
273    pub fn get_file_content(
274        &self,
275        file_path: &str,
276        start_line: Option<usize>,
277        end_line: Option<usize>,
278    ) -> Result<String> {
279        let content = fs::read_to_string(file_path)?;
280        let lines: Vec<&str> = content.lines().collect();
281
282        let start = start_line.unwrap_or(1).saturating_sub(1);
283        let end = end_line.unwrap_or(lines.len());
284
285        let selected_lines = &lines[start..end.min(lines.len())];
286
287        let mut result = String::new();
288        for (i, line) in selected_lines.iter().enumerate() {
289            result.push_str(&format!("{}: {}\n", start + i + 1, line));
290        }
291
292        Ok(result)
293    }
294
295    /// List files in directory (like ls).
296    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
297        let path = Path::new(dir_path);
298        if !path.exists() {
299            return Ok(vec![]);
300        }
301
302        let mut files = Vec::new();
303
304        for entry in fs::read_dir(path)? {
305            let entry = entry?;
306            let file_name = entry.file_name().to_string_lossy().to_string();
307
308            if !show_hidden && file_name.starts_with('.') {
309                continue;
310            }
311
312            files.push(file_name);
313        }
314
315        Ok(files)
316    }
317
318    /// Grep-like search (like grep command).
319    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
320        let regex = Regex::new(pattern)?;
321        let mut results = Vec::new();
322
323        for file_path in self.index_cache.keys() {
324            if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
325                continue;
326            }
327
328            if let Ok(content) = fs::read_to_string(file_path) {
329                for (line_num, line) in content.lines().enumerate() {
330                    if regex.is_match(line) {
331                        results.push(SearchResult {
332                            file_path: file_path.clone(),
333                            line_number: line_num + 1,
334                            line_content: line.to_string(),
335                            matches: vec![line.to_string()],
336                        });
337                    }
338                }
339            }
340        }
341
342        Ok(results)
343    }
344
345    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
346    where
347        F: FnMut(&Path) -> Result<()>,
348    {
349        if !dir_path.exists() {
350            return Ok(());
351        }
352
353        self.walk_directory_internal(dir_path, callback)
354    }
355
356    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
357    where
358        F: FnMut(&Path) -> Result<()>,
359    {
360        for entry in fs::read_dir(dir_path)? {
361            let entry = entry?;
362            let path = entry.path();
363
364            if path.is_dir() {
365                if self.is_allowed_dir(&path) {
366                    self.walk_directory_internal(&path, callback)?;
367                    continue;
368                }
369
370                if self.should_skip_dir(&path) {
371                    self.walk_allowed_descendants(&path, callback)?;
372                    continue;
373                }
374
375                self.walk_directory_internal(&path, callback)?;
376            } else if path.is_file() {
377                callback(&path)?;
378            }
379        }
380
381        Ok(())
382    }
383
384    fn is_allowed_dir(&self, path: &Path) -> bool {
385        self.config
386            .allowed_dirs
387            .iter()
388            .any(|allowed| path.starts_with(allowed))
389    }
390
391    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
392    where
393        F: FnMut(&Path) -> Result<()>,
394    {
395        let allowed_dirs = self.config.allowed_dirs.clone();
396        for allowed in allowed_dirs {
397            if allowed.starts_with(dir_path) && allowed.exists() {
398                self.walk_directory_internal(&allowed, callback)?;
399            }
400        }
401        Ok(())
402    }
403
404    fn should_skip_dir(&self, path: &Path) -> bool {
405        if self
406            .config
407            .allowed_dirs
408            .iter()
409            .any(|allowed| path.starts_with(allowed))
410        {
411            return false;
412        }
413
414        if self
415            .config
416            .excluded_dirs
417            .iter()
418            .any(|excluded| path.starts_with(excluded))
419        {
420            return true;
421        }
422
423        if self.config.ignore_hidden
424            && path
425                .file_name()
426                .and_then(|name| name.to_str())
427                .is_some_and(|name_str| name_str.starts_with('.'))
428        {
429            return true;
430        }
431
432        false
433    }
434
435    fn calculate_hash(&self, content: &str) -> String {
436        use std::collections::hash_map::DefaultHasher;
437        use std::hash::{Hash, Hasher};
438
439        let mut hasher = DefaultHasher::new();
440        content.hash(&mut hasher);
441        format!("{:x}", hasher.finish())
442    }
443
444    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
445        let metadata = fs::metadata(file_path)?;
446        let modified = metadata.modified()?;
447        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
448    }
449
450    fn detect_language(&self, file_path: &Path) -> String {
451        file_path
452            .extension()
453            .and_then(|ext| ext.to_str())
454            .unwrap_or("unknown")
455            .to_string()
456    }
457
458    fn save_index_to_markdown(&self, index: &FileIndex) -> Result<()> {
459        let file_name = format!("{}.md", self.calculate_hash(&index.path));
460        let index_path = self.config.index_dir().join(file_name);
461
462        let markdown = format!(
463            "# File Index: {}\n\n\
464            - **Path**: {}\n\
465            - **Hash**: {}\n\
466            - **Modified**: {}\n\
467            - **Size**: {} bytes\n\
468            - **Language**: {}\n\
469            - **Tags**: {}\n\n",
470            index.path,
471            index.path,
472            index.hash,
473            index.modified,
474            index.size,
475            index.language,
476            index.tags.join(", ")
477        );
478
479        fs::write(index_path, markdown)?;
480        Ok(())
481    }
482}
483
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// With the default configuration, files below a dot-directory are not
    /// indexed while files in regular directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let private_dir = root.join(".private");
        fs::create_dir_all(&private_dir)?;
        fs::write(private_dir.join("secret.txt"), "classified")?;

        let source_dir = root.join("src");
        fs::create_dir_all(&source_dir)?;
        fs::write(source_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(root.to_path_buf());
        indexer.init()?;
        indexer.index_directory(root)?;

        let hidden_hits = indexer.find_files("secret\\.txt$")?;
        let visible_hits = indexer.find_files("lib\\.rs$")?;
        assert!(hidden_hits.is_empty());
        assert!(!visible_hits.is_empty());

        Ok(())
    }

    /// Turning `ignore_hidden` off makes files in dot-directories indexable.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let cache_dir = root.join(".cache");
        fs::create_dir_all(&cache_dir)?;
        fs::write(cache_dir.join("data.log"), "details")?;

        let mut indexer = SimpleIndexer::with_config(
            SimpleIndexerConfig::new(root.to_path_buf()).ignore_hidden(false),
        );
        indexer.init()?;
        indexer.index_directory(root)?;

        let hits = indexer.find_files("data\\.log$")?;
        assert_eq!(hits.len(), 1);

        Ok(())
    }
}
530}