vtcode_core/
simple_indexer.rs

1//! Simple file indexer using regex and markdown storage
2//!
3//! This module provides a simple, direct approach to code indexing and retrieval
4//! using regex patterns and markdown files for storage. No complex embeddings
5//! or databases - just direct file operations like a human using bash.
6
7use anyhow::Result;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::io::ErrorKind;
13use std::path::{Path, PathBuf};
14use std::time::SystemTime;
15
16/// Simple file index entry
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct FileIndex {
19    /// File path
20    pub path: String,
21    /// File content hash for change detection
22    pub hash: String,
23    /// Last modified timestamp
24    pub modified: u64,
25    /// File size
26    pub size: u64,
27    /// Language/extension
28    pub language: String,
29    /// Simple tags
30    pub tags: Vec<String>,
31}
32
33/// Simple search result
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct SearchResult {
36    pub file_path: String,
37    pub line_number: usize,
38    pub line_content: String,
39    pub matches: Vec<String>,
40}
41
42/// Simple file indexer
43#[derive(Clone)]
44pub struct SimpleIndexer {
45    /// Index storage directory
46    index_dir: PathBuf,
47    /// Workspace root
48    workspace_root: PathBuf,
49    /// In-memory index cache
50    index_cache: HashMap<String, FileIndex>,
51}
52
53impl SimpleIndexer {
54    /// Create a new simple indexer
55    pub fn new(workspace_root: PathBuf) -> Self {
56        let index_dir = workspace_root.join(".vtcode").join("index");
57
58        Self {
59            index_dir,
60            workspace_root,
61            index_cache: HashMap::new(),
62        }
63    }
64
65    /// Initialize the index directory
66    pub fn init(&self) -> Result<()> {
67        fs::create_dir_all(&self.index_dir)?;
68        Ok(())
69    }
70
71    /// Get the workspace root path
72    pub fn workspace_root(&self) -> &Path {
73        &self.workspace_root
74    }
75
76    /// Index a single file
77    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
78        if !file_path.exists() || !file_path.is_file() {
79            return Ok(());
80        }
81
82        let content = match fs::read_to_string(file_path) {
83            Ok(text) => text,
84            Err(err) => {
85                if err.kind() == ErrorKind::InvalidData {
86                    return Ok(());
87                }
88                return Err(err.into());
89            }
90        };
91        let hash = self.calculate_hash(&content);
92        let modified = self.get_modified_time(file_path)?;
93        let size = content.len() as u64;
94        let language = self.detect_language(file_path);
95
96        let index = FileIndex {
97            path: file_path.to_string_lossy().to_string(),
98            hash,
99            modified,
100            size,
101            language,
102            tags: vec![],
103        };
104
105        self.index_cache
106            .insert(file_path.to_string_lossy().to_string(), index.clone());
107
108        // Save to markdown file
109        self.save_index_to_markdown(&index)?;
110
111        Ok(())
112    }
113
114    /// Index all files in directory recursively
115    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
116        let mut file_paths = Vec::new();
117
118        // First pass: collect all file paths
119        self.walk_directory(dir_path, &mut |file_path| {
120            file_paths.push(file_path.to_path_buf());
121            Ok(())
122        })?;
123
124        // Second pass: index each file
125        for file_path in file_paths {
126            self.index_file(&file_path)?;
127        }
128
129        Ok(())
130    }
131
132    /// Search files using regex pattern
133    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
134        let regex = Regex::new(pattern)?;
135
136        let mut results = Vec::new();
137
138        // Search through indexed files
139        for (file_path, _) in &self.index_cache {
140            if let Some(filter) = path_filter {
141                if !file_path.contains(filter) {
142                    continue;
143                }
144            }
145
146            if let Ok(content) = fs::read_to_string(file_path) {
147                for (line_num, line) in content.lines().enumerate() {
148                    if regex.is_match(line) {
149                        let matches: Vec<String> = regex
150                            .find_iter(line)
151                            .map(|m| m.as_str().to_string())
152                            .collect();
153
154                        results.push(SearchResult {
155                            file_path: file_path.clone(),
156                            line_number: line_num + 1,
157                            line_content: line.to_string(),
158                            matches,
159                        });
160                    }
161                }
162            }
163        }
164
165        Ok(results)
166    }
167
168    /// Find files by name pattern
169    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
170        let regex = Regex::new(pattern)?;
171        let mut results = Vec::new();
172
173        for file_path in self.index_cache.keys() {
174            if regex.is_match(file_path) {
175                results.push(file_path.clone());
176            }
177        }
178
179        Ok(results)
180    }
181
182    /// Get file content with line numbers
183    pub fn get_file_content(
184        &self,
185        file_path: &str,
186        start_line: Option<usize>,
187        end_line: Option<usize>,
188    ) -> Result<String> {
189        let content = fs::read_to_string(file_path)?;
190        let lines: Vec<&str> = content.lines().collect();
191
192        let start = start_line.unwrap_or(1).saturating_sub(1);
193        let end = end_line.unwrap_or(lines.len());
194
195        let selected_lines = &lines[start..end.min(lines.len())];
196
197        let mut result = String::new();
198        for (i, line) in selected_lines.iter().enumerate() {
199            result.push_str(&format!("{}: {}\n", start + i + 1, line));
200        }
201
202        Ok(result)
203    }
204
205    /// List files in directory (like ls)
206    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
207        let path = Path::new(dir_path);
208        if !path.exists() {
209            return Ok(vec![]);
210        }
211
212        let mut files = Vec::new();
213
214        for entry in fs::read_dir(path)? {
215            let entry = entry?;
216            let file_name = entry.file_name().to_string_lossy().to_string();
217
218            if !show_hidden && file_name.starts_with('.') {
219                continue;
220            }
221
222            files.push(file_name);
223        }
224
225        Ok(files)
226    }
227
228    /// Grep-like search (like grep command)
229    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
230        let regex = Regex::new(pattern)?;
231        let mut results = Vec::new();
232
233        for (file_path, _) in &self.index_cache {
234            if let Some(fp) = file_pattern {
235                if !file_path.contains(fp) {
236                    continue;
237                }
238            }
239
240            if let Ok(content) = fs::read_to_string(file_path) {
241                for (line_num, line) in content.lines().enumerate() {
242                    if regex.is_match(line) {
243                        results.push(SearchResult {
244                            file_path: file_path.clone(),
245                            line_number: line_num + 1,
246                            line_content: line.to_string(),
247                            matches: vec![line.to_string()],
248                        });
249                    }
250                }
251            }
252        }
253
254        Ok(results)
255    }
256
257    // Helper methods
258
259    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
260    where
261        F: FnMut(&Path) -> Result<()>,
262    {
263        if !dir_path.exists() {
264            return Ok(());
265        }
266
267        for entry in fs::read_dir(dir_path)? {
268            let entry = entry?;
269            let path = entry.path();
270
271            if path.is_dir() {
272                // Skip common directories
273                if let Some(name) = path.file_name() {
274                    let name_str = name.to_string_lossy();
275                    if name_str.starts_with('.')
276                        || name_str == "target"
277                        || name_str == "node_modules"
278                    {
279                        continue;
280                    }
281                }
282                self.walk_directory(&path, callback)?;
283            } else if path.is_file() {
284                callback(&path)?;
285            }
286        }
287
288        Ok(())
289    }
290
291    fn calculate_hash(&self, content: &str) -> String {
292        use std::collections::hash_map::DefaultHasher;
293        use std::hash::{Hash, Hasher};
294
295        let mut hasher = DefaultHasher::new();
296        content.hash(&mut hasher);
297        format!("{:x}", hasher.finish())
298    }
299
300    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
301        let metadata = fs::metadata(file_path)?;
302        let modified = metadata.modified()?;
303        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
304    }
305
306    fn detect_language(&self, file_path: &Path) -> String {
307        file_path
308            .extension()
309            .and_then(|ext| ext.to_str())
310            .unwrap_or("unknown")
311            .to_string()
312    }
313
314    fn save_index_to_markdown(&self, index: &FileIndex) -> Result<()> {
315        let file_name = format!("{}.md", self.calculate_hash(&index.path));
316        let index_path = self.index_dir.join(file_name);
317
318        let markdown = format!(
319            "# File Index: {}\n\n\
320            - **Path**: {}\n\
321            - **Hash**: {}\n\
322            - **Modified**: {}\n\
323            - **Size**: {} bytes\n\
324            - **Language**: {}\n\
325            - **Tags**: {}\n\n",
326            index.path,
327            index.path,
328            index.hash,
329            index.modified,
330            index.size,
331            index.language,
332            index.tags.join(", ")
333        );
334
335        fs::write(index_path, markdown)?;
336        Ok(())
337    }
338}