vtcode_core/
simple_indexer.rs

//! Simple file indexer using regex and markdown storage.
//!
//! This module provides a simple, direct approach to code indexing and retrieval
//! using regex patterns and markdown files for storage. No complex embeddings
//! or databases - just direct file operations, like a human using bash.
6
7use anyhow::Result;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::fs;
12use std::path::{Path, PathBuf};
13use std::time::SystemTime;
14
15/// Simple file index entry
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct FileIndex {
18    /// File path
19    pub path: String,
20    /// File content hash for change detection
21    pub hash: String,
22    /// Last modified timestamp
23    pub modified: u64,
24    /// File size
25    pub size: u64,
26    /// Language/extension
27    pub language: String,
28    /// Simple tags
29    pub tags: Vec<String>,
30}
31
32/// Simple search result
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct SearchResult {
35    pub file_path: String,
36    pub line_number: usize,
37    pub line_content: String,
38    pub matches: Vec<String>,
39}
40
41/// Simple file indexer
42#[derive(Clone)]
43pub struct SimpleIndexer {
44    /// Index storage directory
45    index_dir: PathBuf,
46    /// Workspace root
47    workspace_root: PathBuf,
48    /// In-memory index cache
49    index_cache: HashMap<String, FileIndex>,
50}
51
52impl SimpleIndexer {
53    /// Create a new simple indexer
54    pub fn new(workspace_root: PathBuf) -> Self {
55        let index_dir = workspace_root.join(".vtcode").join("index");
56
57        Self {
58            index_dir,
59            workspace_root,
60            index_cache: HashMap::new(),
61        }
62    }
63
64    /// Initialize the index directory
65    pub fn init(&self) -> Result<()> {
66        fs::create_dir_all(&self.index_dir)?;
67        Ok(())
68    }
69
70    /// Get the workspace root path
71    pub fn workspace_root(&self) -> &Path {
72        &self.workspace_root
73    }
74
75    /// Index a single file
76    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
77        if !file_path.exists() || !file_path.is_file() {
78            return Ok(());
79        }
80
81        let content = fs::read_to_string(file_path)?;
82        let hash = self.calculate_hash(&content);
83        let modified = self.get_modified_time(file_path)?;
84        let size = content.len() as u64;
85        let language = self.detect_language(file_path);
86
87        let index = FileIndex {
88            path: file_path.to_string_lossy().to_string(),
89            hash,
90            modified,
91            size,
92            language,
93            tags: vec![],
94        };
95
96        self.index_cache
97            .insert(file_path.to_string_lossy().to_string(), index.clone());
98
99        // Save to markdown file
100        self.save_index_to_markdown(&index)?;
101
102        Ok(())
103    }
104
105    /// Index all files in directory recursively
106    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
107        let mut file_paths = Vec::new();
108
109        // First pass: collect all file paths
110        self.walk_directory(dir_path, &mut |file_path| {
111            file_paths.push(file_path.to_path_buf());
112            Ok(())
113        })?;
114
115        // Second pass: index each file
116        for file_path in file_paths {
117            self.index_file(&file_path)?;
118        }
119
120        Ok(())
121    }
122
123    /// Search files using regex pattern
124    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
125        let regex = Regex::new(pattern)?;
126
127        let mut results = Vec::new();
128
129        // Search through indexed files
130        for (file_path, _) in &self.index_cache {
131            if let Some(filter) = path_filter {
132                if !file_path.contains(filter) {
133                    continue;
134                }
135            }
136
137            if let Ok(content) = fs::read_to_string(file_path) {
138                for (line_num, line) in content.lines().enumerate() {
139                    if regex.is_match(line) {
140                        let matches: Vec<String> = regex
141                            .find_iter(line)
142                            .map(|m| m.as_str().to_string())
143                            .collect();
144
145                        results.push(SearchResult {
146                            file_path: file_path.clone(),
147                            line_number: line_num + 1,
148                            line_content: line.to_string(),
149                            matches,
150                        });
151                    }
152                }
153            }
154        }
155
156        Ok(results)
157    }
158
159    /// Find files by name pattern
160    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
161        let regex = Regex::new(pattern)?;
162        let mut results = Vec::new();
163
164        for file_path in self.index_cache.keys() {
165            if regex.is_match(file_path) {
166                results.push(file_path.clone());
167            }
168        }
169
170        Ok(results)
171    }
172
173    /// Get file content with line numbers
174    pub fn get_file_content(
175        &self,
176        file_path: &str,
177        start_line: Option<usize>,
178        end_line: Option<usize>,
179    ) -> Result<String> {
180        let content = fs::read_to_string(file_path)?;
181        let lines: Vec<&str> = content.lines().collect();
182
183        let start = start_line.unwrap_or(1).saturating_sub(1);
184        let end = end_line.unwrap_or(lines.len());
185
186        let selected_lines = &lines[start..end.min(lines.len())];
187
188        let mut result = String::new();
189        for (i, line) in selected_lines.iter().enumerate() {
190            result.push_str(&format!("{}: {}\n", start + i + 1, line));
191        }
192
193        Ok(result)
194    }
195
196    /// List files in directory (like ls)
197    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
198        let path = Path::new(dir_path);
199        if !path.exists() {
200            return Ok(vec![]);
201        }
202
203        let mut files = Vec::new();
204
205        for entry in fs::read_dir(path)? {
206            let entry = entry?;
207            let file_name = entry.file_name().to_string_lossy().to_string();
208
209            if !show_hidden && file_name.starts_with('.') {
210                continue;
211            }
212
213            files.push(file_name);
214        }
215
216        Ok(files)
217    }
218
219    /// Grep-like search (like grep command)
220    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
221        let regex = Regex::new(pattern)?;
222        let mut results = Vec::new();
223
224        for (file_path, _) in &self.index_cache {
225            if let Some(fp) = file_pattern {
226                if !file_path.contains(fp) {
227                    continue;
228                }
229            }
230
231            if let Ok(content) = fs::read_to_string(file_path) {
232                for (line_num, line) in content.lines().enumerate() {
233                    if regex.is_match(line) {
234                        results.push(SearchResult {
235                            file_path: file_path.clone(),
236                            line_number: line_num + 1,
237                            line_content: line.to_string(),
238                            matches: vec![line.to_string()],
239                        });
240                    }
241                }
242            }
243        }
244
245        Ok(results)
246    }
247
248    // Helper methods
249
250    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
251    where
252        F: FnMut(&Path) -> Result<()>,
253    {
254        if !dir_path.exists() {
255            return Ok(());
256        }
257
258        for entry in fs::read_dir(dir_path)? {
259            let entry = entry?;
260            let path = entry.path();
261
262            if path.is_dir() {
263                // Skip common directories
264                if let Some(name) = path.file_name() {
265                    let name_str = name.to_string_lossy();
266                    if name_str.starts_with('.')
267                        || name_str == "target"
268                        || name_str == "node_modules"
269                    {
270                        continue;
271                    }
272                }
273                self.walk_directory(&path, callback)?;
274            } else if path.is_file() {
275                callback(&path)?;
276            }
277        }
278
279        Ok(())
280    }
281
282    fn calculate_hash(&self, content: &str) -> String {
283        use std::collections::hash_map::DefaultHasher;
284        use std::hash::{Hash, Hasher};
285
286        let mut hasher = DefaultHasher::new();
287        content.hash(&mut hasher);
288        format!("{:x}", hasher.finish())
289    }
290
291    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
292        let metadata = fs::metadata(file_path)?;
293        let modified = metadata.modified()?;
294        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
295    }
296
297    fn detect_language(&self, file_path: &Path) -> String {
298        file_path
299            .extension()
300            .and_then(|ext| ext.to_str())
301            .unwrap_or("unknown")
302            .to_string()
303    }
304
305    fn save_index_to_markdown(&self, index: &FileIndex) -> Result<()> {
306        let file_name = format!("{}.md", self.calculate_hash(&index.path));
307        let index_path = self.index_dir.join(file_name);
308
309        let markdown = format!(
310            "# File Index: {}\n\n\
311            - **Path**: {}\n\
312            - **Hash**: {}\n\
313            - **Modified**: {}\n\
314            - **Size**: {} bytes\n\
315            - **Language**: {}\n\
316            - **Tags**: {}\n\n",
317            index.path,
318            index.path,
319            index.hash,
320            index.modified,
321            index.size,
322            index.language,
323            index.tags.join(", ")
324        );
325
326        fs::write(index_path, markdown)?;
327        Ok(())
328    }
329}