1use anyhow::Result;
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::fs;
13use std::io::ErrorKind;
14use std::path::{Path, PathBuf};
15use std::time::SystemTime;
16
/// Configuration for [`SimpleIndexer`]: where the workspace lives, where index
/// files are written, and which directories are traversed or skipped.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root directory of the workspace; the default paths are derived from it.
    workspace_root: PathBuf,
    /// Directory that receives the generated index files.
    index_dir: PathBuf,
    /// When `true`, directories whose name starts with `.` are skipped.
    ignore_hidden: bool,
    /// Directories (and everything below them) that are skipped while walking.
    excluded_dirs: Vec<PathBuf>,
    /// Directories that are always traversed, even when an excluded or hidden
    /// directory contains them.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// Defaults:
    /// - index directory: `<root>/.vtcode/index`
    /// - excluded: `<root>/.vtcode/index`, `<root>/.vtcode`, `<root>/target`,
    ///   `<root>/node_modules`
    /// - allowed: `<root>/.vtcode/external`
    /// - hidden directories are ignored
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let external_dir = vtcode_dir.join("external");

        // The four entries are distinct by construction, so no de-duplication
        // pass is needed here.
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs: vec![external_dir],
        }
    }

    /// Replaces the index directory and makes sure the new location is excluded
    /// from traversal. Previously excluded directories stay excluded.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let index_dir = index_dir.into();
        self.push_unique_excluded(index_dir.clone());
        self.index_dir = index_dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already present.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        Self::push_unique(&mut self.allowed_dirs, path.into());
        self
    }

    /// Adds `path` to the excluded directories unless it is already present.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether directories whose name starts with `.` are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Returns the workspace root.
    pub fn workspace_root(&self) -> &Path {
        &self.workspace_root
    }

    /// Returns the directory where index files are written.
    pub fn index_dir(&self) -> &Path {
        &self.index_dir
    }

    /// Appends `path` to the excluded directories if it is not listed yet.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        Self::push_unique(&mut self.excluded_dirs, path);
    }

    /// Appends `path` to `dirs` only when an equal path is not already stored.
    fn push_unique(dirs: &mut Vec<PathBuf>, path: PathBuf) {
        if !dirs.contains(&path) {
            dirs.push(path);
        }
    }
}
98
/// Metadata recorded for one indexed file, as populated by
/// `SimpleIndexer::index_file`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file, converted to a string lossily.
    pub path: String,
    /// Hexadecimal hash of the file's text content.
    pub hash: String,
    /// Last-modified time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when there is none (or it is not UTF-8).
    pub language: String,
    /// Free-form tags; `index_file` stores an empty list.
    pub tags: Vec<String>,
}
115
/// One matching line produced by `SimpleIndexer::search` or
/// `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match (the key used in the index cache).
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched text: `search` stores every regex match found on the line,
    /// `grep` stores the whole line once.
    pub matches: Vec<String>,
}
124
/// File indexer that keeps an in-memory cache of indexed files and writes one
/// Markdown summary per file into the configured index directory.
#[derive(Clone)]
pub struct SimpleIndexer {
    /// Paths and traversal rules used by this indexer.
    config: SimpleIndexerConfig,
    /// Indexed files keyed by their (lossily converted) path string.
    index_cache: HashMap<String, FileIndex>,
}
131
132impl SimpleIndexer {
133 pub fn new(workspace_root: PathBuf) -> Self {
135 Self::with_config(SimpleIndexerConfig::new(workspace_root))
136 }
137
138 pub fn with_config(config: SimpleIndexerConfig) -> Self {
140 Self {
141 config,
142 index_cache: HashMap::new(),
143 }
144 }
145
146 pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
148 let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
149 Self::with_config(config)
150 }
151
152 pub fn init(&self) -> Result<()> {
154 fs::create_dir_all(self.config.index_dir())?;
155 Ok(())
156 }
157
158 pub fn workspace_root(&self) -> &Path {
160 self.config.workspace_root()
161 }
162
163 pub fn index_dir(&self) -> &Path {
165 self.config.index_dir()
166 }
167
168 pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
170 if !file_path.exists() || !file_path.is_file() {
171 return Ok(());
172 }
173
174 let content = match fs::read_to_string(file_path) {
175 Ok(text) => text,
176 Err(err) => {
177 if err.kind() == ErrorKind::InvalidData {
178 return Ok(());
179 }
180 return Err(err.into());
181 }
182 };
183 let hash = self.calculate_hash(&content);
184 let modified = self.get_modified_time(file_path)?;
185 let size = content.len() as u64;
186 let language = self.detect_language(file_path);
187
188 let index = FileIndex {
189 path: file_path.to_string_lossy().to_string(),
190 hash,
191 modified,
192 size,
193 language,
194 tags: vec![],
195 };
196
197 self.index_cache
198 .insert(file_path.to_string_lossy().to_string(), index.clone());
199
200 self.save_index_to_markdown(&index)?;
202
203 Ok(())
204 }
205
206 pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
208 let mut file_paths = Vec::new();
209
210 self.walk_directory(dir_path, &mut |file_path| {
212 file_paths.push(file_path.to_path_buf());
213 Ok(())
214 })?;
215
216 for file_path in file_paths {
218 self.index_file(&file_path)?;
219 }
220
221 Ok(())
222 }
223
224 pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
226 let regex = Regex::new(pattern)?;
227
228 let mut results = Vec::new();
229
230 for file_path in self.index_cache.keys() {
232 if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
233 continue;
234 }
235
236 if let Ok(content) = fs::read_to_string(file_path) {
237 for (line_num, line) in content.lines().enumerate() {
238 if regex.is_match(line) {
239 let matches: Vec<String> = regex
240 .find_iter(line)
241 .map(|m| m.as_str().to_string())
242 .collect();
243
244 results.push(SearchResult {
245 file_path: file_path.clone(),
246 line_number: line_num + 1,
247 line_content: line.to_string(),
248 matches,
249 });
250 }
251 }
252 }
253 }
254
255 Ok(results)
256 }
257
258 pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
260 let regex = Regex::new(pattern)?;
261 let mut results = Vec::new();
262
263 for file_path in self.index_cache.keys() {
264 if regex.is_match(file_path) {
265 results.push(file_path.clone());
266 }
267 }
268
269 Ok(results)
270 }
271
272 pub fn get_file_content(
274 &self,
275 file_path: &str,
276 start_line: Option<usize>,
277 end_line: Option<usize>,
278 ) -> Result<String> {
279 let content = fs::read_to_string(file_path)?;
280 let lines: Vec<&str> = content.lines().collect();
281
282 let start = start_line.unwrap_or(1).saturating_sub(1);
283 let end = end_line.unwrap_or(lines.len());
284
285 let selected_lines = &lines[start..end.min(lines.len())];
286
287 let mut result = String::new();
288 for (i, line) in selected_lines.iter().enumerate() {
289 result.push_str(&format!("{}: {}\n", start + i + 1, line));
290 }
291
292 Ok(result)
293 }
294
295 pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
297 let path = Path::new(dir_path);
298 if !path.exists() {
299 return Ok(vec![]);
300 }
301
302 let mut files = Vec::new();
303
304 for entry in fs::read_dir(path)? {
305 let entry = entry?;
306 let file_name = entry.file_name().to_string_lossy().to_string();
307
308 if !show_hidden && file_name.starts_with('.') {
309 continue;
310 }
311
312 files.push(file_name);
313 }
314
315 Ok(files)
316 }
317
318 pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
320 let regex = Regex::new(pattern)?;
321 let mut results = Vec::new();
322
323 for file_path in self.index_cache.keys() {
324 if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
325 continue;
326 }
327
328 if let Ok(content) = fs::read_to_string(file_path) {
329 for (line_num, line) in content.lines().enumerate() {
330 if regex.is_match(line) {
331 results.push(SearchResult {
332 file_path: file_path.clone(),
333 line_number: line_num + 1,
334 line_content: line.to_string(),
335 matches: vec![line.to_string()],
336 });
337 }
338 }
339 }
340 }
341
342 Ok(results)
343 }
344
345 fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
346 where
347 F: FnMut(&Path) -> Result<()>,
348 {
349 if !dir_path.exists() {
350 return Ok(());
351 }
352
353 self.walk_directory_internal(dir_path, callback)
354 }
355
356 fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
357 where
358 F: FnMut(&Path) -> Result<()>,
359 {
360 for entry in fs::read_dir(dir_path)? {
361 let entry = entry?;
362 let path = entry.path();
363
364 if path.is_dir() {
365 if self.is_allowed_dir(&path) {
366 self.walk_directory_internal(&path, callback)?;
367 continue;
368 }
369
370 if self.should_skip_dir(&path) {
371 self.walk_allowed_descendants(&path, callback)?;
372 continue;
373 }
374
375 self.walk_directory_internal(&path, callback)?;
376 } else if path.is_file() {
377 callback(&path)?;
378 }
379 }
380
381 Ok(())
382 }
383
384 fn is_allowed_dir(&self, path: &Path) -> bool {
385 self.config
386 .allowed_dirs
387 .iter()
388 .any(|allowed| path.starts_with(allowed))
389 }
390
391 fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
392 where
393 F: FnMut(&Path) -> Result<()>,
394 {
395 let allowed_dirs = self.config.allowed_dirs.clone();
396 for allowed in allowed_dirs {
397 if allowed.starts_with(dir_path) && allowed.exists() {
398 self.walk_directory_internal(&allowed, callback)?;
399 }
400 }
401 Ok(())
402 }
403
404 fn should_skip_dir(&self, path: &Path) -> bool {
405 if self
406 .config
407 .allowed_dirs
408 .iter()
409 .any(|allowed| path.starts_with(allowed))
410 {
411 return false;
412 }
413
414 if self
415 .config
416 .excluded_dirs
417 .iter()
418 .any(|excluded| path.starts_with(excluded))
419 {
420 return true;
421 }
422
423 if self.config.ignore_hidden
424 && path
425 .file_name()
426 .and_then(|name| name.to_str())
427 .is_some_and(|name_str| name_str.starts_with('.'))
428 {
429 return true;
430 }
431
432 false
433 }
434
435 fn calculate_hash(&self, content: &str) -> String {
436 use std::collections::hash_map::DefaultHasher;
437 use std::hash::{Hash, Hasher};
438
439 let mut hasher = DefaultHasher::new();
440 content.hash(&mut hasher);
441 format!("{:x}", hasher.finish())
442 }
443
444 fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
445 let metadata = fs::metadata(file_path)?;
446 let modified = metadata.modified()?;
447 Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
448 }
449
450 fn detect_language(&self, file_path: &Path) -> String {
451 file_path
452 .extension()
453 .and_then(|ext| ext.to_str())
454 .unwrap_or("unknown")
455 .to_string()
456 }
457
458 fn save_index_to_markdown(&self, index: &FileIndex) -> Result<()> {
459 let file_name = format!("{}.md", self.calculate_hash(&index.path));
460 let index_path = self.config.index_dir().join(file_name);
461
462 let markdown = format!(
463 "# File Index: {}\n\n\
464 - **Path**: {}\n\
465 - **Hash**: {}\n\
466 - **Modified**: {}\n\
467 - **Size**: {} bytes\n\
468 - **Language**: {}\n\
469 - **Tags**: {}\n\n",
470 index.path,
471 index.path,
472 index.hash,
473 index.modified,
474 index.size,
475 index.language,
476 index.tags.join(", ")
477 );
478
479 fs::write(index_path, markdown)?;
480 Ok(())
481 }
482}
483
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// With the default configuration, files inside a dot-directory are not
    /// indexed while files in regular directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let private = root.join(".private");
        fs::create_dir_all(&private)?;
        fs::write(private.join("secret.txt"), "classified")?;

        let sources = root.join("src");
        fs::create_dir_all(&sources)?;
        fs::write(sources.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(root.to_path_buf());
        indexer.init()?;
        indexer.index_directory(root)?;

        let hidden_hits = indexer.find_files("secret\\.txt$")?;
        assert!(hidden_hits.is_empty());

        let visible_hits = indexer.find_files("lib\\.rs$")?;
        assert!(!visible_hits.is_empty());

        Ok(())
    }

    /// Turning `ignore_hidden` off makes files inside dot-directories show up
    /// in the index.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let sandbox = tempdir()?;
        let root = sandbox.path();

        let cache = root.join(".cache");
        fs::create_dir_all(&cache)?;
        fs::write(cache.join("data.log"), "details")?;

        let mut indexer = SimpleIndexer::with_config(
            SimpleIndexerConfig::new(root.to_path_buf()).ignore_hidden(false),
        );
        indexer.init()?;
        indexer.index_directory(root)?;

        let hits = indexer.find_files("data\\.log$")?;
        assert_eq!(hits.len(), 1);

        Ok(())
    }
}