1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
/// Persistence backend for per-file index entries.
///
/// The trait bounds require implementors to be `Send + Sync`.
pub trait IndexStorage: Send + Sync {
    /// Prepares the backend for use with `index_dir` (the bundled Markdown
    /// backend creates the directory).
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Stores a single `entry`; `index_dir` is the configured index location.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
}
27
/// Policy object deciding which directories are traversed and which files
/// are indexed.
///
/// The trait bounds require implementors to be `Send + Sync`.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the directory at `path` should be walked into.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
36
37#[derive(Debug, Default, Clone)]
39pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42    fn init(&self, index_dir: &Path) -> Result<()> {
43        fs::create_dir_all(index_dir)?;
44        Ok(())
45    }
46
47    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48        let file_name = format!("{}.md", calculate_hash(&entry.path));
49        let index_path = index_dir.join(file_name);
50
51        let markdown = format!(
52            "# File Index: {}\n\n\
53            - **Path**: {}\n\
54            - **Hash**: {}\n\
55            - **Modified**: {}\n\
56            - **Size**: {} bytes\n\
57            - **Language**: {}\n\
58            - **Tags**: {}\n\n",
59            entry.path,
60            entry.path,
61            entry.hash,
62            entry.modified,
63            entry.size,
64            entry.language,
65            entry.tags.join(", ")
66        );
67
68        fs::write(index_path, markdown)?;
69        Ok(())
70    }
71}
72
73#[derive(Debug, Default, Clone)]
75pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79        !should_skip_dir(path, config)
80    }
81
82    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83        if !path.is_file() {
84            return false;
85        }
86
87        if config.ignore_hidden {
89            if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
90                if file_name.starts_with('.') {
91                    return false;
92                }
93            }
94        }
95
96        if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
98            let sensitive_files = [
99                ".env",
100                ".env.local",
101                ".env.production",
102                ".env.development",
103                ".env.test",
104                ".git",
105                ".gitignore",
106                ".DS_Store",
107            ];
108
109            if sensitive_files
110                .iter()
111                .any(|s| file_name == *s || file_name.starts_with(".env."))
112            {
113                return false;
114            }
115        }
116
117        true
118    }
119}
120
/// Settings controlling where the index lives and which paths are traversed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    /// Root of the workspace being indexed.
    workspace_root: PathBuf,
    /// Directory handed to the storage backend.
    index_dir: PathBuf,
    /// When `true`, dot-files and dot-directories are skipped.
    ignore_hidden: bool,
    /// Directory prefixes that are skipped.
    excluded_dirs: Vec<PathBuf>,
    /// Directory prefixes that `should_skip_dir` lets through even when excluded.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// The index is placed in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` are excluded, `.vtcode/external` is allowed, and hidden
    /// entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];

        // The four default exclusions are distinct paths, so no de-duplication
        // pass is required.
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Replaces the index directory and excludes the new location from traversal.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let dir: PathBuf = index_dir.into();
        self.push_unique_excluded(dir.clone());
        self.index_dir = dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already listed.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let candidate: PathBuf = path.into();
        if !self.allowed_dirs.contains(&candidate) {
            self.allowed_dirs.push(candidate);
        }
        self
    }

    /// Adds `path` to the excluded directories unless it is already listed.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether hidden (dot-prefixed) entries are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Root of the workspace being indexed.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Directory handed to the storage backend.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    /// Appends `path` to `excluded_dirs` only when an equal path is not present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
202
/// Metadata recorded for one indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// Path of the file, stored as a (lossily converted) string.
    pub path: String,
    /// Hash string; the indexer fills this with `calculate_hash` of the file content.
    pub hash: String,
    /// Modification time; the indexer stores whole seconds since the Unix epoch.
    pub modified: u64,
    /// Size in bytes; the indexer stores the byte length of the text it read.
    pub size: u64,
    /// Language label; the indexer uses the file extension, or `"unknown"`.
    pub language: String,
    /// Free-form tags; the indexer currently leaves this empty.
    pub tags: Vec<String>,
}
219
/// One matching line produced by `SimpleIndexer::search` or `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match, as stored in the index cache.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched text: `search` stores every regex match on the line, `grep`
    /// stores the whole line as a single element.
    pub matches: Vec<String>,
}
228
229pub struct SimpleIndexer {
231    config: SimpleIndexerConfig,
232    index_cache: HashMap<String, FileIndex>,
233    storage: Arc<dyn IndexStorage>,
234    filter: Arc<dyn TraversalFilter>,
235}
236
237impl SimpleIndexer {
238    pub fn new(workspace_root: PathBuf) -> Self {
240        Self::with_components(
241            SimpleIndexerConfig::new(workspace_root),
242            Arc::new(MarkdownIndexStorage),
243            Arc::new(ConfigTraversalFilter),
244        )
245    }
246
247    pub fn with_config(config: SimpleIndexerConfig) -> Self {
249        Self::with_components(
250            config,
251            Arc::new(MarkdownIndexStorage),
252            Arc::new(ConfigTraversalFilter),
253        )
254    }
255
256    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
258        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
259        Self::with_config(config)
260    }
261
262    pub fn with_components(
264        config: SimpleIndexerConfig,
265        storage: Arc<dyn IndexStorage>,
266        filter: Arc<dyn TraversalFilter>,
267    ) -> Self {
268        Self {
269            config,
270            index_cache: HashMap::new(),
271            storage,
272            filter,
273        }
274    }
275
276    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
278        Self { storage, ..self }
279    }
280
281    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
283        Self { filter, ..self }
284    }
285
286    pub fn init(&self) -> Result<()> {
288        self.storage.init(self.config.index_dir())
289    }
290
291    pub fn workspace_root(&self) -> &Path {
293        self.config.workspace_root()
294    }
295
296    pub fn index_dir(&self) -> &Path {
298        self.config.index_dir()
299    }
300
301    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
303        if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
304            return Ok(());
305        }
306
307        let content = match fs::read_to_string(file_path) {
308            Ok(text) => text,
309            Err(err) => {
310                if err.kind() == ErrorKind::InvalidData {
311                    return Ok(());
312                }
313                return Err(err.into());
314            }
315        };
316        let hash = calculate_hash(&content);
317        let modified = self.get_modified_time(file_path)?;
318        let size = content.len() as u64;
319        let language = self.detect_language(file_path);
320
321        let index = FileIndex {
322            path: file_path.to_string_lossy().to_string(),
323            hash,
324            modified,
325            size,
326            language,
327            tags: vec![],
328        };
329
330        self.index_cache
331            .insert(file_path.to_string_lossy().to_string(), index.clone());
332
333        self.storage.persist(self.config.index_dir(), &index)?;
334
335        Ok(())
336    }
337
338    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
342        let walker = WalkBuilder::new(dir_path)
343            .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
350
351        for entry in walker.filter_map(|e| e.ok()) {
352            let path = entry.path();
353
354            if entry.file_type().map_or(false, |ft| ft.is_file()) {
356                let should_skip = self
358                    .config
359                    .excluded_dirs
360                    .iter()
361                    .any(|excluded| path.starts_with(excluded));
362
363                if !should_skip && self.filter.should_index_file(path, &self.config) {
364                    self.index_file(path)?;
365                }
366            }
367        }
368
369        Ok(())
370    }
371
372    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
374        let regex = Regex::new(pattern)?;
375
376        let mut results = Vec::new();
377
378        for file_path in self.index_cache.keys() {
380            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
381                continue;
382            }
383
384            if let Ok(content) = fs::read_to_string(file_path) {
385                for (line_num, line) in content.lines().enumerate() {
386                    if regex.is_match(line) {
387                        let matches: Vec<String> = regex
388                            .find_iter(line)
389                            .map(|m| m.as_str().to_string())
390                            .collect();
391
392                        results.push(SearchResult {
393                            file_path: file_path.clone(),
394                            line_number: line_num + 1,
395                            line_content: line.to_string(),
396                            matches,
397                        });
398                    }
399                }
400            }
401        }
402
403        Ok(results)
404    }
405
406    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
408        let regex = Regex::new(pattern)?;
409        let mut results = Vec::new();
410
411        for file_path in self.index_cache.keys() {
412            if regex.is_match(file_path) {
413                results.push(file_path.clone());
414            }
415        }
416
417        Ok(results)
418    }
419
420    pub fn all_files(&self) -> Vec<String> {
423        self.index_cache.keys().cloned().collect()
424    }
425
426    pub fn get_file_content(
428        &self,
429        file_path: &str,
430        start_line: Option<usize>,
431        end_line: Option<usize>,
432    ) -> Result<String> {
433        let content = fs::read_to_string(file_path)?;
434        let lines: Vec<&str> = content.lines().collect();
435
436        let start = start_line.unwrap_or(1).saturating_sub(1);
437        let end = end_line.unwrap_or(lines.len());
438
439        let selected_lines = &lines[start..end.min(lines.len())];
440
441        let mut result = String::new();
442        for (i, line) in selected_lines.iter().enumerate() {
443            result.push_str(&format!("{}: {}\n", start + i + 1, line));
444        }
445
446        Ok(result)
447    }
448
449    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
451        let path = Path::new(dir_path);
452        if !path.exists() {
453            return Ok(vec![]);
454        }
455
456        let mut files = Vec::new();
457
458        for entry in fs::read_dir(path)? {
459            let entry = entry?;
460            let file_name = entry.file_name().to_string_lossy().to_string();
461
462            if !show_hidden && file_name.starts_with('.') {
463                continue;
464            }
465
466            files.push(file_name);
467        }
468
469        Ok(files)
470    }
471
472    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
474        let regex = Regex::new(pattern)?;
475        let mut results = Vec::new();
476
477        for file_path in self.index_cache.keys() {
478            if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
479                continue;
480            }
481
482            if let Ok(content) = fs::read_to_string(file_path) {
483                for (line_num, line) in content.lines().enumerate() {
484                    if regex.is_match(line) {
485                        results.push(SearchResult {
486                            file_path: file_path.clone(),
487                            line_number: line_num + 1,
488                            line_content: line.to_string(),
489                            matches: vec![line.to_string()],
490                        });
491                    }
492                }
493            }
494        }
495
496        Ok(results)
497    }
498
499    #[allow(dead_code)]
500    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
501    where
502        F: FnMut(&Path) -> Result<()>,
503    {
504        if !dir_path.exists() {
505            return Ok(());
506        }
507
508        self.walk_directory_internal(dir_path, callback)
509    }
510
511    #[allow(dead_code)]
512    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
513    where
514        F: FnMut(&Path) -> Result<()>,
515    {
516        for entry in fs::read_dir(dir_path)? {
517            let entry = entry?;
518            let path = entry.path();
519
520            if path.is_dir() {
521                if self.is_allowed_dir(&path) {
522                    self.walk_directory_internal(&path, callback)?;
523                    continue;
524                }
525
526                if !self.filter.should_descend(&path, &self.config) {
527                    self.walk_allowed_descendants(&path, callback)?;
528                    continue;
529                }
530
531                self.walk_directory_internal(&path, callback)?;
532            } else if path.is_file() {
533                callback(&path)?;
534            }
535        }
536
537        Ok(())
538    }
539
540    #[allow(dead_code)]
541    fn is_allowed_dir(&self, path: &Path) -> bool {
542        self.config
543            .allowed_dirs
544            .iter()
545            .any(|allowed| path.starts_with(allowed))
546    }
547
548    #[allow(dead_code)]
549    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
550    where
551        F: FnMut(&Path) -> Result<()>,
552    {
553        let allowed_dirs = self.config.allowed_dirs.clone();
554        for allowed in allowed_dirs {
555            if allowed.starts_with(dir_path) && allowed.exists() {
556                self.walk_directory_internal(&allowed, callback)?;
557            }
558        }
559        Ok(())
560    }
561
562    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
563        let metadata = fs::metadata(file_path)?;
564        let modified = metadata.modified()?;
565        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
566    }
567
568    fn detect_language(&self, file_path: &Path) -> String {
569        file_path
570            .extension()
571            .and_then(|ext| ext.to_str())
572            .unwrap_or("unknown")
573            .to_string()
574    }
575}
576
577impl Clone for SimpleIndexer {
578    fn clone(&self) -> Self {
579        Self {
580            config: self.config.clone(),
581            index_cache: self.index_cache.clone(),
582            storage: self.storage.clone(),
583            filter: self.filter.clone(),
584        }
585    }
586}
587
588fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
589    if config
590        .allowed_dirs
591        .iter()
592        .any(|allowed| path.starts_with(allowed))
593    {
594        return false;
595    }
596
597    if config
598        .excluded_dirs
599        .iter()
600        .any(|excluded| path.starts_with(excluded))
601    {
602        return true;
603    }
604
605    if config.ignore_hidden
606        && path
607            .file_name()
608            .and_then(|name| name.to_str())
609            .is_some_and(|name_str| name_str.starts_with('.'))
610    {
611        return true;
612    }
613
614    false
615}
616
/// Hashes `content` with the standard library's `DefaultHasher` and returns
/// the 64-bit result as lowercase hexadecimal without leading zeros.
fn calculate_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    let digest = state.finish();
    format!("{digest:x}")
}
625
// Integration-style tests that index real files inside temporary directories.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// With the default configuration, files inside dot-directories are not indexed.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        // Only the file in the visible directory may show up in the index.
        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }

    /// With `ignore_hidden(false)`, files inside dot-directories are expected
    /// to be indexed.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }

    /// A user-supplied `IndexStorage` receives every indexed entry.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // In-memory backend that records persisted entries in a shared vector.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        // The test keeps its own handle to the vector so it can inspect what
        // the indexer persisted.
        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().to_string()
        );

        Ok(())
    }

    /// A user-supplied `TraversalFilter` can veto files the default filter accepts.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        // Rejects `.rs` files and defers every other decision to the default filter.
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
}