1use anyhow::Result;
9use ignore::WalkBuilder;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::io::ErrorKind;
15use std::path::{Path, PathBuf};
16use std::sync::Arc;
17use std::time::SystemTime;
18
/// Persistence backend for [`FileIndex`] entries.
///
/// Implementations must be `Send + Sync` because the indexer holds them behind
/// an `Arc<dyn IndexStorage>`.
pub trait IndexStorage: Send + Sync {
    /// Prepares the backend for use; `index_dir` is the configured index directory.
    fn init(&self, index_dir: &Path) -> Result<()>;

    /// Stores a single `entry`; `index_dir` is the configured index directory.
    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()>;
}
27
/// Policy hook deciding which directories are traversed and which files are indexed.
///
/// Implementations must be `Send + Sync` because the indexer holds them behind
/// an `Arc<dyn TraversalFilter>`.
pub trait TraversalFilter: Send + Sync {
    /// Returns `true` when the directory at `path` should be traversed.
    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;

    /// Returns `true` when the file at `path` should be indexed.
    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool;
}
36
37#[derive(Debug, Default, Clone)]
39pub struct MarkdownIndexStorage;
40
41impl IndexStorage for MarkdownIndexStorage {
42    fn init(&self, index_dir: &Path) -> Result<()> {
43        fs::create_dir_all(index_dir)?;
44        Ok(())
45    }
46
47    fn persist(&self, index_dir: &Path, entry: &FileIndex) -> Result<()> {
48        let file_name = format!("{}.md", calculate_hash(&entry.path));
49        let index_path = index_dir.join(file_name);
50
51        let markdown = format!(
52            "# File Index: {}\n\n\
53            - **Path**: {}\n\
54            - **Hash**: {}\n\
55            - **Modified**: {}\n\
56            - **Size**: {} bytes\n\
57            - **Language**: {}\n\
58            - **Tags**: {}\n\n",
59            entry.path,
60            entry.path,
61            entry.hash,
62            entry.modified,
63            entry.size,
64            entry.language,
65            entry.tags.join(", ")
66        );
67
68        fs::write(index_path, markdown)?;
69        Ok(())
70    }
71}
72
73#[derive(Debug, Default, Clone)]
75pub struct ConfigTraversalFilter;
76
77impl TraversalFilter for ConfigTraversalFilter {
78    fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
79        !should_skip_dir(path, config)
80    }
81
82    fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
83        if !path.is_file() {
84            return false;
85        }
86
87        if config.ignore_hidden
89            && path
90                .file_name()
91                .and_then(|n| n.to_str())
92                .is_some_and(|s| s.starts_with('.'))
93        {
94            return false;
95        }
96
97        if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
99            let is_sensitive = matches!(
100                file_name,
101                ".env"
102                    | ".env.local"
103                    | ".env.production"
104                    | ".env.development"
105                    | ".env.test"
106                    | ".git"
107                    | ".gitignore"
108                    | ".DS_Store"
109            ) || file_name.starts_with(".env.");
110            if is_sensitive {
111                return false;
112            }
113        }
114
115        true
116    }
117}
118
/// Settings controlling where the index is stored and which paths are indexed.
#[derive(Clone, Debug)]
pub struct SimpleIndexerConfig {
    // Root of the workspace being indexed.
    workspace_root: PathBuf,
    // Directory handed to the storage backend.
    index_dir: PathBuf,
    // When true, dot-prefixed files and directories are skipped.
    ignore_hidden: bool,
    // Path prefixes that are excluded from indexing.
    excluded_dirs: Vec<PathBuf>,
    // Path prefixes that take precedence over `excluded_dirs` in `should_skip_dir`.
    allowed_dirs: Vec<PathBuf>,
}

impl SimpleIndexerConfig {
    /// Builds the default configuration for `workspace_root`.
    ///
    /// The index lives in `<root>/.vtcode/index`; `.vtcode`, `target` and
    /// `node_modules` are excluded, `.vtcode/external` is allowed, and hidden
    /// entries are ignored.
    pub fn new(workspace_root: PathBuf) -> Self {
        let vtcode_dir = workspace_root.join(".vtcode");
        let index_dir = vtcode_dir.join("index");
        let allowed_dirs = vec![vtcode_dir.join("external")];

        // The four defaults are always distinct paths, so no de-duplication is needed.
        let excluded_dirs = vec![
            index_dir.clone(),
            vtcode_dir,
            workspace_root.join("target"),
            workspace_root.join("node_modules"),
        ];

        Self {
            workspace_root,
            index_dir,
            ignore_hidden: true,
            excluded_dirs,
            allowed_dirs,
        }
    }

    /// Replaces the index directory and excludes the new location from indexing.
    pub fn with_index_dir(mut self, index_dir: impl Into<PathBuf>) -> Self {
        let index_dir: PathBuf = index_dir.into();
        self.push_unique_excluded(index_dir.clone());
        self.index_dir = index_dir;
        self
    }

    /// Adds `path` to the allowed directories unless it is already listed.
    pub fn add_allowed_dir(mut self, path: impl Into<PathBuf>) -> Self {
        let path: PathBuf = path.into();
        if !self.allowed_dirs.contains(&path) {
            self.allowed_dirs.push(path);
        }
        self
    }

    /// Adds `path` to the excluded directories unless it is already listed.
    pub fn add_excluded_dir(mut self, path: impl Into<PathBuf>) -> Self {
        self.push_unique_excluded(path.into());
        self
    }

    /// Sets whether dot-prefixed files and directories are skipped.
    pub fn ignore_hidden(mut self, ignore_hidden: bool) -> Self {
        self.ignore_hidden = ignore_hidden;
        self
    }

    /// Returns the workspace root.
    pub fn workspace_root(&self) -> &Path {
        self.workspace_root.as_path()
    }

    /// Returns the directory where index data is stored.
    pub fn index_dir(&self) -> &Path {
        self.index_dir.as_path()
    }

    // Appends `path` to `excluded_dirs` only when an equal path is not present.
    fn push_unique_excluded(&mut self, path: PathBuf) {
        if !self.excluded_dirs.contains(&path) {
            self.excluded_dirs.push(path);
        }
    }
}
200
/// Metadata recorded for a single indexed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndex {
    /// File path, stored as a lossily converted string.
    pub path: String,
    /// Hex digest of the file's text content.
    pub hash: String,
    /// Last-modified time in whole seconds since the Unix epoch.
    pub modified: u64,
    /// Length of the file's text content in bytes.
    pub size: u64,
    /// File extension, or `"unknown"` when there is none.
    pub language: String,
    /// Free-form tags; the indexer itself always stores an empty list.
    pub tags: Vec<String>,
}
217
/// One matching line produced by `SimpleIndexer::search` or `SimpleIndexer::grep`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Path of the file containing the match, as stored in the index cache.
    pub file_path: String,
    /// 1-based line number of the matching line.
    pub line_number: usize,
    /// Full text of the matching line.
    pub line_content: String,
    /// Matched text: each regex match for `search`, the whole line for `grep`.
    pub matches: Vec<String>,
}
226
/// File indexer that keeps an in-memory cache of indexed files and forwards
/// each entry to a pluggable storage backend.
pub struct SimpleIndexer {
    // Paths and traversal settings.
    config: SimpleIndexerConfig,
    // Indexed entries keyed by the file path string.
    index_cache: HashMap<String, FileIndex>,
    // Backend that persists each indexed entry.
    storage: Arc<dyn IndexStorage>,
    // Policy deciding which files are indexed.
    filter: Arc<dyn TraversalFilter>,
}
234
235impl SimpleIndexer {
236    pub fn new(workspace_root: PathBuf) -> Self {
238        Self::with_components(
239            SimpleIndexerConfig::new(workspace_root),
240            Arc::new(MarkdownIndexStorage),
241            Arc::new(ConfigTraversalFilter),
242        )
243    }
244
245    pub fn with_config(config: SimpleIndexerConfig) -> Self {
247        Self::with_components(
248            config,
249            Arc::new(MarkdownIndexStorage),
250            Arc::new(ConfigTraversalFilter),
251        )
252    }
253
254    pub fn with_index_dir(workspace_root: PathBuf, index_dir: PathBuf) -> Self {
256        let config = SimpleIndexerConfig::new(workspace_root).with_index_dir(index_dir);
257        Self::with_config(config)
258    }
259
260    pub fn with_components(
262        config: SimpleIndexerConfig,
263        storage: Arc<dyn IndexStorage>,
264        filter: Arc<dyn TraversalFilter>,
265    ) -> Self {
266        Self {
267            config,
268            index_cache: HashMap::new(),
269            storage,
270            filter,
271        }
272    }
273
274    pub fn with_storage(self, storage: Arc<dyn IndexStorage>) -> Self {
276        Self { storage, ..self }
277    }
278
279    pub fn with_filter(self, filter: Arc<dyn TraversalFilter>) -> Self {
281        Self { filter, ..self }
282    }
283
284    pub fn init(&self) -> Result<()> {
286        self.storage.init(self.config.index_dir())
287    }
288
289    pub fn workspace_root(&self) -> &Path {
291        self.config.workspace_root()
292    }
293
294    pub fn index_dir(&self) -> &Path {
296        self.config.index_dir()
297    }
298
299    pub fn index_file(&mut self, file_path: &Path) -> Result<()> {
301        if !file_path.exists() || !self.filter.should_index_file(file_path, &self.config) {
302            return Ok(());
303        }
304
305        let content = match fs::read_to_string(file_path) {
306            Ok(text) => text,
307            Err(err) => {
308                if err.kind() == ErrorKind::InvalidData {
309                    return Ok(());
310                }
311                return Err(err.into());
312            }
313        };
314        let hash = calculate_hash(&content);
315        let modified = self.get_modified_time(file_path)?;
316        let size = content.len() as u64;
317        let language = self.detect_language(file_path);
318
319        let index = FileIndex {
320            path: file_path.to_string_lossy().to_string(),
321            hash,
322            modified,
323            size,
324            language,
325            tags: vec![],
326        };
327
328        self.index_cache
329            .insert(file_path.to_string_lossy().to_string(), index.clone());
330
331        self.storage.persist(self.config.index_dir(), &index)?;
332
333        Ok(())
334    }
335
336    pub fn index_directory(&mut self, dir_path: &Path) -> Result<()> {
340        let walker = WalkBuilder::new(dir_path)
341            .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .ignore(true) .parents(true) .build();
348
349        for entry in walker.filter_map(|e| e.ok()) {
350            let path = entry.path();
351
352            if entry.file_type().is_some_and(|ft| ft.is_file()) {
354                let should_skip = self
356                    .config
357                    .excluded_dirs
358                    .iter()
359                    .any(|excluded| path.starts_with(excluded));
360
361                if !should_skip && self.filter.should_index_file(path, &self.config) {
362                    self.index_file(path)?;
363                }
364            }
365        }
366
367        Ok(())
368    }
369
370    pub fn search(&self, pattern: &str, path_filter: Option<&str>) -> Result<Vec<SearchResult>> {
372        let regex = Regex::new(pattern)?;
373
374        let mut results = Vec::new();
375
376        for file_path in self.index_cache.keys() {
378            if path_filter.is_some_and(|filter| !file_path.contains(filter)) {
379                continue;
380            }
381
382            if let Ok(content) = fs::read_to_string(file_path) {
383                for (line_num, line) in content.lines().enumerate() {
384                    if regex.is_match(line) {
385                        let matches: Vec<String> = regex
386                            .find_iter(line)
387                            .map(|m| m.as_str().to_string())
388                            .collect();
389
390                        results.push(SearchResult {
391                            file_path: file_path.clone(),
392                            line_number: line_num + 1,
393                            line_content: line.to_string(),
394                            matches,
395                        });
396                    }
397                }
398            }
399        }
400
401        Ok(results)
402    }
403
404    pub fn find_files(&self, pattern: &str) -> Result<Vec<String>> {
406        let regex = Regex::new(pattern)?;
407        let mut results = Vec::new();
408
409        for file_path in self.index_cache.keys() {
410            if regex.is_match(file_path) {
411                results.push(file_path.clone());
412            }
413        }
414
415        Ok(results)
416    }
417
418    pub fn all_files(&self) -> Vec<String> {
421        self.index_cache.keys().cloned().collect()
422    }
423
424    pub fn get_file_content(
426        &self,
427        file_path: &str,
428        start_line: Option<usize>,
429        end_line: Option<usize>,
430    ) -> Result<String> {
431        let content = fs::read_to_string(file_path)?;
432        let lines: Vec<&str> = content.lines().collect();
433
434        let start = start_line.unwrap_or(1).saturating_sub(1);
435        let end = end_line.unwrap_or(lines.len());
436
437        let selected_lines = &lines[start..end.min(lines.len())];
438
439        let mut result = String::new();
440        for (i, line) in selected_lines.iter().enumerate() {
441            result.push_str(&format!("{}: {}\n", start + i + 1, line));
442        }
443
444        Ok(result)
445    }
446
447    pub fn list_files(&self, dir_path: &str, show_hidden: bool) -> Result<Vec<String>> {
449        let path = Path::new(dir_path);
450        if !path.exists() {
451            return Ok(vec![]);
452        }
453
454        let mut files = Vec::new();
455
456        for entry in fs::read_dir(path)? {
457            let entry = entry?;
458            let file_name = entry.file_name().to_string_lossy().to_string();
459
460            if !show_hidden && file_name.starts_with('.') {
461                continue;
462            }
463
464            files.push(file_name);
465        }
466
467        Ok(files)
468    }
469
470    pub fn grep(&self, pattern: &str, file_pattern: Option<&str>) -> Result<Vec<SearchResult>> {
472        let regex = Regex::new(pattern)?;
473        let mut results = Vec::new();
474
475        for file_path in self.index_cache.keys() {
476            if file_pattern.is_some_and(|fp| !file_path.contains(fp)) {
477                continue;
478            }
479
480            if let Ok(content) = fs::read_to_string(file_path) {
481                for (line_num, line) in content.lines().enumerate() {
482                    if regex.is_match(line) {
483                        results.push(SearchResult {
484                            file_path: file_path.clone(),
485                            line_number: line_num + 1,
486                            line_content: line.to_string(),
487                            matches: vec![line.to_string()],
488                        });
489                    }
490                }
491            }
492        }
493
494        Ok(results)
495    }
496
497    #[allow(dead_code)]
498    fn walk_directory<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
499    where
500        F: FnMut(&Path) -> Result<()>,
501    {
502        if !dir_path.exists() {
503            return Ok(());
504        }
505
506        self.walk_directory_internal(dir_path, callback)
507    }
508
    // Recursive worker for `walk_directory`: visits every entry of `dir_path`
    // and invokes `callback` for each regular file. Any read or callback error
    // aborts the walk. Currently unused, hence the lint allowance.
    #[allow(dead_code)]
    fn walk_directory_internal<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
    where
        F: FnMut(&Path) -> Result<()>,
    {
        for entry in fs::read_dir(dir_path)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                // Directories under an allowed prefix are walked without
                // consulting the filter.
                if self.is_allowed_dir(&path) {
                    self.walk_directory_internal(&path, callback)?;
                    continue;
                }

                // A rejected directory is not walked itself, but allowed
                // directories nested beneath it still are.
                if !self.filter.should_descend(&path, &self.config) {
                    self.walk_allowed_descendants(&path, callback)?;
                    continue;
                }

                self.walk_directory_internal(&path, callback)?;
            } else if path.is_file() {
                callback(&path)?;
            }
            // Entries that are neither a directory nor a regular file are ignored.
        }

        Ok(())
    }
537
538    #[allow(dead_code)]
539    fn is_allowed_dir(&self, path: &Path) -> bool {
540        self.config
541            .allowed_dirs
542            .iter()
543            .any(|allowed| path.starts_with(allowed))
544    }
545
546    #[allow(dead_code)]
547    fn walk_allowed_descendants<F>(&mut self, dir_path: &Path, callback: &mut F) -> Result<()>
548    where
549        F: FnMut(&Path) -> Result<()>,
550    {
551        let allowed_dirs = self.config.allowed_dirs.clone();
552        for allowed in allowed_dirs {
553            if allowed.starts_with(dir_path) && allowed.exists() {
554                self.walk_directory_internal(&allowed, callback)?;
555            }
556        }
557        Ok(())
558    }
559
560    fn get_modified_time(&self, file_path: &Path) -> Result<u64> {
561        let metadata = fs::metadata(file_path)?;
562        let modified = metadata.modified()?;
563        Ok(modified.duration_since(SystemTime::UNIX_EPOCH)?.as_secs())
564    }
565
566    fn detect_language(&self, file_path: &Path) -> String {
567        file_path
568            .extension()
569            .and_then(|ext| ext.to_str())
570            .unwrap_or("unknown")
571            .to_string()
572    }
573}
574
575impl Clone for SimpleIndexer {
576    fn clone(&self) -> Self {
577        Self {
578            config: self.config.clone(),
579            index_cache: self.index_cache.clone(),
580            storage: self.storage.clone(),
581            filter: self.filter.clone(),
582        }
583    }
584}
585
586fn should_skip_dir(path: &Path, config: &SimpleIndexerConfig) -> bool {
587    if config
588        .allowed_dirs
589        .iter()
590        .any(|allowed| path.starts_with(allowed))
591    {
592        return false;
593    }
594
595    if config
596        .excluded_dirs
597        .iter()
598        .any(|excluded| path.starts_with(excluded))
599    {
600        return true;
601    }
602
603    if config.ignore_hidden
604        && path
605            .file_name()
606            .and_then(|name| name.to_str())
607            .is_some_and(|name_str| name_str.starts_with('.'))
608    {
609        return true;
610    }
611
612    false
613}
614
/// Returns the lowercase hexadecimal `DefaultHasher` digest of `content`.
///
/// The standard library does not specify `DefaultHasher`'s algorithm, so the
/// digest may differ between Rust releases and is not a cryptographic hash.
fn calculate_hash(content: &str) -> String {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(content, &mut state);
    let digest = std::hash::Hasher::finish(&state);
    format!("{:x}", digest)
}
623
624#[cfg(test)]
625mod tests {
626    use super::*;
627    use std::fs;
628    use std::sync::{Arc, Mutex};
629    use tempfile::tempdir;
630
    // With the default configuration, files inside a dot-prefixed directory
    // must not be indexed while files in ordinary directories are.
    #[test]
    fn skips_hidden_directories_by_default() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".private");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("secret.txt"), "classified")?;

        let visible_dir = workspace.join("src");
        fs::create_dir_all(&visible_dir)?;
        fs::write(visible_dir.join("lib.rs"), "fn main() {}")?;

        let mut indexer = SimpleIndexer::new(workspace.to_path_buf());
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("secret\\.txt$")?.is_empty());
        assert!(!indexer.find_files("lib\\.rs$")?.is_empty());

        Ok(())
    }
652
    // With `ignore_hidden(false)`, a file inside a dot-prefixed directory must
    // be picked up by `index_directory`.
    #[test]
    fn can_include_hidden_directories_when_configured() -> Result<()> {
        let temp = tempdir()?;
        let workspace = temp.path();
        let hidden_dir = workspace.join(".cache");
        fs::create_dir_all(&hidden_dir)?;
        fs::write(hidden_dir.join("data.log"), "details")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf()).ignore_hidden(false);
        let mut indexer = SimpleIndexer::with_config(config);
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let results = indexer.find_files("data\\.log$")?;
        assert_eq!(results.len(), 1);

        Ok(())
    }
671
    // A user-supplied `IndexStorage` must receive exactly the entries that the
    // indexer produces.
    #[test]
    fn supports_custom_storage_backends() -> Result<()> {
        // In-memory backend that records every persisted entry.
        #[derive(Clone, Default)]
        struct MemoryStorage {
            records: Arc<Mutex<Vec<FileIndex>>>,
        }

        impl MemoryStorage {
            fn new(records: Arc<Mutex<Vec<FileIndex>>>) -> Self {
                Self { records }
            }
        }

        impl IndexStorage for MemoryStorage {
            fn init(&self, _index_dir: &Path) -> Result<()> {
                Ok(())
            }

            fn persist(&self, _index_dir: &Path, entry: &FileIndex) -> Result<()> {
                let mut guard = self.records.lock().expect("lock poisoned");
                guard.push(entry.clone());
                Ok(())
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("notes.txt"), "remember this")?;

        // The test keeps its own handle to the records so it can inspect them
        // after the storage has been moved into the indexer.
        let records: Arc<Mutex<Vec<FileIndex>>> = Arc::new(Mutex::new(Vec::new()));
        let storage = MemoryStorage::new(records.clone());

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer = SimpleIndexer::with_config(config).with_storage(Arc::new(storage));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        let entries = records.lock().expect("lock poisoned");
        assert_eq!(entries.len(), 1);
        assert_eq!(
            entries[0].path,
            workspace.join("notes.txt").to_string_lossy().to_string()
        );

        Ok(())
    }
718
    // A user-supplied `TraversalFilter` must be able to veto files that the
    // default filter would accept.
    #[test]
    fn custom_filters_can_skip_files() -> Result<()> {
        // Rejects `.rs` files and defers every other decision to the default filter.
        #[derive(Default)]
        struct SkipRustFilter {
            inner: ConfigTraversalFilter,
        }

        impl TraversalFilter for SkipRustFilter {
            fn should_descend(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                self.inner.should_descend(path, config)
            }

            fn should_index_file(&self, path: &Path, config: &SimpleIndexerConfig) -> bool {
                if path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
                {
                    return false;
                }

                self.inner.should_index_file(path, config)
            }
        }

        let temp = tempdir()?;
        let workspace = temp.path();
        fs::write(workspace.join("lib.rs"), "fn main() {}")?;
        fs::write(workspace.join("README.md"), "# Notes")?;

        let config = SimpleIndexerConfig::new(workspace.to_path_buf());
        let mut indexer =
            SimpleIndexer::with_config(config).with_filter(Arc::new(SkipRustFilter::default()));
        indexer.init()?;
        indexer.index_directory(workspace)?;

        assert!(indexer.find_files("lib\\.rs$")?.is_empty());
        assert!(!indexer.find_files("README\\.md$")?.is_empty());

        Ok(())
    }
760}