1use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::ContentWriter;
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Shared, thread-safe callback used to report indexing progress.
///
/// `Indexer::index_with_callback` invokes it from a background progress thread
/// (polling roughly every 50 ms) with three arguments, in order:
///
/// 1. the number of files processed so far (files that failed to read are
///    counted too),
/// 2. the total number of files discovered for this run,
/// 3. the current human-readable status message.
pub type ProgressCallback = Arc<dyn Fn(usize, usize, String) + Send + Sync>;
41
/// Everything produced by processing a single file in the parallel phase of
/// `Indexer::index_with_callback`, handed back to the sequential phase that
/// feeds the trigram index, content store and database.
struct FileProcessingResult {
    /// Path of the file exactly as it was discovered (not normalized).
    path: PathBuf,
    /// Normalized path: relative to the index root when the root prefix can be
    /// stripped, otherwise the lossy path string with leading `./` removed.
    path_str: String,
    /// Hash of the file content, as computed by `Indexer::hash_content`.
    hash: String,
    /// Full file content, read as UTF-8 text.
    content: String,
    /// Language derived from the file extension.
    language: Language,
    /// Number of lines in `content` (`content.lines().count()`).
    line_count: usize,
    /// Imports extracted by the language-specific dependency extractor;
    /// empty when extraction failed or the language has no extractor.
    dependencies: Vec<ImportInfo>,
    /// Export declarations; only populated for TypeScript/JavaScript and Vue
    /// files, empty otherwise.
    exports: Vec<ExportInfo>,
}
53
54fn find_nearest_tsconfig<'a>(
59 file_path: &str,
60 root: &Path,
61 tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63 let abs_file_path = if Path::new(file_path).is_absolute() {
65 PathBuf::from(file_path)
66 } else {
67 root.join(file_path)
68 };
69
70 let mut current_dir = abs_file_path.parent()?;
72
73 loop {
74 if let Some(alias_map) = tsconfigs.get(current_dir) {
76 return Some(alias_map);
77 }
78
79 current_dir = current_dir.parent()?;
81
82 if current_dir == root || !current_dir.starts_with(root) {
84 break;
85 }
86 }
87
88 None
89}
90
/// Builds and refreshes the search index for a directory tree.
///
/// Holds the cache that indexing results are written to and the configuration
/// that tunes an indexing run.
pub struct Indexer {
    /// Cache backend used for file hashes, file metadata, branch state and
    /// the location of on-disk index artifacts.
    cache: CacheManager,
    /// Indexing settings. `parallel_threads == 0` selects an automatic thread
    /// count (80% of available cores, clamped to `1..=8`).
    config: IndexConfig,
}
96
97impl Indexer {
/// Creates an indexer that writes into `cache` and is tuned by `config`.
///
/// No I/O is performed here; both values are only stored.
pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
    Self { cache, config }
}
102
/// Indexes the directory tree rooted at `root` without a progress callback.
///
/// Convenience wrapper that forwards to `index_with_callback` with `None` as
/// the callback.
///
/// * `root` - directory to index.
/// * `show_progress` - when `true`, a progress bar is drawn on stderr.
///
/// Returns the statistics of the resulting index, or the error propagated
/// from `index_with_callback`.
pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
    self.index_with_callback(root, show_progress, None)
}
107
108 pub fn index_with_callback(
110 &self,
111 root: impl AsRef<Path>,
112 show_progress: bool,
113 progress_callback: Option<ProgressCallback>,
114 ) -> Result<IndexStats> {
115 let root = root.as_ref();
116 log::info!("Indexing directory: {:?}", root);
117
118 let git_state = crate::git::get_git_state_optional(root)?;
120 let branch = git_state
121 .as_ref()
122 .map(|s| s.branch.clone())
123 .unwrap_or_else(|| "_default".to_string());
124
125 if let Some(ref state) = git_state {
126 log::info!(
127 "Git state: branch='{}', commit='{}', dirty={}",
128 state.branch,
129 state.commit,
130 state.dirty
131 );
132 } else {
133 log::info!("Not a git repository, using default branch");
134 }
135
136 let num_threads = if self.config.parallel_threads == 0 {
139 let available_cores = std::thread::available_parallelism()
140 .map(|n| n.get())
141 .unwrap_or(4);
142 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145 } else {
146 self.config.parallel_threads
147 };
148
149 log::info!("Using {} threads for parallel indexing (out of {} available)",
150 num_threads,
151 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153 self.cache.init()?;
155
156 self.check_disk_space(root)?;
158
159 let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161 log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163 let files = self.discover_files(root)?;
165 let total_files = files.len();
166 log::info!("Discovered {} files to index", total_files);
167
168 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171 .unwrap_or_else(|e| {
172 log::warn!("Failed to parse tsconfig.json files: {}", e);
173 HashMap::new()
174 });
175 if !tsconfigs.is_empty() {
176 log::info!("Found {} tsconfig.json files", tsconfigs.len());
177 for (config_dir, alias_map) in &tsconfigs {
178 log::debug!(" {} (base_url: {:?}, {} aliases)",
179 config_dir.display(),
180 alias_map.base_url,
181 alias_map.aliases.len());
182 }
183 }
184
185 if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188 let mut any_changed = false;
190
191 for file_path in &files {
192 let path_str = file_path.to_string_lossy().to_string();
194 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195 rel_path.to_string_lossy().to_string()
197 } else {
198 path_str.trim_start_matches("./").to_string()
200 };
201
202 if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204 match std::fs::read_to_string(file_path) {
206 Ok(content) => {
207 let current_hash = self.hash_content(content.as_bytes());
208 if ¤t_hash != existing_hash {
209 any_changed = true;
210 log::debug!("File changed: {}", path_str);
211 break; }
213 }
214 Err(_) => {
215 any_changed = true;
216 break;
217 }
218 }
219 } else {
220 any_changed = true;
222 break;
223 }
224 }
225
226 if !any_changed {
227 log::info!("No files changed - skipping index rebuild");
228 let stats = self.cache.stats()?;
229 return Ok(stats);
230 }
231 } else if total_files != existing_hashes.len() {
232 log::info!("File count changed ({} -> {}) - full reindex required",
233 existing_hashes.len(), total_files);
234 }
235
236 let mut new_hashes = HashMap::new();
238 let mut files_indexed = 0;
239 let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); let mut trigram_index = TrigramIndex::new();
245 let mut content_writer = ContentWriter::new();
246
247 if total_files > 10000 {
249 let temp_dir = self.cache.path().join("trigram_temp");
250 trigram_index.enable_batch_flush(temp_dir)
251 .context("Failed to enable batch-flush mode for trigram index")?;
252 log::info!("Enabled batch-flush mode for {} files", total_files);
253 }
254
255 let content_path = self.cache.path().join("content.bin");
257 content_writer.init(content_path.clone())
258 .context("Failed to initialize content writer")?;
259
260 let pb = if show_progress {
262 let pb = ProgressBar::new(total_files as u64);
263 pb.set_draw_target(ProgressDrawTarget::stderr());
264 pb.set_style(
265 ProgressStyle::default_bar()
266 .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
267 .unwrap()
268 .progress_chars("=>-")
269 );
270 pb.enable_steady_tick(std::time::Duration::from_millis(100));
272 pb
273 } else {
274 ProgressBar::hidden()
275 };
276
277 let progress_counter = Arc::new(AtomicU64::new(0));
279 let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
281
282 let _start_time = Instant::now();
283
284 let counter_for_thread = Arc::clone(&progress_counter);
286 let status_for_thread = Arc::clone(&progress_status);
287 let pb_clone = pb.clone();
288 let callback_for_thread = progress_callback.clone();
289 let total_files_for_thread = total_files;
290 let progress_thread = if show_progress || callback_for_thread.is_some() {
291 Some(std::thread::spawn(move || {
292 loop {
293 let count = counter_for_thread.load(Ordering::Relaxed);
294 pb_clone.set_position(count);
295
296 if let Some(ref callback) = callback_for_thread {
298 let status = status_for_thread.lock().unwrap().clone();
299 callback(count as usize, total_files_for_thread, status);
300 }
301
302 if count >= total_files_for_thread as u64 {
303 break;
304 }
305 std::thread::sleep(std::time::Duration::from_millis(50));
306 }
307 }))
308 } else {
309 None
310 };
311
312 let pool = rayon::ThreadPoolBuilder::new()
314 .num_threads(num_threads)
315 .build()
316 .context("Failed to create thread pool")?;
317
318 const BATCH_SIZE: usize = 5000;
321 let num_batches = total_files.div_ceil(BATCH_SIZE);
322 log::info!("Processing {} files in {} batches of up to {} files",
323 total_files, num_batches, BATCH_SIZE);
324
325 for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
326 log::info!("Processing batch {}/{} ({} files)",
327 batch_idx + 1, num_batches, batch_files.len());
328
329 let counter_clone = Arc::clone(&progress_counter);
331 let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
332 batch_files
333 .par_iter()
334 .map(|file_path| {
335 let path_str = file_path.to_string_lossy().to_string();
337 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
338 rel_path.to_string_lossy().to_string()
340 } else {
341 path_str.trim_start_matches("./").to_string()
343 };
344
345 let content = match std::fs::read_to_string(&file_path) {
347 Ok(c) => c,
348 Err(e) => {
349 log::warn!("Failed to read {}: {}", path_str, e);
350 counter_clone.fetch_add(1, Ordering::Relaxed);
352 return None;
353 }
354 };
355
356 let hash = self.hash_content(content.as_bytes());
358
359 let ext = file_path.extension()
361 .and_then(|e| e.to_str())
362 .unwrap_or("");
363 let language = Language::from_extension(ext);
364
365 let line_count = content.lines().count();
367
368 let dependencies = match language {
370 Language::Rust => {
371 match RustDependencyExtractor::extract_dependencies(&content) {
372 Ok(deps) => deps,
373 Err(e) => {
374 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
375 Vec::new()
376 }
377 }
378 }
379 Language::Python => {
380 match PythonDependencyExtractor::extract_dependencies(&content) {
381 Ok(deps) => deps,
382 Err(e) => {
383 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
384 Vec::new()
385 }
386 }
387 }
388 Language::TypeScript | Language::JavaScript => {
389 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
391 match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
392 Ok(deps) => deps,
393 Err(e) => {
394 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
395 Vec::new()
396 }
397 }
398 }
399 Language::Go => {
400 match GoDependencyExtractor::extract_dependencies(&content) {
401 Ok(deps) => deps,
402 Err(e) => {
403 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
404 Vec::new()
405 }
406 }
407 }
408 Language::Java => {
409 match JavaDependencyExtractor::extract_dependencies(&content) {
410 Ok(deps) => deps,
411 Err(e) => {
412 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
413 Vec::new()
414 }
415 }
416 }
417 Language::C => {
418 match CDependencyExtractor::extract_dependencies(&content) {
419 Ok(deps) => deps,
420 Err(e) => {
421 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
422 Vec::new()
423 }
424 }
425 }
426 Language::Cpp => {
427 match CppDependencyExtractor::extract_dependencies(&content) {
428 Ok(deps) => deps,
429 Err(e) => {
430 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
431 Vec::new()
432 }
433 }
434 }
435 Language::CSharp => {
436 match CSharpDependencyExtractor::extract_dependencies(&content) {
437 Ok(deps) => deps,
438 Err(e) => {
439 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
440 Vec::new()
441 }
442 }
443 }
444 Language::PHP => {
445 match PhpDependencyExtractor::extract_dependencies(&content) {
446 Ok(deps) => deps,
447 Err(e) => {
448 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
449 Vec::new()
450 }
451 }
452 }
453 Language::Ruby => {
454 match RubyDependencyExtractor::extract_dependencies(&content) {
455 Ok(deps) => deps,
456 Err(e) => {
457 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
458 Vec::new()
459 }
460 }
461 }
462 Language::Kotlin => {
463 match KotlinDependencyExtractor::extract_dependencies(&content) {
464 Ok(deps) => deps,
465 Err(e) => {
466 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
467 Vec::new()
468 }
469 }
470 }
471 Language::Zig => {
472 match ZigDependencyExtractor::extract_dependencies(&content) {
473 Ok(deps) => deps,
474 Err(e) => {
475 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
476 Vec::new()
477 }
478 }
479 }
480 Language::Vue => {
481 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
483 match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
484 Ok(deps) => deps,
485 Err(e) => {
486 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
487 Vec::new()
488 }
489 }
490 }
491 Language::Svelte => {
492 match SvelteDependencyExtractor::extract_dependencies(&content) {
493 Ok(deps) => deps,
494 Err(e) => {
495 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
496 Vec::new()
497 }
498 }
499 }
500 _ => Vec::new(),
502 };
503
504 let exports = match language {
506 Language::TypeScript | Language::JavaScript => {
507 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
509 match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
510 Ok(exports) => exports,
511 Err(e) => {
512 log::warn!("Failed to extract exports from {}: {}", path_str, e);
513 Vec::new()
514 }
515 }
516 }
517 Language::Vue => {
518 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
520 match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
521 Ok(exports) => exports,
522 Err(e) => {
523 log::warn!("Failed to extract exports from {}: {}", path_str, e);
524 Vec::new()
525 }
526 }
527 }
528 _ => Vec::new(),
530 };
531
532 counter_clone.fetch_add(1, Ordering::Relaxed);
534
535 Some(FileProcessingResult {
536 path: file_path.clone(),
537 path_str: normalized_path.to_string(),
538 hash,
539 content,
540 language,
541 line_count,
542 dependencies,
543 exports,
544 })
545 })
546 .collect()
547 });
548
549 for result in results.into_iter().flatten() {
551 let file_id = trigram_index.add_file(result.path.clone());
553
554 trigram_index.index_file(file_id, &result.content);
556
557 content_writer.add_file(result.path.clone(), &result.content);
559
560 files_indexed += 1;
561
562 file_metadata.push((
564 result.path_str.clone(),
565 result.hash.clone(),
566 format!("{:?}", result.language),
567 result.line_count
568 ));
569
570 if !result.dependencies.is_empty() {
572 all_dependencies.push((result.path_str.clone(), result.dependencies));
573 }
574
575 if !result.exports.is_empty() {
577 all_exports.push((result.path_str.clone(), result.exports));
578 }
579
580 new_hashes.insert(result.path_str, result.hash);
581 }
582
583 if total_files > 10000 {
585 let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
586 if show_progress {
587 pb.set_message(flush_msg.clone());
588 }
589 *progress_status.lock().unwrap() = flush_msg;
590 trigram_index.flush_batch()
591 .context("Failed to flush trigram batch")?;
592 }
593 }
594
595 if let Some(thread) = progress_thread {
597 let _ = thread.join();
598 }
599
600 if show_progress {
602 let final_count = progress_counter.load(Ordering::Relaxed);
603 pb.set_position(final_count);
604 }
605
606 *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
608 if show_progress {
609 pb.set_message("Finalizing trigram index...".to_string());
610 }
611 trigram_index.finalize();
612
613 *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
615 if show_progress {
616 pb.set_message("Writing file metadata to database...".to_string());
617 }
618
619 if !file_metadata.is_empty() {
622 let files_without_hash: Vec<(String, String, usize)> = file_metadata
624 .iter()
625 .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
626 .collect();
627
628 *progress_status.lock().unwrap() = "Recording branch files...".to_string();
630 if show_progress {
631 pb.set_message("Recording branch files...".to_string());
632 }
633
634 let branch_files: Vec<(String, String)> = file_metadata
636 .iter()
637 .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
638 .collect();
639
640 self.cache.batch_update_files_and_branch(
642 &files_without_hash,
643 &branch_files,
644 &branch,
645 git_state.as_ref().map(|s| s.commit.as_str()),
646 ).context("Failed to batch update files and branch hashes")?;
647
648 log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
649 }
650
651 self.cache.update_branch_metadata(
653 &branch,
654 git_state.as_ref().map(|s| s.commit.as_str()),
655 file_metadata.len(),
656 git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
657 )?;
658
659 self.cache.checkpoint_wal()
662 .context("Failed to checkpoint WAL")?;
663 log::debug!("WAL checkpoint completed - database is fully synced");
664
665 if !all_dependencies.is_empty() {
667 *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
668 if show_progress {
669 pb.set_message("Extracting dependencies...".to_string());
670 }
671
672 let go_modules = crate::parsers::go::parse_all_go_modules(root)
674 .unwrap_or_else(|e| {
675 log::warn!("Failed to parse go.mod files: {}", e);
676 Vec::new()
677 });
678 if !go_modules.is_empty() {
679 log::info!("Found {} Go modules", go_modules.len());
680 for module in &go_modules {
681 log::debug!(" {} (project: {})", module.name, module.project_root);
682 }
683 }
684
685 let java_projects = crate::parsers::java::parse_all_java_projects(root)
687 .unwrap_or_else(|e| {
688 log::warn!("Failed to parse Java project configs: {}", e);
689 Vec::new()
690 });
691 if !java_projects.is_empty() {
692 log::info!("Found {} Java projects", java_projects.len());
693 for project in &java_projects {
694 log::debug!(" {} (project: {})", project.package_name, project.project_root);
695 }
696 }
697
698 let python_packages = crate::parsers::python::parse_all_python_packages(root)
700 .unwrap_or_else(|e| {
701 log::warn!("Failed to parse Python package configs: {}", e);
702 Vec::new()
703 });
704 if !python_packages.is_empty() {
705 log::info!("Found {} Python packages", python_packages.len());
706 for package in &python_packages {
707 log::debug!(" {} (project: {})", package.name, package.project_root);
708 }
709 }
710
711 let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
713 .unwrap_or_else(|e| {
714 log::warn!("Failed to parse Ruby project configs: {}", e);
715 Vec::new()
716 });
717 if !ruby_projects.is_empty() {
718 log::info!("Found {} Ruby projects", ruby_projects.len());
719 for project in &ruby_projects {
720 log::debug!(" {} (project: {})", project.gem_name, project.project_root);
721 }
722 }
723
724 let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
728 .unwrap_or_else(|e| {
729 log::warn!("Failed to parse composer.json files: {}", e);
730 Vec::new()
731 });
732 if !php_psr4_mappings.is_empty() {
733 log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
734 for mapping in &php_psr4_mappings {
735 log::debug!(" {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
736 }
737 }
738
739 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
741 .unwrap_or_else(|e| {
742 log::warn!("Failed to parse tsconfig.json files: {}", e);
743 HashMap::new()
744 });
745 if !tsconfigs.is_empty() {
746 log::info!("Found {} tsconfig.json files", tsconfigs.len());
747 for (config_dir, alias_map) in &tsconfigs {
748 log::debug!(" {} (base_url: {:?}, {} aliases)",
749 config_dir.display(),
750 alias_map.base_url,
751 alias_map.aliases.len());
752 }
753 }
754
755 let cache_for_deps = CacheManager::new(root);
757 let dep_index = DependencyIndex::new(cache_for_deps);
758
759 let mut total_deps_inserted = 0;
760
761 for (file_path, import_infos) in all_dependencies {
763 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
765 Some(id) => id,
766 None => {
767 log::warn!("File not found in database (skipping dependencies): {}", file_path);
768 continue;
769 }
770 };
771
772 let mut resolved_deps = Vec::new();
774
775 for mut import_info in import_infos {
776 if file_path.ends_with(".go") {
778 let mut reclassified = false;
780 for module in &go_modules {
781 import_info.import_type = crate::parsers::go::reclassify_go_import(
782 &import_info.imported_path,
783 Some(&module.name),
784 );
785 if matches!(import_info.import_type, ImportType::Internal) {
787 reclassified = true;
788 break;
789 }
790 }
791 if !reclassified {
793 import_info.import_type = crate::parsers::go::reclassify_go_import(
794 &import_info.imported_path,
795 None,
796 );
797 }
798 }
799
800 if file_path.ends_with(".java") {
802 let mut reclassified = false;
804 for project in &java_projects {
805 import_info.import_type = crate::parsers::java::reclassify_java_import(
806 &import_info.imported_path,
807 Some(&project.package_name),
808 );
809 if matches!(import_info.import_type, ImportType::Internal) {
811 reclassified = true;
812 break;
813 }
814 }
815 if !reclassified {
817 import_info.import_type = crate::parsers::java::reclassify_java_import(
818 &import_info.imported_path,
819 None,
820 );
821 }
822 }
823
824 if file_path.ends_with(".py") {
826 let mut reclassified = false;
828 for package in &python_packages {
829 import_info.import_type = crate::parsers::python::reclassify_python_import(
830 &import_info.imported_path,
831 Some(&package.name),
832 );
833 if matches!(import_info.import_type, ImportType::Internal) {
835 reclassified = true;
836 break;
837 }
838 }
839 if !reclassified {
841 import_info.import_type = crate::parsers::python::reclassify_python_import(
842 &import_info.imported_path,
843 None,
844 );
845 }
846 }
847
848 if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
850 let mut reclassified = false;
852 for project in &ruby_projects {
853 let gem_names = vec![project.gem_name.clone()];
854 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
855 &import_info.imported_path,
856 &gem_names,
857 );
858 if matches!(import_info.import_type, ImportType::Internal) {
860 reclassified = true;
861 break;
862 }
863 }
864 if !reclassified {
866 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
867 &import_info.imported_path,
868 &[],
869 );
870 }
871 }
872
873 if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
875 let mut reclassified = false;
877 for project in &java_projects {
878 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
879 &import_info.imported_path,
880 Some(&project.package_name),
881 );
882 if matches!(import_info.import_type, ImportType::Internal) {
884 reclassified = true;
885 break;
886 }
887 }
888 if !reclassified {
890 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
891 &import_info.imported_path,
892 None,
893 );
894 }
895 }
896
897 if !matches!(import_info.import_type, ImportType::Internal) {
899 continue;
900 }
901
902 let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
904 if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
906 &import_info.imported_path,
907 &php_psr4_mappings,
908 ) {
909 match dep_index.get_file_id_by_path(&resolved_path)? {
911 Some(id) => {
912 log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
913 import_info.imported_path, resolved_path, id);
914 Some(id)
915 }
916 None => {
917 log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
918 import_info.imported_path, resolved_path);
919 None
920 }
921 }
922 } else {
923 log::trace!("Could not resolve PHP namespace using PSR-4: {}",
924 import_info.imported_path);
925 None
926 }
927 } else if file_path.ends_with(".py") && !python_packages.is_empty() {
928 if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
930 &import_info.imported_path,
931 &python_packages,
932 Some(&file_path),
933 ) {
934 match dep_index.get_file_id_by_path(&resolved_path)? {
936 Some(id) => {
937 log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
938 import_info.imported_path, resolved_path, id);
939 Some(id)
940 }
941 None => {
942 log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
943 import_info.imported_path, resolved_path);
944 None
945 }
946 }
947 } else {
948 log::trace!("Could not resolve Python import: {}", import_info.imported_path);
949 None
950 }
951 } else if file_path.ends_with(".go") && !go_modules.is_empty() {
952 if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
954 &import_info.imported_path,
955 &go_modules,
956 Some(&file_path),
957 ) {
958 match dep_index.get_file_id_by_path(&resolved_path)? {
960 Some(id) => {
961 log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
962 import_info.imported_path, resolved_path, id);
963 Some(id)
964 }
965 None => {
966 log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
967 import_info.imported_path, resolved_path);
968 None
969 }
970 }
971 } else {
972 log::trace!("Could not resolve Go import: {}", import_info.imported_path);
973 None
974 }
975 } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
976 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
977 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
978 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
979 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
981 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
982 &import_info.imported_path,
983 Some(&file_path),
984 alias_map,
985 ) {
986 let candidates: Vec<&str> = candidates_str.split('|').collect();
988
989 let mut resolved_id = None;
991 for candidate_path in candidates {
992 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
995 rel_path.to_string_lossy().to_string()
996 } else {
997 candidate_path.to_string()
999 };
1000
1001 log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1002 match dep_index.get_file_id_by_path(&normalized_candidate)? {
1003 Some(id) => {
1004 log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1005 import_info.imported_path, normalized_candidate, id);
1006 resolved_id = Some(id);
1007 break; }
1009 None => {
1010 log::trace!("TS/JS candidate not in index: {}", candidate_path);
1011 }
1012 }
1013 }
1014
1015 if resolved_id.is_none() {
1016 log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1017 candidates_str);
1018 }
1019
1020 resolved_id
1021 } else {
1022 log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1023 None
1024 }
1025 } else if file_path.ends_with(".rs") {
1026 if let Some(resolved_path) = crate::parsers::rust::resolve_rust_use_to_path(
1028 &import_info.imported_path,
1029 Some(&file_path),
1030 Some(root.to_str().unwrap_or("")),
1031 ) {
1032 match dep_index.get_file_id_by_path(&resolved_path)? {
1034 Some(id) => {
1035 log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1036 import_info.imported_path, resolved_path, id);
1037 Some(id)
1038 }
1039 None => {
1040 log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1041 import_info.imported_path, resolved_path);
1042 None
1043 }
1044 }
1045 } else {
1046 log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1047 None
1048 }
1049 } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1050 if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1052 &import_info.imported_path,
1053 &java_projects,
1054 Some(&file_path),
1055 ) {
1056 match dep_index.get_file_id_by_path(&resolved_path)? {
1058 Some(id) => {
1059 log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1060 import_info.imported_path, resolved_path, id);
1061 Some(id)
1062 }
1063 None => {
1064 log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1065 import_info.imported_path, resolved_path);
1066 None
1067 }
1068 }
1069 } else {
1070 log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1071 None
1072 }
1073 } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1074 if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1076 &import_info.imported_path,
1077 &java_projects,
1078 Some(&file_path),
1079 ) {
1080 match dep_index.get_file_id_by_path(&resolved_path)? {
1082 Some(id) => {
1083 log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1084 import_info.imported_path, resolved_path, id);
1085 Some(id)
1086 }
1087 None => {
1088 log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1089 import_info.imported_path, resolved_path);
1090 None
1091 }
1092 }
1093 } else {
1094 log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1095 None
1096 }
1097 } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1098 if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1100 &import_info.imported_path,
1101 &ruby_projects,
1102 Some(&file_path),
1103 ) {
1104 match dep_index.get_file_id_by_path(&resolved_path)? {
1106 Some(id) => {
1107 log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1108 import_info.imported_path, resolved_path, id);
1109 Some(id)
1110 }
1111 None => {
1112 log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1113 import_info.imported_path, resolved_path);
1114 None
1115 }
1116 }
1117 } else {
1118 log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1119 None
1120 }
1121 } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1122 if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1124 &import_info.imported_path,
1125 Some(&file_path),
1126 ) {
1127 match dep_index.get_file_id_by_path(&resolved_path)? {
1129 Some(id) => {
1130 log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1131 import_info.imported_path, resolved_path, id);
1132 Some(id)
1133 }
1134 None => {
1135 log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1136 import_info.imported_path, resolved_path);
1137 None
1138 }
1139 }
1140 } else {
1141 log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1142 None
1143 }
1144 } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1145 || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1146 || file_path.ends_with(".C") || file_path.ends_with(".H") {
1147 if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1149 &import_info.imported_path,
1150 Some(&file_path),
1151 ) {
1152 match dep_index.get_file_id_by_path(&resolved_path)? {
1154 Some(id) => {
1155 log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1156 import_info.imported_path, resolved_path, id);
1157 Some(id)
1158 }
1159 None => {
1160 log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1161 import_info.imported_path, resolved_path);
1162 None
1163 }
1164 }
1165 } else {
1166 log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1167 None
1168 }
1169 } else if file_path.ends_with(".cs") {
1170 if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1172 &import_info.imported_path,
1173 Some(&file_path),
1174 ) {
1175 match dep_index.get_file_id_by_path(&resolved_path)? {
1177 Some(id) => {
1178 log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1179 import_info.imported_path, resolved_path, id);
1180 Some(id)
1181 }
1182 None => {
1183 log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1184 import_info.imported_path, resolved_path);
1185 None
1186 }
1187 }
1188 } else {
1189 log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1190 None
1191 }
1192 } else if file_path.ends_with(".zig") {
1193 if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1195 &import_info.imported_path,
1196 Some(&file_path),
1197 ) {
1198 match dep_index.get_file_id_by_path(&resolved_path)? {
1200 Some(id) => {
1201 log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1202 import_info.imported_path, resolved_path, id);
1203 Some(id)
1204 }
1205 None => {
1206 log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1207 import_info.imported_path, resolved_path);
1208 None
1209 }
1210 }
1211 } else {
1212 log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1213 None
1214 }
1215 } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1216 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1218 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1219 &import_info.imported_path,
1220 Some(&file_path),
1221 alias_map,
1222 ) {
1223 let candidates: Vec<&str> = candidates_str.split('|').collect();
1225
1226 let mut resolved_id = None;
1228 for candidate_path in candidates {
1229 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1232 rel_path.to_string_lossy().to_string()
1233 } else {
1234 candidate_path.to_string()
1236 };
1237
1238 match dep_index.get_file_id_by_path(&normalized_candidate)? {
1239 Some(id) => {
1240 log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1241 import_info.imported_path, candidate_path, id);
1242 resolved_id = Some(id);
1243 break; }
1245 None => {
1246 log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1247 }
1248 }
1249 }
1250
1251 if resolved_id.is_none() {
1252 log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1253 candidates_str);
1254 }
1255
1256 resolved_id
1257 } else {
1258 log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1259 None
1260 }
1261 } else {
1262 None
1263 };
1264
1265 resolved_deps.push(Dependency {
1268 file_id,
1269 imported_path: import_info.imported_path.clone(),
1270 resolved_file_id,
1271 import_type: import_info.import_type,
1272 line_number: import_info.line_number,
1273 imported_symbols: import_info.imported_symbols.clone(),
1274 });
1275 }
1276
1277 dep_index.clear_dependencies(file_id)?;
1279
1280 if !resolved_deps.is_empty() {
1282 dep_index.batch_insert_dependencies(&resolved_deps)?;
1283 total_deps_inserted += resolved_deps.len();
1284 }
1285 }
1286
1287 log::info!("Extracted {} dependencies", total_deps_inserted);
1288 }
1289
1290 if !all_exports.is_empty() {
1292 *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1293 if show_progress {
1294 pb.set_message("Extracting exports...".to_string());
1295 }
1296
1297 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1299 .unwrap_or_else(|e| {
1300 log::warn!("Failed to parse tsconfig.json files: {}", e);
1301 HashMap::new()
1302 });
1303
1304 let cache_for_exports = CacheManager::new(root);
1306 let dep_index = DependencyIndex::new(cache_for_exports);
1307
1308 let mut total_exports_inserted = 0;
1309
1310 for (file_path, export_infos) in all_exports {
1312 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1314 Some(id) => id,
1315 None => {
1316 log::warn!("File not found in database (skipping exports): {}", file_path);
1317 continue;
1318 }
1319 };
1320
1321 for export_info in export_infos {
1323 let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1325 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1326 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1327 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1328 || file_path.ends_with(".vue") {
1329 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1331 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1332 &export_info.source_path,
1333 Some(&file_path),
1334 alias_map,
1335 ) {
1336 let candidates: Vec<&str> = candidates_str.split('|').collect();
1338
1339 let mut resolved_id = None;
1341 for candidate_path in candidates {
1342 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1344 rel_path.to_string_lossy().to_string()
1345 } else {
1346 candidate_path.to_string()
1347 };
1348
1349 match dep_index.get_file_id_by_path(&normalized_candidate)? {
1350 Some(id) => {
1351 log::trace!("Resolved export source: {} -> {} (file_id={})",
1352 export_info.source_path, normalized_candidate, id);
1353 resolved_id = Some(id);
1354 break; }
1356 None => {
1357 log::trace!("Export source candidate not in index: {}", candidate_path);
1358 }
1359 }
1360 }
1361
1362 if resolved_id.is_none() {
1363 log::trace!("Export source: no matching file found in database for any candidate: {}",
1364 candidates_str);
1365 }
1366
1367 resolved_id
1368 } else {
1369 log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1370 None
1371 }
1372 } else {
1373 None
1374 };
1375
1376 dep_index.insert_export(
1378 file_id,
1379 export_info.exported_symbol,
1380 export_info.source_path,
1381 resolved_source_id,
1382 export_info.line_number,
1383 )?;
1384
1385 total_exports_inserted += 1;
1386 }
1387 }
1388
1389 log::info!("Extracted {} exports", total_exports_inserted);
1390 }
1391
1392 log::info!("Indexed {} files", files_indexed);
1393
1394 *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1396 if show_progress {
1397 pb.set_message("Writing trigram index...".to_string());
1398 }
1399 let trigrams_path = self.cache.path().join("trigrams.bin");
1400 log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1401 trigram_index.trigram_count());
1402
1403 trigram_index.write(&trigrams_path)
1404 .context("Failed to write trigram index")?;
1405 log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1406
1407 *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1409 if show_progress {
1410 pb.set_message("Finalizing content store...".to_string());
1411 }
1412 content_writer.finalize_if_needed()
1413 .context("Failed to finalize content store")?;
1414 log::info!("Wrote {} files ({} bytes) to content.bin",
1415 content_writer.file_count(), content_writer.content_size());
1416
1417 *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1419 if show_progress {
1420 pb.set_message("Updating statistics...".to_string());
1421 }
1422 self.cache.update_stats(&branch)?;
1424
1425 self.cache.update_schema_hash()?;
1427
1428 pb.finish_with_message("Indexing complete");
1429
1430 let stats = self.cache.stats()?;
1432 log::info!("Indexing complete: {} files",
1433 stats.total_files);
1434
1435 Ok(stats)
1436 }
1437
1438 fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1440 let mut files = Vec::new();
1441
1442 let walker = WalkBuilder::new(root)
1447 .follow_links(self.config.follow_symlinks)
1448 .git_ignore(true) .git_global(false) .git_exclude(false) .build();
1452
1453 for entry in walker {
1454 let entry = entry?;
1455 let path = entry.path();
1456
1457 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1459 continue;
1460 }
1461
1462 if self.should_index(path) {
1464 files.push(path.to_path_buf());
1465 }
1466 }
1467
1468 Ok(files)
1469 }
1470
1471 fn should_index(&self, path: &Path) -> bool {
1473 let ext = match path.extension() {
1475 Some(ext) => ext.to_string_lossy(),
1476 None => return false,
1477 };
1478
1479 let lang = Language::from_extension(&ext);
1480
1481 if !lang.is_supported() {
1483 if !matches!(lang, Language::Unknown) {
1484 log::debug!("Skipping {} ({:?} parser not yet implemented)",
1485 path.display(), lang);
1486 }
1487 return false;
1488 }
1489
1490 if let Ok(metadata) = std::fs::metadata(path) {
1492 if metadata.len() > self.config.max_file_size as u64 {
1493 log::debug!("Skipping {} (too large: {} bytes)",
1494 path.display(), metadata.len());
1495 return false;
1496 }
1497 }
1498
1499 true
1503 }
1504
1505 fn hash_content(&self, content: &[u8]) -> String {
1507 let hash = blake3::hash(content);
1508 hash.to_hex().to_string()
1509 }
1510
1511 fn check_disk_space(&self, root: &Path) -> Result<()> {
1516 let cache_path = self.cache.path();
1518
1519 #[cfg(unix)]
1521 {
1522 let test_file = cache_path.join(".space_check");
1525 match std::fs::write(&test_file, b"test") {
1526 Ok(_) => {
1527 let _ = std::fs::remove_file(&test_file);
1528
1529 if let Ok(output) = std::process::Command::new("df")
1531 .arg("-k")
1532 .arg(cache_path.parent().unwrap_or(root))
1533 .output()
1534 {
1535 if let Ok(df_output) = String::from_utf8(output.stdout) {
1536 if let Some(line) = df_output.lines().nth(1) {
1538 let parts: Vec<&str> = line.split_whitespace().collect();
1539 if parts.len() >= 4 {
1540 if let Ok(available_kb) = parts[3].parse::<u64>() {
1541 let available_mb = available_kb / 1024;
1542
1543 if available_mb < 100 {
1545 log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1546 output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1547 } else {
1548 log::debug!("Available disk space: {}MB", available_mb);
1549 }
1550 }
1551 }
1552 }
1553 }
1554 }
1555
1556 Ok(())
1557 }
1558 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1559 anyhow::bail!(
1560 "Permission denied writing to cache directory: {}. Check file permissions.",
1561 cache_path.display()
1562 )
1563 }
1564 Err(e) => {
1565 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1567 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1568 }
1569 }
1570 }
1571
1572 #[cfg(not(unix))]
1573 {
1574 let test_file = cache_path.join(".space_check");
1576 match std::fs::write(&test_file, b"test") {
1577 Ok(_) => {
1578 let _ = std::fs::remove_file(&test_file);
1579 Ok(())
1580 }
1581 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1582 anyhow::bail!(
1583 "Permission denied writing to cache directory: {}. Check file permissions.",
1584 cache_path.display()
1585 )
1586 }
1587 Err(e) => {
1588 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1589 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1590 }
1591 }
1592 }
1593 }
1594}
1595
#[cfg(test)]
mod tests {
    //! Unit tests for the indexer: file filtering, discovery and end-to-end indexing.

    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Builds an indexer whose cache lives under `root`, using `config`.
    fn indexer_with_config(root: &Path, config: IndexConfig) -> Indexer {
        Indexer::new(CacheManager::new(root), config)
    }

    /// Builds an indexer whose cache lives under `root`, using the default configuration.
    fn default_indexer(root: &Path) -> Indexer {
        indexer_with_config(root, IndexConfig::default())
    }

    /// Creates an empty `project` directory inside `temp` and returns its path.
    fn create_project(temp: &TempDir) -> PathBuf {
        let project_root = temp.path().join("project");
        fs::create_dir(&project_root).unwrap();
        project_root
    }

    #[test]
    fn test_indexer_creation() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        assert!(indexer.cache.path().ends_with(".reflex"));
    }

    #[test]
    fn test_hash_content() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let hash1 = indexer.hash_content(b"hello world");
        let hash2 = indexer.hash_content(b"hello world");
        let hash3 = indexer.hash_content(b"different content");

        // Identical content hashes identically.
        assert_eq!(hash1, hash2);

        // Different content hashes differently.
        assert_ne!(hash1, hash3);

        // The digest is rendered as 64 hexadecimal characters.
        assert_eq!(hash1.len(), 64);
    }

    #[test]
    fn test_should_index_rust_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let rust_file = temp.path().join("test.rs");
        fs::write(&rust_file, "fn main() {}").unwrap();

        assert!(indexer.should_index(&rust_file));
    }

    #[test]
    fn test_should_index_unsupported_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let unsupported_file = temp.path().join("test.txt");
        fs::write(&unsupported_file, "plain text").unwrap();

        assert!(!indexer.should_index(&unsupported_file));
    }

    #[test]
    fn test_should_index_no_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let no_ext_file = temp.path().join("Makefile");
        fs::write(&no_ext_file, "all:\n\techo hello").unwrap();

        assert!(!indexer.should_index(&no_ext_file));
    }

    #[test]
    fn test_should_index_size_limit() {
        let temp = TempDir::new().unwrap();

        let mut config = IndexConfig::default();
        config.max_file_size = 100;
        let indexer = indexer_with_config(temp.path(), config);

        // Below the 100-byte limit.
        let small_file = temp.path().join("small.rs");
        fs::write(&small_file, "fn main() {}").unwrap();
        assert!(indexer.should_index(&small_file));

        // Above the 100-byte limit.
        let large_file = temp.path().join("large.rs");
        fs::write(&large_file, "a".repeat(150)).unwrap();
        assert!(!indexer.should_index(&large_file));
    }

    #[test]
    fn test_discover_files_empty_dir() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_discover_files_single_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        fs::write(temp.path().join("main.rs"), "fn main() {}").unwrap();

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("main.rs"));
    }

    #[test]
    fn test_discover_files_multiple_languages() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        fs::write(temp.path().join("main.rs"), "fn main() {}").unwrap();
        fs::write(temp.path().join("script.py"), "print('hello')").unwrap();
        fs::write(temp.path().join("app.js"), "console.log('hi')").unwrap();
        // The README is expected to be left out of the result.
        fs::write(temp.path().join("README.md"), "# Project").unwrap();

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_subdirectories() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let src_dir = temp.path().join("src");
        fs::create_dir(&src_dir).unwrap();
        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
        fs::write(src_dir.join("lib.rs"), "pub mod test {}").unwrap();

        let tests_dir = temp.path().join("tests");
        fs::create_dir(&tests_dir).unwrap();
        fs::write(tests_dir.join("test.rs"), "#[test] fn test() {}").unwrap();

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_respects_gitignore() {
        let temp = TempDir::new().unwrap();

        // The walker only honours `.gitignore` inside a git repository, which
        // it recognises by the presence of a `.git` entry. Creating the
        // directory by hand keeps the test independent of a `git` binary.
        fs::create_dir(temp.path().join(".git")).unwrap();

        let indexer = default_indexer(temp.path());

        fs::write(temp.path().join(".gitignore"), "ignored/\n").unwrap();

        fs::write(temp.path().join("included.rs"), "fn main() {}").unwrap();
        fs::write(temp.path().join("also_included.py"), "print('hi')").unwrap();

        let ignored_dir = temp.path().join("ignored");
        fs::create_dir(&ignored_dir).unwrap();
        fs::write(ignored_dir.join("excluded.rs"), "fn test() {}").unwrap();

        let files = indexer.discover_files(temp.path()).unwrap();

        assert!(files.iter().any(|f| f.ends_with("included.rs")), "Should find included.rs");
        assert!(files.iter().any(|f| f.ends_with("also_included.py")), "Should find also_included.py");

        assert!(!files.iter().any(|f| {
            let path_str = f.to_string_lossy();
            path_str.contains("ignored") && f.ends_with("excluded.rs")
        }), "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)");

        assert_eq!(files.len(), 2, "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)");
    }

    #[test]
    fn test_index_empty_directory() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let stats = indexer.index(temp.path(), false).unwrap();

        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_index_single_rust_file() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(
            project_root.join("main.rs"),
            "fn main() { println!(\"Hello\"); }"
        ).unwrap();

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.files_by_language.get("Rust").is_some());
    }

    #[test]
    fn test_index_multiple_files() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
        fs::write(project_root.join("script.py"), "def main(): pass").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
    }

    #[test]
    fn test_index_creates_trigram_index() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        indexer.index(&project_root, false).unwrap();

        let trigrams_path = project_root.join(".reflex/trigrams.bin");
        assert!(trigrams_path.exists());
    }

    #[test]
    fn test_index_creates_content_store() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        indexer.index(&project_root, false).unwrap();

        let content_path = project_root.join(".reflex/content.bin");
        assert!(content_path.exists());
    }

    #[test]
    fn test_index_incremental_no_changes() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        // First run indexes the file.
        let stats1 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats1.total_files, 1);

        // Second run over unchanged input reports the same total.
        let stats2 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats2.total_files, 1);
    }

    #[test]
    fn test_index_incremental_with_changes() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        let main_path = project_root.join("main.rs");
        fs::write(&main_path, "fn main() {}").unwrap();

        indexer.index(&project_root, false).unwrap();

        // Modify the file between runs.
        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_incremental_new_file() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        let stats1 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats1.total_files, 1);

        // Add a second file between runs.
        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();

        let stats2 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats2.total_files, 2);
    }

    #[test]
    fn test_index_parallel_threads_config() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 2;
        let indexer = indexer_with_config(&project_root, config);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_parallel_threads_auto() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 0;
        let indexer = indexer_with_config(&project_root, config);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_respects_size_limit() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);

        let mut config = IndexConfig::default();
        config.max_file_size = 50;
        let indexer = indexer_with_config(&project_root, config);

        // 9 bytes: below the limit.
        fs::write(project_root.join("small.rs"), "fn a() {}").unwrap();

        // 130 bytes: above the limit.
        let large_content = "fn main() {}\n".repeat(10);
        fs::write(project_root.join("large.rs"), large_content).unwrap();

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_mixed_languages() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
        fs::write(project_root.join("test.py"), "def test(): pass").unwrap();
        fs::write(project_root.join("app.js"), "function main() {}").unwrap();
        fs::write(project_root.join("lib.go"), "func main() {}").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 4);
        assert!(stats.files_by_language.contains_key("Rust"));
        assert!(stats.files_by_language.contains_key("Python"));
        assert!(stats.files_by_language.contains_key("JavaScript"));
        assert!(stats.files_by_language.contains_key("Go"));
    }

    #[test]
    fn test_index_updates_cache_stats() {
        let temp = TempDir::new().unwrap();
        let project_root = create_project(&temp);
        let indexer = default_indexer(&project_root);

        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();

        indexer.index(&project_root, false).unwrap();

        // Read the statistics back through a fresh cache manager.
        let cache = CacheManager::new(&project_root);
        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.index_size_bytes > 0);
    }
}