1use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::{ContentReader, ContentWriter};
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Shared, thread-safe hook for reporting indexing progress.
///
/// The callback takes three positional arguments. Within this file it is
/// invoked with the number of files processed so far, the total number of
/// files, and a human-readable status message.
pub type ProgressCallback = Arc<dyn Fn(usize, usize, String) + Sync + Send>;
41
42struct FileProcessingResult {
44 path: PathBuf,
45 path_str: String,
46 hash: String,
47 content: String,
48 language: Language,
49 line_count: usize,
50 dependencies: Vec<ImportInfo>,
51 exports: Vec<ExportInfo>,
52}
53
54fn find_nearest_tsconfig<'a>(
59 file_path: &str,
60 root: &Path,
61 tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63 let abs_file_path = if Path::new(file_path).is_absolute() {
65 PathBuf::from(file_path)
66 } else {
67 root.join(file_path)
68 };
69
70 let mut current_dir = abs_file_path.parent()?;
72
73 loop {
74 if let Some(alias_map) = tsconfigs.get(current_dir) {
76 return Some(alias_map);
77 }
78
79 current_dir = current_dir.parent()?;
81
82 if current_dir == root || !current_dir.starts_with(root) {
84 break;
85 }
86 }
87
88 None
89}
90
91pub struct Indexer {
93 cache: CacheManager,
94 config: IndexConfig,
95}
96
97impl Indexer {
98 pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
100 Self { cache, config }
101 }
102
103 pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
105 self.index_with_callback(root, show_progress, None)
106 }
107
108 pub fn index_with_callback(
110 &self,
111 root: impl AsRef<Path>,
112 show_progress: bool,
113 progress_callback: Option<ProgressCallback>,
114 ) -> Result<IndexStats> {
115 let root = root.as_ref();
116 log::info!("Indexing directory: {:?}", root);
117
118 let git_state = crate::git::get_git_state_optional(root)?;
120 let branch = git_state
121 .as_ref()
122 .map(|s| s.branch.clone())
123 .unwrap_or_else(|| "_default".to_string());
124
125 if let Some(ref state) = git_state {
126 log::info!(
127 "Git state: branch='{}', commit='{}', dirty={}",
128 state.branch,
129 state.commit,
130 state.dirty
131 );
132 } else {
133 log::info!("Not a git repository, using default branch");
134 }
135
136 let num_threads = if self.config.parallel_threads == 0 {
139 let available_cores = std::thread::available_parallelism()
140 .map(|n| n.get())
141 .unwrap_or(4);
142 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145 } else {
146 self.config.parallel_threads
147 };
148
149 log::info!("Using {} threads for parallel indexing (out of {} available)",
150 num_threads,
151 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153 self.cache.init()?;
155
156 self.check_disk_space(root)?;
158
159 let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161 log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163 let files = self.discover_files(root)?;
165 let total_files = files.len();
166 log::info!("Discovered {} files to index", total_files);
167
168 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171 .unwrap_or_else(|e| {
172 log::warn!("Failed to parse tsconfig.json files: {}", e);
173 HashMap::new()
174 });
175 if !tsconfigs.is_empty() {
176 log::info!("Found {} tsconfig.json files", tsconfigs.len());
177 for (config_dir, alias_map) in &tsconfigs {
178 log::debug!(" {} (base_url: {:?}, {} aliases)",
179 config_dir.display(),
180 alias_map.base_url,
181 alias_map.aliases.len());
182 }
183 }
184
185 if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188 let mut any_changed = false;
190
191 for file_path in &files {
192 let path_str = file_path.to_string_lossy().to_string();
194 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195 rel_path.to_string_lossy().to_string()
197 } else {
198 path_str.trim_start_matches("./").to_string()
200 };
201
202 if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204 match std::fs::read_to_string(file_path) {
206 Ok(content) => {
207 let current_hash = self.hash_content(content.as_bytes());
208 if ¤t_hash != existing_hash {
209 any_changed = true;
210 log::debug!("File changed: {}", path_str);
211 break; }
213 }
214 Err(_) => {
215 any_changed = true;
216 break;
217 }
218 }
219 } else {
220 any_changed = true;
222 break;
223 }
224 }
225
226 if !any_changed {
227 let content_path = self.cache.path().join("content.bin");
228 let trigrams_path = self.cache.path().join("trigrams.bin");
229
230 let schema_ok = self.cache.check_schema_hash().unwrap_or(false);
233
234 if schema_ok && content_path.exists() && trigrams_path.exists() {
235 if let Ok(reader) = ContentReader::open(&content_path) {
236 if reader.file_count() > 0 {
237 log::info!("No files changed - skipping index rebuild");
238 return Ok(self.cache.stats()?);
239 }
240 }
241 log::warn!("content.bin invalid despite hashes matching - forcing rebuild");
242 } else if !schema_ok {
243 log::info!("Schema hash changed - forcing full rebuild");
244 } else {
245 log::warn!("Binary index files missing - forcing rebuild");
246 }
247 }
248 } else if total_files != existing_hashes.len() {
249 log::info!("File count changed ({} -> {}) - full reindex required",
250 existing_hashes.len(), total_files);
251 }
252
253 let mut new_hashes = HashMap::new();
255 let mut files_indexed = 0;
256 let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); let mut trigram_index = TrigramIndex::new();
262 let mut content_writer = ContentWriter::new();
263
264 if total_files > 10000 {
266 let temp_dir = self.cache.path().join("trigram_temp");
267 trigram_index.enable_batch_flush(temp_dir)
268 .context("Failed to enable batch-flush mode for trigram index")?;
269 log::info!("Enabled batch-flush mode for {} files", total_files);
270 }
271
272 let content_path = self.cache.path().join("content.bin");
274 content_writer.init(content_path.clone())
275 .context("Failed to initialize content writer")?;
276
277 let pb = if show_progress {
279 let pb = ProgressBar::new(total_files as u64);
280 pb.set_draw_target(ProgressDrawTarget::stderr());
281 pb.set_style(
282 ProgressStyle::default_bar()
283 .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
284 .unwrap()
285 .progress_chars("=>-")
286 );
287 pb.enable_steady_tick(std::time::Duration::from_millis(100));
289 pb
290 } else {
291 ProgressBar::hidden()
292 };
293
294 let progress_counter = Arc::new(AtomicU64::new(0));
296 let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
298
299 let _start_time = Instant::now();
300
301 let counter_for_thread = Arc::clone(&progress_counter);
303 let status_for_thread = Arc::clone(&progress_status);
304 let pb_clone = pb.clone();
305 let callback_for_thread = progress_callback.clone();
306 let total_files_for_thread = total_files;
307 let progress_thread = if show_progress || callback_for_thread.is_some() {
308 Some(std::thread::spawn(move || {
309 loop {
310 let count = counter_for_thread.load(Ordering::Relaxed);
311 pb_clone.set_position(count);
312
313 if let Some(ref callback) = callback_for_thread {
315 let status = status_for_thread.lock().unwrap().clone();
316 callback(count as usize, total_files_for_thread, status);
317 }
318
319 if count >= total_files_for_thread as u64 {
320 break;
321 }
322 std::thread::sleep(std::time::Duration::from_millis(50));
323 }
324 }))
325 } else {
326 None
327 };
328
329 let pool = rayon::ThreadPoolBuilder::new()
331 .num_threads(num_threads)
332 .build()
333 .context("Failed to create thread pool")?;
334
335 const BATCH_SIZE: usize = 5000;
338 let num_batches = total_files.div_ceil(BATCH_SIZE);
339 log::info!("Processing {} files in {} batches of up to {} files",
340 total_files, num_batches, BATCH_SIZE);
341
342 for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
343 log::info!("Processing batch {}/{} ({} files)",
344 batch_idx + 1, num_batches, batch_files.len());
345
346 let counter_clone = Arc::clone(&progress_counter);
348 let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
349 batch_files
350 .par_iter()
351 .map(|file_path| {
352 let path_str = file_path.to_string_lossy().to_string();
354 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
355 rel_path.to_string_lossy().to_string()
357 } else {
358 path_str.trim_start_matches("./").to_string()
360 };
361
362 let content = match std::fs::read_to_string(&file_path) {
364 Ok(c) => c,
365 Err(e) => {
366 log::warn!("Failed to read {}: {}", path_str, e);
367 counter_clone.fetch_add(1, Ordering::Relaxed);
369 return None;
370 }
371 };
372
373 let hash = self.hash_content(content.as_bytes());
375
376 let ext = file_path.extension()
378 .and_then(|e| e.to_str())
379 .unwrap_or("");
380 let language = Language::from_extension(ext);
381
382 let line_count = content.lines().count();
384
385 let dependencies = match language {
387 Language::Rust => {
388 match RustDependencyExtractor::extract_dependencies(&content) {
389 Ok(deps) => deps,
390 Err(e) => {
391 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
392 Vec::new()
393 }
394 }
395 }
396 Language::Python => {
397 match PythonDependencyExtractor::extract_dependencies(&content) {
398 Ok(deps) => deps,
399 Err(e) => {
400 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
401 Vec::new()
402 }
403 }
404 }
405 Language::TypeScript | Language::JavaScript => {
406 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
408 match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
409 Ok(deps) => deps,
410 Err(e) => {
411 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
412 Vec::new()
413 }
414 }
415 }
416 Language::Go => {
417 match GoDependencyExtractor::extract_dependencies(&content) {
418 Ok(deps) => deps,
419 Err(e) => {
420 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
421 Vec::new()
422 }
423 }
424 }
425 Language::Java => {
426 match JavaDependencyExtractor::extract_dependencies(&content) {
427 Ok(deps) => deps,
428 Err(e) => {
429 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
430 Vec::new()
431 }
432 }
433 }
434 Language::C => {
435 match CDependencyExtractor::extract_dependencies(&content) {
436 Ok(deps) => deps,
437 Err(e) => {
438 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
439 Vec::new()
440 }
441 }
442 }
443 Language::Cpp => {
444 match CppDependencyExtractor::extract_dependencies(&content) {
445 Ok(deps) => deps,
446 Err(e) => {
447 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
448 Vec::new()
449 }
450 }
451 }
452 Language::CSharp => {
453 match CSharpDependencyExtractor::extract_dependencies(&content) {
454 Ok(deps) => deps,
455 Err(e) => {
456 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
457 Vec::new()
458 }
459 }
460 }
461 Language::PHP => {
462 match PhpDependencyExtractor::extract_dependencies(&content) {
463 Ok(deps) => deps,
464 Err(e) => {
465 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
466 Vec::new()
467 }
468 }
469 }
470 Language::Ruby => {
471 match RubyDependencyExtractor::extract_dependencies(&content) {
472 Ok(deps) => deps,
473 Err(e) => {
474 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
475 Vec::new()
476 }
477 }
478 }
479 Language::Kotlin => {
480 match KotlinDependencyExtractor::extract_dependencies(&content) {
481 Ok(deps) => deps,
482 Err(e) => {
483 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
484 Vec::new()
485 }
486 }
487 }
488 Language::Zig => {
489 match ZigDependencyExtractor::extract_dependencies(&content) {
490 Ok(deps) => deps,
491 Err(e) => {
492 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
493 Vec::new()
494 }
495 }
496 }
497 Language::Vue => {
498 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
500 match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
501 Ok(deps) => deps,
502 Err(e) => {
503 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
504 Vec::new()
505 }
506 }
507 }
508 Language::Svelte => {
509 match SvelteDependencyExtractor::extract_dependencies(&content) {
510 Ok(deps) => deps,
511 Err(e) => {
512 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
513 Vec::new()
514 }
515 }
516 }
517 _ => Vec::new(),
519 };
520
521 let exports = match language {
523 Language::TypeScript | Language::JavaScript => {
524 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
526 match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
527 Ok(exports) => exports,
528 Err(e) => {
529 log::warn!("Failed to extract exports from {}: {}", path_str, e);
530 Vec::new()
531 }
532 }
533 }
534 Language::Vue => {
535 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
537 match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
538 Ok(exports) => exports,
539 Err(e) => {
540 log::warn!("Failed to extract exports from {}: {}", path_str, e);
541 Vec::new()
542 }
543 }
544 }
545 _ => Vec::new(),
547 };
548
549 counter_clone.fetch_add(1, Ordering::Relaxed);
551
552 Some(FileProcessingResult {
553 path: file_path.clone(),
554 path_str: normalized_path.to_string(),
555 hash,
556 content,
557 language,
558 line_count,
559 dependencies,
560 exports,
561 })
562 })
563 .collect()
564 });
565
566 for result in results.into_iter().flatten() {
568 let file_id = trigram_index.add_file(result.path.clone());
570
571 trigram_index.index_file(file_id, &result.content);
573
574 content_writer.add_file(result.path.clone(), &result.content);
576
577 files_indexed += 1;
578
579 file_metadata.push((
581 result.path_str.clone(),
582 result.hash.clone(),
583 format!("{:?}", result.language),
584 result.line_count
585 ));
586
587 if !result.dependencies.is_empty() {
589 all_dependencies.push((result.path_str.clone(), result.dependencies));
590 }
591
592 if !result.exports.is_empty() {
594 all_exports.push((result.path_str.clone(), result.exports));
595 }
596
597 new_hashes.insert(result.path_str, result.hash);
598 }
599
600 if total_files > 10000 {
602 let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
603 if show_progress {
604 pb.set_message(flush_msg.clone());
605 }
606 *progress_status.lock().unwrap() = flush_msg;
607 trigram_index.flush_batch()
608 .context("Failed to flush trigram batch")?;
609 }
610 }
611
612 if let Some(thread) = progress_thread {
614 let _ = thread.join();
615 }
616
617 if show_progress {
619 let final_count = progress_counter.load(Ordering::Relaxed);
620 pb.set_position(final_count);
621 }
622
623 *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
625 if show_progress {
626 pb.set_message("Finalizing trigram index...".to_string());
627 }
628 trigram_index.finalize();
629
630 *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
632 if show_progress {
633 pb.set_message("Writing file metadata to database...".to_string());
634 }
635
636 if !file_metadata.is_empty() {
639 let files_without_hash: Vec<(String, String, usize)> = file_metadata
641 .iter()
642 .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
643 .collect();
644
645 *progress_status.lock().unwrap() = "Recording branch files...".to_string();
647 if show_progress {
648 pb.set_message("Recording branch files...".to_string());
649 }
650
651 let branch_files: Vec<(String, String)> = file_metadata
653 .iter()
654 .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
655 .collect();
656
657 self.cache.batch_update_files_and_branch(
659 &files_without_hash,
660 &branch_files,
661 &branch,
662 git_state.as_ref().map(|s| s.commit.as_str()),
663 ).context("Failed to batch update files and branch hashes")?;
664
665 log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
666 }
667
668 self.cache.update_branch_metadata(
670 &branch,
671 git_state.as_ref().map(|s| s.commit.as_str()),
672 file_metadata.len(),
673 git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
674 )?;
675
676 self.cache.checkpoint_wal()
679 .context("Failed to checkpoint WAL")?;
680 log::debug!("WAL checkpoint completed - database is fully synced");
681
682 if !all_dependencies.is_empty() {
684 *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
685 if show_progress {
686 pb.set_message("Extracting dependencies...".to_string());
687 }
688
689 let go_modules = crate::parsers::go::parse_all_go_modules(root)
691 .unwrap_or_else(|e| {
692 log::warn!("Failed to parse go.mod files: {}", e);
693 Vec::new()
694 });
695 if !go_modules.is_empty() {
696 log::info!("Found {} Go modules", go_modules.len());
697 for module in &go_modules {
698 log::debug!(" {} (project: {})", module.name, module.project_root);
699 }
700 }
701
702 let java_projects = crate::parsers::java::parse_all_java_projects(root)
704 .unwrap_or_else(|e| {
705 log::warn!("Failed to parse Java project configs: {}", e);
706 Vec::new()
707 });
708 if !java_projects.is_empty() {
709 log::info!("Found {} Java projects", java_projects.len());
710 for project in &java_projects {
711 log::debug!(" {} (project: {})", project.package_name, project.project_root);
712 }
713 }
714
715 let python_packages = crate::parsers::python::parse_all_python_packages(root)
717 .unwrap_or_else(|e| {
718 log::warn!("Failed to parse Python package configs: {}", e);
719 Vec::new()
720 });
721 if !python_packages.is_empty() {
722 log::info!("Found {} Python packages", python_packages.len());
723 for package in &python_packages {
724 log::debug!(" {} (project: {})", package.name, package.project_root);
725 }
726 }
727
728 let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
730 .unwrap_or_else(|e| {
731 log::warn!("Failed to parse Ruby project configs: {}", e);
732 Vec::new()
733 });
734 if !ruby_projects.is_empty() {
735 log::info!("Found {} Ruby projects", ruby_projects.len());
736 for project in &ruby_projects {
737 log::debug!(" {} (project: {})", project.gem_name, project.project_root);
738 }
739 }
740
741 let rust_crates = crate::parsers::rust::parse_all_rust_crates(root)
743 .unwrap_or_else(|e| {
744 log::warn!("Failed to parse Cargo.toml files: {}", e);
745 Vec::new()
746 });
747 if !rust_crates.is_empty() {
748 log::info!("Found {} Rust workspace crates", rust_crates.len());
749 for krate in &rust_crates {
750 log::debug!(" {} (root: {})", krate.name, krate.root_path.display());
751 }
752 }
753
754 let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
758 .unwrap_or_else(|e| {
759 log::warn!("Failed to parse composer.json files: {}", e);
760 Vec::new()
761 });
762 if !php_psr4_mappings.is_empty() {
763 log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
764 for mapping in &php_psr4_mappings {
765 log::debug!(" {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
766 }
767 }
768
769 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
771 .unwrap_or_else(|e| {
772 log::warn!("Failed to parse tsconfig.json files: {}", e);
773 HashMap::new()
774 });
775 if !tsconfigs.is_empty() {
776 log::info!("Found {} tsconfig.json files", tsconfigs.len());
777 for (config_dir, alias_map) in &tsconfigs {
778 log::debug!(" {} (base_url: {:?}, {} aliases)",
779 config_dir.display(),
780 alias_map.base_url,
781 alias_map.aliases.len());
782 }
783 }
784
785 let cache_for_deps = CacheManager::new(root);
787 let dep_index = DependencyIndex::new(cache_for_deps);
788
789 let mut total_deps_inserted = 0;
790
791 for (file_path, import_infos) in all_dependencies {
793 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
795 Some(id) => id,
796 None => {
797 log::warn!("File not found in database (skipping dependencies): {}", file_path);
798 continue;
799 }
800 };
801
802 let mut resolved_deps = Vec::new();
804
805 for mut import_info in import_infos {
806 if file_path.ends_with(".go") {
808 let mut reclassified = false;
810 for module in &go_modules {
811 import_info.import_type = crate::parsers::go::reclassify_go_import(
812 &import_info.imported_path,
813 Some(&module.name),
814 );
815 if matches!(import_info.import_type, ImportType::Internal) {
817 reclassified = true;
818 break;
819 }
820 }
821 if !reclassified {
823 import_info.import_type = crate::parsers::go::reclassify_go_import(
824 &import_info.imported_path,
825 None,
826 );
827 }
828 }
829
830 if file_path.ends_with(".java") {
832 let mut reclassified = false;
834 for project in &java_projects {
835 import_info.import_type = crate::parsers::java::reclassify_java_import(
836 &import_info.imported_path,
837 Some(&project.package_name),
838 );
839 if matches!(import_info.import_type, ImportType::Internal) {
841 reclassified = true;
842 break;
843 }
844 }
845 if !reclassified {
847 import_info.import_type = crate::parsers::java::reclassify_java_import(
848 &import_info.imported_path,
849 None,
850 );
851 }
852 }
853
854 if file_path.ends_with(".py") {
856 let mut reclassified = false;
858 for package in &python_packages {
859 import_info.import_type = crate::parsers::python::reclassify_python_import(
860 &import_info.imported_path,
861 Some(&package.name),
862 );
863 if matches!(import_info.import_type, ImportType::Internal) {
865 reclassified = true;
866 break;
867 }
868 }
869 if !reclassified {
871 import_info.import_type = crate::parsers::python::reclassify_python_import(
872 &import_info.imported_path,
873 None,
874 );
875 }
876 }
877
878 if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
880 let mut reclassified = false;
882 for project in &ruby_projects {
883 let gem_names = vec![project.gem_name.clone()];
884 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
885 &import_info.imported_path,
886 &gem_names,
887 );
888 if matches!(import_info.import_type, ImportType::Internal) {
890 reclassified = true;
891 break;
892 }
893 }
894 if !reclassified {
896 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
897 &import_info.imported_path,
898 &[],
899 );
900 }
901 }
902
903 if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
905 let mut reclassified = false;
907 for project in &java_projects {
908 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
909 &import_info.imported_path,
910 Some(&project.package_name),
911 );
912 if matches!(import_info.import_type, ImportType::Internal) {
914 reclassified = true;
915 break;
916 }
917 }
918 if !reclassified {
920 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
921 &import_info.imported_path,
922 None,
923 );
924 }
925 }
926
927 if file_path.ends_with(".rs") && !rust_crates.is_empty() {
929 let new_type = crate::parsers::rust::reclassify_rust_import(
930 &import_info.imported_path,
931 &rust_crates,
932 );
933 if matches!(new_type, ImportType::Internal) {
934 import_info.import_type = new_type;
935 }
936 }
937
938 if !matches!(import_info.import_type, ImportType::Internal) {
940 continue;
941 }
942
943 let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
945 if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
947 &import_info.imported_path,
948 &php_psr4_mappings,
949 ) {
950 match dep_index.get_file_id_by_path(&resolved_path) {
952 Ok(Some(id)) => {
953 log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
954 import_info.imported_path, resolved_path, id);
955 Some(id)
956 }
957 Ok(None) => {
958 log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
959 import_info.imported_path, resolved_path);
960 None
961 }
962 Err(e) => {
963 log::debug!("Skipping PHP dependency resolution for '{}': {}", resolved_path, e);
964 None
965 }
966 }
967 } else {
968 log::trace!("Could not resolve PHP namespace using PSR-4: {}",
969 import_info.imported_path);
970 None
971 }
972 } else if file_path.ends_with(".py") && !python_packages.is_empty() {
973 if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
975 &import_info.imported_path,
976 &python_packages,
977 Some(&file_path),
978 ) {
979 match dep_index.get_file_id_by_path(&resolved_path) {
981 Ok(Some(id)) => {
982 log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
983 import_info.imported_path, resolved_path, id);
984 Some(id)
985 }
986 Ok(None) => {
987 log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
988 import_info.imported_path, resolved_path);
989 None
990 }
991 Err(e) => {
992 log::debug!("Skipping Python dependency resolution for '{}': {}", resolved_path, e);
993 None
994 }
995 }
996 } else {
997 log::trace!("Could not resolve Python import: {}", import_info.imported_path);
998 None
999 }
1000 } else if file_path.ends_with(".go") && !go_modules.is_empty() {
1001 if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
1003 &import_info.imported_path,
1004 &go_modules,
1005 Some(&file_path),
1006 ) {
1007 match dep_index.get_file_id_by_path(&resolved_path) {
1009 Ok(Some(id)) => {
1010 log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
1011 import_info.imported_path, resolved_path, id);
1012 Some(id)
1013 }
1014 Ok(None) => {
1015 log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
1016 import_info.imported_path, resolved_path);
1017 None
1018 }
1019 Err(e) => {
1020 log::debug!("Skipping Go dependency resolution for '{}': {}", resolved_path, e);
1021 None
1022 }
1023 }
1024 } else {
1025 log::trace!("Could not resolve Go import: {}", import_info.imported_path);
1026 None
1027 }
1028 } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1029 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1030 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1031 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
1032 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1034 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1035 &import_info.imported_path,
1036 Some(&file_path),
1037 alias_map,
1038 ) {
1039 let candidates: Vec<&str> = candidates_str.split('|').collect();
1041
1042 let mut resolved_id = None;
1044 for candidate_path in candidates {
1045 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1048 rel_path.to_string_lossy().to_string()
1049 } else {
1050 candidate_path.to_string()
1052 };
1053
1054 log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1055 match dep_index.get_file_id_by_path(&normalized_candidate) {
1056 Ok(Some(id)) => {
1057 log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1058 import_info.imported_path, normalized_candidate, id);
1059 resolved_id = Some(id);
1060 break; }
1062 Ok(None) => {
1063 log::trace!("TS/JS candidate not in index: {}", candidate_path);
1064 }
1065 Err(e) => {
1066 log::debug!("Skipping TS/JS dependency resolution for '{}': {}", normalized_candidate, e);
1067 }
1068 }
1069 }
1070
1071 if resolved_id.is_none() {
1072 log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1073 candidates_str);
1074 }
1075
1076 resolved_id
1077 } else {
1078 log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1079 None
1080 }
1081 } else if file_path.ends_with(".rs") {
1082 let resolved_path_opt = crate::parsers::rust::resolve_rust_use_to_path(
1085 &import_info.imported_path,
1086 Some(&file_path),
1087 Some(root.to_str().unwrap_or("")),
1088 ).or_else(|| {
1089 crate::parsers::rust::resolve_rust_workspace_path(
1090 &import_info.imported_path,
1091 &rust_crates,
1092 )
1093 });
1094
1095 if let Some(resolved_path) = resolved_path_opt {
1096 match dep_index.get_file_id_by_path(&resolved_path) {
1098 Ok(Some(id)) => {
1099 log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1100 import_info.imported_path, resolved_path, id);
1101 Some(id)
1102 }
1103 Ok(None) => {
1104 log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1105 import_info.imported_path, resolved_path);
1106 None
1107 }
1108 Err(e) => {
1109 log::debug!("Skipping Rust dependency resolution for '{}': {}", resolved_path, e);
1110 None
1111 }
1112 }
1113 } else {
1114 log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1115 None
1116 }
1117 } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1118 if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1120 &import_info.imported_path,
1121 &java_projects,
1122 Some(&file_path),
1123 ) {
1124 match dep_index.get_file_id_by_path(&resolved_path) {
1126 Ok(Some(id)) => {
1127 log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1128 import_info.imported_path, resolved_path, id);
1129 Some(id)
1130 }
1131 Ok(None) => {
1132 log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1133 import_info.imported_path, resolved_path);
1134 None
1135 }
1136 Err(e) => {
1137 log::debug!("Skipping Java dependency resolution for '{}': {}", resolved_path, e);
1138 None
1139 }
1140 }
1141 } else {
1142 log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1143 None
1144 }
1145 } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1146 if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1148 &import_info.imported_path,
1149 &java_projects,
1150 Some(&file_path),
1151 ) {
1152 match dep_index.get_file_id_by_path(&resolved_path) {
1154 Ok(Some(id)) => {
1155 log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1156 import_info.imported_path, resolved_path, id);
1157 Some(id)
1158 }
1159 Ok(None) => {
1160 log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1161 import_info.imported_path, resolved_path);
1162 None
1163 }
1164 Err(e) => {
1165 log::debug!("Skipping Kotlin dependency resolution for '{}': {}", resolved_path, e);
1166 None
1167 }
1168 }
1169 } else {
1170 log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1171 None
1172 }
1173 } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1174 if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1176 &import_info.imported_path,
1177 &ruby_projects,
1178 Some(&file_path),
1179 ) {
1180 match dep_index.get_file_id_by_path(&resolved_path) {
1182 Ok(Some(id)) => {
1183 log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1184 import_info.imported_path, resolved_path, id);
1185 Some(id)
1186 }
1187 Ok(None) => {
1188 log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1189 import_info.imported_path, resolved_path);
1190 None
1191 }
1192 Err(e) => {
1193 log::debug!("Skipping Ruby dependency resolution for '{}': {}", resolved_path, e);
1194 None
1195 }
1196 }
1197 } else {
1198 log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1199 None
1200 }
1201 } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1202 if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1204 &import_info.imported_path,
1205 Some(&file_path),
1206 ) {
1207 match dep_index.get_file_id_by_path(&resolved_path) {
1209 Ok(Some(id)) => {
1210 log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1211 import_info.imported_path, resolved_path, id);
1212 Some(id)
1213 }
1214 Ok(None) => {
1215 log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1216 import_info.imported_path, resolved_path);
1217 None
1218 }
1219 Err(e) => {
1220 log::debug!("Skipping C dependency resolution for '{}': {}", resolved_path, e);
1221 None
1222 }
1223 }
1224 } else {
1225 log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1226 None
1227 }
1228 } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1229 || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1230 || file_path.ends_with(".C") || file_path.ends_with(".H") {
1231 if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1233 &import_info.imported_path,
1234 Some(&file_path),
1235 ) {
1236 match dep_index.get_file_id_by_path(&resolved_path) {
1238 Ok(Some(id)) => {
1239 log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1240 import_info.imported_path, resolved_path, id);
1241 Some(id)
1242 }
1243 Ok(None) => {
1244 log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1245 import_info.imported_path, resolved_path);
1246 None
1247 }
1248 Err(e) => {
1249 log::debug!("Skipping C++ dependency resolution for '{}': {}", resolved_path, e);
1250 None
1251 }
1252 }
1253 } else {
1254 log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1255 None
1256 }
1257 } else if file_path.ends_with(".cs") {
1258 if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1260 &import_info.imported_path,
1261 Some(&file_path),
1262 ) {
1263 match dep_index.get_file_id_by_path(&resolved_path) {
1265 Ok(Some(id)) => {
1266 log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1267 import_info.imported_path, resolved_path, id);
1268 Some(id)
1269 }
1270 Ok(None) => {
1271 log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1272 import_info.imported_path, resolved_path);
1273 None
1274 }
1275 Err(e) => {
1276 log::debug!("Skipping C# dependency resolution for '{}': {}", resolved_path, e);
1277 None
1278 }
1279 }
1280 } else {
1281 log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1282 None
1283 }
1284 } else if file_path.ends_with(".zig") {
1285 if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1287 &import_info.imported_path,
1288 Some(&file_path),
1289 ) {
1290 match dep_index.get_file_id_by_path(&resolved_path) {
1292 Ok(Some(id)) => {
1293 log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1294 import_info.imported_path, resolved_path, id);
1295 Some(id)
1296 }
1297 Ok(None) => {
1298 log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1299 import_info.imported_path, resolved_path);
1300 None
1301 }
1302 Err(e) => {
1303 log::debug!("Skipping Zig dependency resolution for '{}': {}", resolved_path, e);
1304 None
1305 }
1306 }
1307 } else {
1308 log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1309 None
1310 }
1311 } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1312 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1314 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1315 &import_info.imported_path,
1316 Some(&file_path),
1317 alias_map,
1318 ) {
1319 let candidates: Vec<&str> = candidates_str.split('|').collect();
1321
1322 let mut resolved_id = None;
1324 for candidate_path in candidates {
1325 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1328 rel_path.to_string_lossy().to_string()
1329 } else {
1330 candidate_path.to_string()
1332 };
1333
1334 match dep_index.get_file_id_by_path(&normalized_candidate) {
1335 Ok(Some(id)) => {
1336 log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1337 import_info.imported_path, candidate_path, id);
1338 resolved_id = Some(id);
1339 break; }
1341 Ok(None) => {
1342 log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1343 }
1344 Err(e) => {
1345 log::debug!("Skipping Vue/Svelte dependency resolution for '{}': {}", normalized_candidate, e);
1346 }
1347 }
1348 }
1349
1350 if resolved_id.is_none() {
1351 log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1352 candidates_str);
1353 }
1354
1355 resolved_id
1356 } else {
1357 log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1358 None
1359 }
1360 } else {
1361 None
1362 };
1363
1364 resolved_deps.push(Dependency {
1367 file_id,
1368 imported_path: import_info.imported_path.clone(),
1369 resolved_file_id,
1370 import_type: import_info.import_type,
1371 line_number: import_info.line_number,
1372 imported_symbols: import_info.imported_symbols.clone(),
1373 });
1374 }
1375
1376 dep_index.clear_dependencies(file_id)?;
1378
1379 if !resolved_deps.is_empty() {
1381 dep_index.batch_insert_dependencies(&resolved_deps)?;
1382 total_deps_inserted += resolved_deps.len();
1383 }
1384 }
1385
1386 log::info!("Extracted {} dependencies", total_deps_inserted);
1387 }
1388
1389 if !all_exports.is_empty() {
1391 *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1392 if show_progress {
1393 pb.set_message("Extracting exports...".to_string());
1394 }
1395
1396 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1398 .unwrap_or_else(|e| {
1399 log::warn!("Failed to parse tsconfig.json files: {}", e);
1400 HashMap::new()
1401 });
1402
1403 let cache_for_exports = CacheManager::new(root);
1405 let dep_index = DependencyIndex::new(cache_for_exports);
1406
1407 let mut total_exports_inserted = 0;
1408
1409 for (file_path, export_infos) in all_exports {
1411 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1413 Some(id) => id,
1414 None => {
1415 log::warn!("File not found in database (skipping exports): {}", file_path);
1416 continue;
1417 }
1418 };
1419
1420 for export_info in export_infos {
1422 let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1424 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1425 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1426 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1427 || file_path.ends_with(".vue") {
1428 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1430 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1431 &export_info.source_path,
1432 Some(&file_path),
1433 alias_map,
1434 ) {
1435 let candidates: Vec<&str> = candidates_str.split('|').collect();
1437
1438 let mut resolved_id = None;
1440 for candidate_path in candidates {
1441 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1443 rel_path.to_string_lossy().to_string()
1444 } else {
1445 candidate_path.to_string()
1446 };
1447
1448 match dep_index.get_file_id_by_path(&normalized_candidate) {
1449 Ok(Some(id)) => {
1450 log::trace!("Resolved export source: {} -> {} (file_id={})",
1451 export_info.source_path, normalized_candidate, id);
1452 resolved_id = Some(id);
1453 break; }
1455 Ok(None) => {
1456 log::trace!("Export source candidate not in index: {}", candidate_path);
1457 }
1458 Err(e) => {
1459 log::debug!("Skipping export source resolution for '{}': {}", normalized_candidate, e);
1460 }
1461 }
1462 }
1463
1464 if resolved_id.is_none() {
1465 log::trace!("Export source: no matching file found in database for any candidate: {}",
1466 candidates_str);
1467 }
1468
1469 resolved_id
1470 } else {
1471 log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1472 None
1473 }
1474 } else {
1475 None
1476 };
1477
1478 dep_index.insert_export(
1480 file_id,
1481 export_info.exported_symbol,
1482 export_info.source_path,
1483 resolved_source_id,
1484 export_info.line_number,
1485 )?;
1486
1487 total_exports_inserted += 1;
1488 }
1489 }
1490
1491 log::info!("Extracted {} exports", total_exports_inserted);
1492 }
1493
1494 log::info!("Indexed {} files", files_indexed);
1495
1496 *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1498 if show_progress {
1499 pb.set_message("Writing trigram index...".to_string());
1500 }
1501 let trigrams_path = self.cache.path().join("trigrams.bin");
1502 log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1503 trigram_index.trigram_count());
1504
1505 trigram_index.write(&trigrams_path)
1506 .context("Failed to write trigram index")?;
1507 log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1508
1509 *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1511 if show_progress {
1512 pb.set_message("Finalizing content store...".to_string());
1513 }
1514 content_writer.finalize_if_needed()
1515 .context("Failed to finalize content store")?;
1516 log::info!("Wrote {} files ({} bytes) to content.bin",
1517 content_writer.file_count(), content_writer.content_size());
1518
1519 *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1521 if show_progress {
1522 pb.set_message("Updating statistics...".to_string());
1523 }
1524 self.cache.update_stats(&branch)?;
1526
1527 self.cache.update_schema_hash()?;
1529
1530 pb.finish_with_message("Indexing complete");
1531
1532 let stats = self.cache.stats()?;
1534 log::info!("Indexing complete: {} files",
1535 stats.total_files);
1536
1537 Ok(stats)
1538 }
1539
1540 fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1542 let mut files = Vec::new();
1543
1544 let walker = WalkBuilder::new(root)
1549 .follow_links(self.config.follow_symlinks)
1550 .git_ignore(true) .git_global(false) .git_exclude(false) .build();
1554
1555 for entry in walker {
1556 let entry = entry?;
1557 let path = entry.path();
1558
1559 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1561 continue;
1562 }
1563
1564 if self.should_index(path) {
1566 files.push(path.to_path_buf());
1567 }
1568 }
1569
1570 Ok(files)
1571 }
1572
1573 fn should_index(&self, path: &Path) -> bool {
1575 let ext = match path.extension() {
1577 Some(ext) => ext.to_string_lossy(),
1578 None => return false,
1579 };
1580
1581 let lang = Language::from_extension(&ext);
1582
1583 if !lang.is_supported() {
1585 if !matches!(lang, Language::Unknown) {
1586 log::debug!("Skipping {} ({:?} parser not yet implemented)",
1587 path.display(), lang);
1588 }
1589 return false;
1590 }
1591
1592 if !self.config.languages.is_empty() && !self.config.languages.contains(&lang) {
1594 log::debug!("Skipping {} ({:?} not in configured languages)", path.display(), lang);
1595 return false;
1596 }
1597
1598 if let Ok(metadata) = std::fs::metadata(path) {
1600 if metadata.len() > self.config.max_file_size as u64 {
1601 log::debug!("Skipping {} (too large: {} bytes)",
1602 path.display(), metadata.len());
1603 return false;
1604 }
1605 }
1606
1607 true
1611 }
1612
1613 fn hash_content(&self, content: &[u8]) -> String {
1615 let hash = blake3::hash(content);
1616 hash.to_hex().to_string()
1617 }
1618
1619 fn check_disk_space(&self, root: &Path) -> Result<()> {
1624 let cache_path = self.cache.path();
1626
1627 #[cfg(unix)]
1629 {
1630 let test_file = cache_path.join(".space_check");
1633 match std::fs::write(&test_file, b"test") {
1634 Ok(_) => {
1635 let _ = std::fs::remove_file(&test_file);
1636
1637 if let Ok(output) = std::process::Command::new("df")
1639 .arg("-k")
1640 .arg(cache_path.parent().unwrap_or(root))
1641 .output()
1642 {
1643 if let Ok(df_output) = String::from_utf8(output.stdout) {
1644 if let Some(line) = df_output.lines().nth(1) {
1646 let parts: Vec<&str> = line.split_whitespace().collect();
1647 if parts.len() >= 4 {
1648 if let Ok(available_kb) = parts[3].parse::<u64>() {
1649 let available_mb = available_kb / 1024;
1650
1651 if available_mb < 100 {
1653 log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1654 output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1655 } else {
1656 log::debug!("Available disk space: {}MB", available_mb);
1657 }
1658 }
1659 }
1660 }
1661 }
1662 }
1663
1664 Ok(())
1665 }
1666 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1667 anyhow::bail!(
1668 "Permission denied writing to cache directory: {}. Check file permissions.",
1669 cache_path.display()
1670 )
1671 }
1672 Err(e) => {
1673 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1675 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1676 }
1677 }
1678 }
1679
1680 #[cfg(not(unix))]
1681 {
1682 let test_file = cache_path.join(".space_check");
1684 match std::fs::write(&test_file, b"test") {
1685 Ok(_) => {
1686 let _ = std::fs::remove_file(&test_file);
1687 Ok(())
1688 }
1689 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1690 anyhow::bail!(
1691 "Permission denied writing to cache directory: {}. Check file permissions.",
1692 cache_path.display()
1693 )
1694 }
1695 Err(e) => {
1696 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1697 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1698 }
1699 }
1700 }
1701 }
1702}
1703
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;

    // ------------------------------------------------------------------
    // Shared fixtures
    // ------------------------------------------------------------------

    /// Builds an indexer with the default configuration, cached under `root`.
    fn default_indexer(root: &Path) -> Indexer {
        let cache = CacheManager::new(root);
        Indexer::new(cache, IndexConfig::default())
    }

    /// Builds an indexer with a caller-supplied configuration, cached under `root`.
    fn indexer_with(root: &Path, config: IndexConfig) -> Indexer {
        let cache = CacheManager::new(root);
        Indexer::new(cache, config)
    }

    /// Creates the `project` sub-directory inside `temp` and returns its path.
    fn project_dir(temp: &TempDir) -> PathBuf {
        let dir = temp.path().join("project");
        fs::create_dir(&dir).unwrap();
        dir
    }

    /// Writes `body` to `dir/name` and returns the full path of the new file.
    fn write_source(dir: &Path, name: &str, body: &str) -> PathBuf {
        let target = dir.join(name);
        fs::write(&target, body).unwrap();
        target
    }

    // ------------------------------------------------------------------
    // Construction and hashing
    // ------------------------------------------------------------------

    #[test]
    fn test_indexer_creation() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        assert!(indexer.cache.path().ends_with(".reflex"));
    }

    #[test]
    fn test_hash_content() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let first = indexer.hash_content(b"hello world");
        let repeat = indexer.hash_content(b"hello world");
        let other = indexer.hash_content(b"different content");

        // Identical input hashes identically; different input does not.
        assert_eq!(first, repeat);
        assert_ne!(first, other);

        // The digest is rendered as 64 characters.
        assert_eq!(first.len(), 64);
    }

    // ------------------------------------------------------------------
    // should_index
    // ------------------------------------------------------------------

    #[test]
    fn test_should_index_rust_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let rust_file = write_source(temp.path(), "test.rs", "fn main() {}");

        assert!(indexer.should_index(&rust_file));
    }

    #[test]
    fn test_should_index_unsupported_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let unsupported_file = write_source(temp.path(), "test.txt", "plain text");

        assert!(!indexer.should_index(&unsupported_file));
    }

    #[test]
    fn test_should_index_no_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let no_ext_file = write_source(temp.path(), "Makefile", "all:\n\techo hello");

        assert!(!indexer.should_index(&no_ext_file));
    }

    #[test]
    fn test_should_index_size_limit() {
        let temp = TempDir::new().unwrap();

        let mut config = IndexConfig::default();
        config.max_file_size = 100;
        let indexer = indexer_with(temp.path(), config);

        // Under the limit: accepted.
        let small_file = write_source(temp.path(), "small.rs", "fn main() {}");
        assert!(indexer.should_index(&small_file));

        // Over the limit: rejected.
        let oversized = "a".repeat(150);
        let large_file = write_source(temp.path(), "large.rs", &oversized);
        assert!(!indexer.should_index(&large_file));
    }

    // ------------------------------------------------------------------
    // discover_files
    // ------------------------------------------------------------------

    #[test]
    fn test_discover_files_empty_dir() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_discover_files_single_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        write_source(temp.path(), "main.rs", "fn main() {}");

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("main.rs"));
    }

    #[test]
    fn test_discover_files_multiple_languages() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        write_source(temp.path(), "main.rs", "fn main() {}");
        write_source(temp.path(), "script.py", "print('hello')");
        write_source(temp.path(), "app.js", "console.log('hi')");
        // Markdown is expected to be left out of the result.
        write_source(temp.path(), "README.md", "# Project");

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_subdirectories() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let src_dir = temp.path().join("src");
        fs::create_dir(&src_dir).unwrap();
        write_source(&src_dir, "main.rs", "fn main() {}");
        write_source(&src_dir, "lib.rs", "pub mod test {}");

        let tests_dir = temp.path().join("tests");
        fs::create_dir(&tests_dir).unwrap();
        write_source(&tests_dir, "test.rs", "#[test] fn test() {}");

        let files = indexer.discover_files(temp.path()).unwrap();
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_respects_gitignore() {
        let temp = TempDir::new().unwrap();

        // The directory is turned into a git repository before the walk.
        std::process::Command::new("git")
            .arg("init")
            .current_dir(temp.path())
            .output()
            .expect("Failed to initialize git repo");

        let indexer = default_indexer(temp.path());

        write_source(temp.path(), ".gitignore", "ignored/\n");

        write_source(temp.path(), "included.rs", "fn main() {}");
        write_source(temp.path(), "also_included.py", "print('hi')");

        let ignored_dir = temp.path().join("ignored");
        fs::create_dir(&ignored_dir).unwrap();
        write_source(&ignored_dir, "excluded.rs", "fn test() {}");

        let files = indexer.discover_files(temp.path()).unwrap();

        let found = |name: &str| files.iter().any(|f| f.ends_with(name));
        assert!(found("included.rs"), "Should find included.rs");
        assert!(found("also_included.py"), "Should find also_included.py");

        let leaked = files
            .iter()
            .any(|f| f.to_string_lossy().contains("ignored") && f.ends_with("excluded.rs"));
        assert!(!leaked, "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)");

        assert_eq!(files.len(), 2, "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)");
    }

    // ------------------------------------------------------------------
    // index: basic runs
    // ------------------------------------------------------------------

    #[test]
    fn test_index_empty_directory() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let stats = indexer.index(temp.path(), false).unwrap();

        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_index_single_rust_file() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() { println!(\"Hello\"); }");

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.files_by_language.get("Rust").is_some());
    }

    #[test]
    fn test_index_multiple_files() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");
        write_source(&project_root, "lib.rs", "pub fn test() {}");
        write_source(&project_root, "script.py", "def main(): pass");

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
    }

    #[test]
    fn test_index_creates_trigram_index() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");

        indexer.index(&project_root, false).unwrap();

        let trigrams_path = project_root.join(".reflex/trigrams.bin");
        assert!(trigrams_path.exists());
    }

    #[test]
    fn test_index_creates_content_store() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");

        indexer.index(&project_root, false).unwrap();

        let content_path = project_root.join(".reflex/content.bin");
        assert!(content_path.exists());
    }

    // ------------------------------------------------------------------
    // index: repeated runs
    // ------------------------------------------------------------------

    #[test]
    fn test_index_incremental_no_changes() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");

        let stats1 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats1.total_files, 1);

        // Second run over an untouched tree.
        let stats2 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats2.total_files, 1);
    }

    #[test]
    fn test_index_incremental_with_changes() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        let main_path = write_source(&project_root, "main.rs", "fn main() {}");

        indexer.index(&project_root, false).unwrap();

        // Rewrite the same file with different content, then index again.
        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_incremental_new_file() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");

        let stats1 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats1.total_files, 1);

        // Add a second file between runs.
        write_source(&project_root, "lib.rs", "pub fn test() {}");

        let stats2 = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats2.total_files, 2);
    }

    // ------------------------------------------------------------------
    // index: configuration
    // ------------------------------------------------------------------

    #[test]
    fn test_index_parallel_threads_config() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 2;
        let indexer = indexer_with(&project_root, config);

        write_source(&project_root, "main.rs", "fn main() {}");

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_parallel_threads_auto() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 0;
        let indexer = indexer_with(&project_root, config);

        write_source(&project_root, "main.rs", "fn main() {}");

        let stats = indexer.index(&project_root, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_respects_size_limit() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);

        let mut config = IndexConfig::default();
        config.max_file_size = 50;
        let indexer = indexer_with(&project_root, config);

        // 9 bytes: below the 50-byte limit.
        write_source(&project_root, "small.rs", "fn a() {}");

        // 13 bytes repeated 10 times: above the limit.
        let oversized = "fn main() {}\n".repeat(10);
        write_source(&project_root, "large.rs", &oversized);

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_mixed_languages() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");
        write_source(&project_root, "test.py", "def test(): pass");
        write_source(&project_root, "app.js", "function main() {}");
        write_source(&project_root, "lib.go", "func main() {}");

        let stats = indexer.index(&project_root, false).unwrap();

        assert_eq!(stats.total_files, 4);
        assert!(stats.files_by_language.contains_key("Rust"));
        assert!(stats.files_by_language.contains_key("Python"));
        assert!(stats.files_by_language.contains_key("JavaScript"));
        assert!(stats.files_by_language.contains_key("Go"));
    }

    #[test]
    fn test_index_updates_cache_stats() {
        let temp = TempDir::new().unwrap();
        let project_root = project_dir(&temp);
        let indexer = default_indexer(&project_root);

        write_source(&project_root, "main.rs", "fn main() {}");

        indexer.index(&project_root, false).unwrap();

        // Read the stats back through a fresh cache manager.
        let reopened = CacheManager::new(&project_root);
        let stats = reopened.stats().unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.index_size_bytes > 0);
    }
}