1use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::{ContentReader, ContentWriter};
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Shared, thread-safe progress hook.
///
/// The indexer calls it with three arguments, in order: the number of files
/// processed so far, the total number of files discovered, and a
/// human-readable status message.
pub type ProgressCallback = Arc<
    dyn Fn(usize, usize, String)
        + Send
        + Sync,
>;
41
42struct FileProcessingResult {
44 path: PathBuf,
45 path_str: String,
46 hash: String,
47 content: String,
48 language: Language,
49 line_count: usize,
50 dependencies: Vec<ImportInfo>,
51 exports: Vec<ExportInfo>,
52}
53
54fn find_nearest_tsconfig<'a>(
59 file_path: &str,
60 root: &Path,
61 tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63 let abs_file_path = if Path::new(file_path).is_absolute() {
65 PathBuf::from(file_path)
66 } else {
67 root.join(file_path)
68 };
69
70 let mut current_dir = abs_file_path.parent()?;
72
73 loop {
74 if let Some(alias_map) = tsconfigs.get(current_dir) {
76 return Some(alias_map);
77 }
78
79 current_dir = current_dir.parent()?;
81
82 if current_dir == root || !current_dir.starts_with(root) {
84 break;
85 }
86 }
87
88 None
89}
90
91pub struct Indexer {
93 cache: CacheManager,
94 config: IndexConfig,
95}
96
97impl Indexer {
98 pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
100 Self { cache, config }
101 }
102
103 pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
105 self.index_with_callback(root, show_progress, None)
106 }
107
108 pub fn index_with_callback(
110 &self,
111 root: impl AsRef<Path>,
112 show_progress: bool,
113 progress_callback: Option<ProgressCallback>,
114 ) -> Result<IndexStats> {
115 let root = root.as_ref();
116 log::info!("Indexing directory: {:?}", root);
117
118 let git_state = crate::git::get_git_state_optional(root)?;
120 let branch = git_state
121 .as_ref()
122 .map(|s| s.branch.clone())
123 .unwrap_or_else(|| "_default".to_string());
124
125 if let Some(ref state) = git_state {
126 log::info!(
127 "Git state: branch='{}', commit='{}', dirty={}",
128 state.branch,
129 state.commit,
130 state.dirty
131 );
132 } else {
133 log::info!("Not a git repository, using default branch");
134 }
135
136 let num_threads = if self.config.parallel_threads == 0 {
139 let available_cores = std::thread::available_parallelism()
140 .map(|n| n.get())
141 .unwrap_or(4);
142 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145 } else {
146 self.config.parallel_threads
147 };
148
149 log::info!("Using {} threads for parallel indexing (out of {} available)",
150 num_threads,
151 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153 self.cache.init()?;
155
156 self.check_disk_space(root)?;
158
159 let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161 log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163 let files = self.discover_files(root)?;
165 let total_files = files.len();
166 log::info!("Discovered {} files to index", total_files);
167
168 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171 .unwrap_or_else(|e| {
172 log::warn!("Failed to parse tsconfig.json files: {}", e);
173 HashMap::new()
174 });
175 if !tsconfigs.is_empty() {
176 log::info!("Found {} tsconfig.json files", tsconfigs.len());
177 for (config_dir, alias_map) in &tsconfigs {
178 log::debug!(" {} (base_url: {:?}, {} aliases)",
179 config_dir.display(),
180 alias_map.base_url,
181 alias_map.aliases.len());
182 }
183 }
184
185 if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188 let mut any_changed = false;
190
191 for file_path in &files {
192 let path_str = file_path.to_string_lossy().to_string();
194 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195 rel_path.to_string_lossy().to_string()
197 } else {
198 path_str.trim_start_matches("./").to_string()
200 };
201
202 if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204 match std::fs::read_to_string(file_path) {
206 Ok(content) => {
207 let current_hash = self.hash_content(content.as_bytes());
208 if ¤t_hash != existing_hash {
209 any_changed = true;
210 log::debug!("File changed: {}", path_str);
211 break; }
213 }
214 Err(_) => {
215 any_changed = true;
216 break;
217 }
218 }
219 } else {
220 any_changed = true;
222 break;
223 }
224 }
225
226 if !any_changed {
227 let content_path = self.cache.path().join("content.bin");
228 let trigrams_path = self.cache.path().join("trigrams.bin");
229
230 let schema_ok = self.cache.check_schema_hash().unwrap_or(false);
233
234 if schema_ok && content_path.exists() && trigrams_path.exists() {
235 if let Ok(reader) = ContentReader::open(&content_path) {
236 if reader.file_count() > 0 {
237 log::info!("No files changed - skipping index rebuild");
238 return Ok(self.cache.stats()?);
239 }
240 }
241 log::warn!("content.bin invalid despite hashes matching - forcing rebuild");
242 } else if !schema_ok {
243 log::info!("Schema hash changed - forcing full rebuild");
244 } else {
245 log::warn!("Binary index files missing - forcing rebuild");
246 }
247 }
248 } else if total_files != existing_hashes.len() {
249 log::info!("File count changed ({} -> {}) - full reindex required",
250 existing_hashes.len(), total_files);
251 }
252
253 let mut new_hashes = HashMap::new();
255 let mut files_indexed = 0;
256 let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); let mut trigram_index = TrigramIndex::new();
262 let mut content_writer = ContentWriter::new();
263
264 if total_files > 10000 {
266 let temp_dir = self.cache.path().join("trigram_temp");
267 trigram_index.enable_batch_flush(temp_dir)
268 .context("Failed to enable batch-flush mode for trigram index")?;
269 log::info!("Enabled batch-flush mode for {} files", total_files);
270 }
271
272 let content_path = self.cache.path().join("content.bin");
274 content_writer.init(content_path.clone())
275 .context("Failed to initialize content writer")?;
276
277 let pb = if show_progress {
279 let pb = ProgressBar::new(total_files as u64);
280 pb.set_draw_target(ProgressDrawTarget::stderr());
281 pb.set_style(
282 ProgressStyle::default_bar()
283 .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
284 .unwrap()
285 .progress_chars("=>-")
286 );
287 pb.enable_steady_tick(std::time::Duration::from_millis(100));
289 pb
290 } else {
291 ProgressBar::hidden()
292 };
293
294 let progress_counter = Arc::new(AtomicU64::new(0));
296 let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
298
299 let _start_time = Instant::now();
300
301 let counter_for_thread = Arc::clone(&progress_counter);
303 let status_for_thread = Arc::clone(&progress_status);
304 let pb_clone = pb.clone();
305 let callback_for_thread = progress_callback.clone();
306 let total_files_for_thread = total_files;
307 let progress_thread = if show_progress || callback_for_thread.is_some() {
308 Some(std::thread::spawn(move || {
309 loop {
310 let count = counter_for_thread.load(Ordering::Relaxed);
311 pb_clone.set_position(count);
312
313 if let Some(ref callback) = callback_for_thread {
315 let status = status_for_thread.lock().unwrap().clone();
316 callback(count as usize, total_files_for_thread, status);
317 }
318
319 if count >= total_files_for_thread as u64 {
320 break;
321 }
322 std::thread::sleep(std::time::Duration::from_millis(50));
323 }
324 }))
325 } else {
326 None
327 };
328
329 let pool = rayon::ThreadPoolBuilder::new()
331 .num_threads(num_threads)
332 .build()
333 .context("Failed to create thread pool")?;
334
335 const BATCH_SIZE: usize = 5000;
338 let num_batches = total_files.div_ceil(BATCH_SIZE);
339 log::info!("Processing {} files in {} batches of up to {} files",
340 total_files, num_batches, BATCH_SIZE);
341
342 for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
343 log::info!("Processing batch {}/{} ({} files)",
344 batch_idx + 1, num_batches, batch_files.len());
345
346 let counter_clone = Arc::clone(&progress_counter);
348 let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
349 batch_files
350 .par_iter()
351 .map(|file_path| {
352 let path_str = file_path.to_string_lossy().to_string();
354 let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
355 rel_path.to_string_lossy().to_string()
357 } else {
358 path_str.trim_start_matches("./").to_string()
360 };
361
362 let content = match std::fs::read_to_string(&file_path) {
364 Ok(c) => c,
365 Err(e) => {
366 log::warn!("Failed to read {}: {}", path_str, e);
367 counter_clone.fetch_add(1, Ordering::Relaxed);
369 return None;
370 }
371 };
372
373 let hash = self.hash_content(content.as_bytes());
375
376 let ext = file_path.extension()
378 .and_then(|e| e.to_str())
379 .unwrap_or("");
380 let language = Language::from_extension(ext);
381
382 let line_count = content.lines().count();
384
385 let dependencies = match language {
387 Language::Rust => {
388 match RustDependencyExtractor::extract_dependencies(&content) {
389 Ok(deps) => deps,
390 Err(e) => {
391 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
392 Vec::new()
393 }
394 }
395 }
396 Language::Python => {
397 match PythonDependencyExtractor::extract_dependencies(&content) {
398 Ok(deps) => deps,
399 Err(e) => {
400 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
401 Vec::new()
402 }
403 }
404 }
405 Language::TypeScript | Language::JavaScript => {
406 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
408 match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
409 Ok(deps) => deps,
410 Err(e) => {
411 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
412 Vec::new()
413 }
414 }
415 }
416 Language::Go => {
417 match GoDependencyExtractor::extract_dependencies(&content) {
418 Ok(deps) => deps,
419 Err(e) => {
420 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
421 Vec::new()
422 }
423 }
424 }
425 Language::Java => {
426 match JavaDependencyExtractor::extract_dependencies(&content) {
427 Ok(deps) => deps,
428 Err(e) => {
429 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
430 Vec::new()
431 }
432 }
433 }
434 Language::C => {
435 match CDependencyExtractor::extract_dependencies(&content) {
436 Ok(deps) => deps,
437 Err(e) => {
438 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
439 Vec::new()
440 }
441 }
442 }
443 Language::Cpp => {
444 match CppDependencyExtractor::extract_dependencies(&content) {
445 Ok(deps) => deps,
446 Err(e) => {
447 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
448 Vec::new()
449 }
450 }
451 }
452 Language::CSharp => {
453 match CSharpDependencyExtractor::extract_dependencies(&content) {
454 Ok(deps) => deps,
455 Err(e) => {
456 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
457 Vec::new()
458 }
459 }
460 }
461 Language::PHP => {
462 match PhpDependencyExtractor::extract_dependencies(&content) {
463 Ok(deps) => deps,
464 Err(e) => {
465 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
466 Vec::new()
467 }
468 }
469 }
470 Language::Ruby => {
471 match RubyDependencyExtractor::extract_dependencies(&content) {
472 Ok(deps) => deps,
473 Err(e) => {
474 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
475 Vec::new()
476 }
477 }
478 }
479 Language::Kotlin => {
480 match KotlinDependencyExtractor::extract_dependencies(&content) {
481 Ok(deps) => deps,
482 Err(e) => {
483 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
484 Vec::new()
485 }
486 }
487 }
488 Language::Zig => {
489 match ZigDependencyExtractor::extract_dependencies(&content) {
490 Ok(deps) => deps,
491 Err(e) => {
492 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
493 Vec::new()
494 }
495 }
496 }
497 Language::Vue => {
498 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
500 match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
501 Ok(deps) => deps,
502 Err(e) => {
503 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
504 Vec::new()
505 }
506 }
507 }
508 Language::Svelte => {
509 match SvelteDependencyExtractor::extract_dependencies(&content) {
510 Ok(deps) => deps,
511 Err(e) => {
512 log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
513 Vec::new()
514 }
515 }
516 }
517 _ => Vec::new(),
519 };
520
521 let exports = match language {
523 Language::TypeScript | Language::JavaScript => {
524 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
526 match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
527 Ok(exports) => exports,
528 Err(e) => {
529 log::warn!("Failed to extract exports from {}: {}", path_str, e);
530 Vec::new()
531 }
532 }
533 }
534 Language::Vue => {
535 let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
537 match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
538 Ok(exports) => exports,
539 Err(e) => {
540 log::warn!("Failed to extract exports from {}: {}", path_str, e);
541 Vec::new()
542 }
543 }
544 }
545 _ => Vec::new(),
547 };
548
549 counter_clone.fetch_add(1, Ordering::Relaxed);
551
552 Some(FileProcessingResult {
553 path: file_path.clone(),
554 path_str: normalized_path.to_string(),
555 hash,
556 content,
557 language,
558 line_count,
559 dependencies,
560 exports,
561 })
562 })
563 .collect()
564 });
565
566 for result in results.into_iter().flatten() {
568 let file_id = trigram_index.add_file(result.path.clone());
570
571 trigram_index.index_file(file_id, &result.content);
573
574 content_writer.add_file(result.path.clone(), &result.content);
576
577 files_indexed += 1;
578
579 file_metadata.push((
581 result.path_str.clone(),
582 result.hash.clone(),
583 format!("{:?}", result.language),
584 result.line_count
585 ));
586
587 if !result.dependencies.is_empty() {
589 all_dependencies.push((result.path_str.clone(), result.dependencies));
590 }
591
592 if !result.exports.is_empty() {
594 all_exports.push((result.path_str.clone(), result.exports));
595 }
596
597 new_hashes.insert(result.path_str, result.hash);
598 }
599
600 if total_files > 10000 {
602 let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
603 if show_progress {
604 pb.set_message(flush_msg.clone());
605 }
606 *progress_status.lock().unwrap() = flush_msg;
607 trigram_index.flush_batch()
608 .context("Failed to flush trigram batch")?;
609 }
610 }
611
612 if let Some(thread) = progress_thread {
614 let _ = thread.join();
615 }
616
617 if show_progress {
619 let final_count = progress_counter.load(Ordering::Relaxed);
620 pb.set_position(final_count);
621 }
622
623 *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
625 if show_progress {
626 pb.set_message("Finalizing trigram index...".to_string());
627 }
628 trigram_index.finalize();
629
630 *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
632 if show_progress {
633 pb.set_message("Writing file metadata to database...".to_string());
634 }
635
636 if !file_metadata.is_empty() {
639 let files_without_hash: Vec<(String, String, usize)> = file_metadata
641 .iter()
642 .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
643 .collect();
644
645 *progress_status.lock().unwrap() = "Recording branch files...".to_string();
647 if show_progress {
648 pb.set_message("Recording branch files...".to_string());
649 }
650
651 let branch_files: Vec<(String, String)> = file_metadata
653 .iter()
654 .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
655 .collect();
656
657 self.cache.batch_update_files_and_branch(
659 &files_without_hash,
660 &branch_files,
661 &branch,
662 git_state.as_ref().map(|s| s.commit.as_str()),
663 ).context("Failed to batch update files and branch hashes")?;
664
665 log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
666 }
667
668 self.cache.update_branch_metadata(
670 &branch,
671 git_state.as_ref().map(|s| s.commit.as_str()),
672 file_metadata.len(),
673 git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
674 )?;
675
676 self.cache.checkpoint_wal()
679 .context("Failed to checkpoint WAL")?;
680 log::debug!("WAL checkpoint completed - database is fully synced");
681
682 if !all_dependencies.is_empty() {
684 *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
685 if show_progress {
686 pb.set_message("Extracting dependencies...".to_string());
687 }
688
689 let go_modules = crate::parsers::go::parse_all_go_modules(root)
691 .unwrap_or_else(|e| {
692 log::warn!("Failed to parse go.mod files: {}", e);
693 Vec::new()
694 });
695 if !go_modules.is_empty() {
696 log::info!("Found {} Go modules", go_modules.len());
697 for module in &go_modules {
698 log::debug!(" {} (project: {})", module.name, module.project_root);
699 }
700 }
701
702 let java_projects = crate::parsers::java::parse_all_java_projects(root)
704 .unwrap_or_else(|e| {
705 log::warn!("Failed to parse Java project configs: {}", e);
706 Vec::new()
707 });
708 if !java_projects.is_empty() {
709 log::info!("Found {} Java projects", java_projects.len());
710 for project in &java_projects {
711 log::debug!(" {} (project: {})", project.package_name, project.project_root);
712 }
713 }
714
715 let python_packages = crate::parsers::python::parse_all_python_packages(root)
717 .unwrap_or_else(|e| {
718 log::warn!("Failed to parse Python package configs: {}", e);
719 Vec::new()
720 });
721 if !python_packages.is_empty() {
722 log::info!("Found {} Python packages", python_packages.len());
723 for package in &python_packages {
724 log::debug!(" {} (project: {})", package.name, package.project_root);
725 }
726 }
727
728 let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
730 .unwrap_or_else(|e| {
731 log::warn!("Failed to parse Ruby project configs: {}", e);
732 Vec::new()
733 });
734 if !ruby_projects.is_empty() {
735 log::info!("Found {} Ruby projects", ruby_projects.len());
736 for project in &ruby_projects {
737 log::debug!(" {} (project: {})", project.gem_name, project.project_root);
738 }
739 }
740
741 let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
745 .unwrap_or_else(|e| {
746 log::warn!("Failed to parse composer.json files: {}", e);
747 Vec::new()
748 });
749 if !php_psr4_mappings.is_empty() {
750 log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
751 for mapping in &php_psr4_mappings {
752 log::debug!(" {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
753 }
754 }
755
756 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
758 .unwrap_or_else(|e| {
759 log::warn!("Failed to parse tsconfig.json files: {}", e);
760 HashMap::new()
761 });
762 if !tsconfigs.is_empty() {
763 log::info!("Found {} tsconfig.json files", tsconfigs.len());
764 for (config_dir, alias_map) in &tsconfigs {
765 log::debug!(" {} (base_url: {:?}, {} aliases)",
766 config_dir.display(),
767 alias_map.base_url,
768 alias_map.aliases.len());
769 }
770 }
771
772 let cache_for_deps = CacheManager::new(root);
774 let dep_index = DependencyIndex::new(cache_for_deps);
775
776 let mut total_deps_inserted = 0;
777
778 for (file_path, import_infos) in all_dependencies {
780 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
782 Some(id) => id,
783 None => {
784 log::warn!("File not found in database (skipping dependencies): {}", file_path);
785 continue;
786 }
787 };
788
789 let mut resolved_deps = Vec::new();
791
792 for mut import_info in import_infos {
793 if file_path.ends_with(".go") {
795 let mut reclassified = false;
797 for module in &go_modules {
798 import_info.import_type = crate::parsers::go::reclassify_go_import(
799 &import_info.imported_path,
800 Some(&module.name),
801 );
802 if matches!(import_info.import_type, ImportType::Internal) {
804 reclassified = true;
805 break;
806 }
807 }
808 if !reclassified {
810 import_info.import_type = crate::parsers::go::reclassify_go_import(
811 &import_info.imported_path,
812 None,
813 );
814 }
815 }
816
817 if file_path.ends_with(".java") {
819 let mut reclassified = false;
821 for project in &java_projects {
822 import_info.import_type = crate::parsers::java::reclassify_java_import(
823 &import_info.imported_path,
824 Some(&project.package_name),
825 );
826 if matches!(import_info.import_type, ImportType::Internal) {
828 reclassified = true;
829 break;
830 }
831 }
832 if !reclassified {
834 import_info.import_type = crate::parsers::java::reclassify_java_import(
835 &import_info.imported_path,
836 None,
837 );
838 }
839 }
840
841 if file_path.ends_with(".py") {
843 let mut reclassified = false;
845 for package in &python_packages {
846 import_info.import_type = crate::parsers::python::reclassify_python_import(
847 &import_info.imported_path,
848 Some(&package.name),
849 );
850 if matches!(import_info.import_type, ImportType::Internal) {
852 reclassified = true;
853 break;
854 }
855 }
856 if !reclassified {
858 import_info.import_type = crate::parsers::python::reclassify_python_import(
859 &import_info.imported_path,
860 None,
861 );
862 }
863 }
864
865 if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
867 let mut reclassified = false;
869 for project in &ruby_projects {
870 let gem_names = vec![project.gem_name.clone()];
871 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
872 &import_info.imported_path,
873 &gem_names,
874 );
875 if matches!(import_info.import_type, ImportType::Internal) {
877 reclassified = true;
878 break;
879 }
880 }
881 if !reclassified {
883 import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
884 &import_info.imported_path,
885 &[],
886 );
887 }
888 }
889
890 if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
892 let mut reclassified = false;
894 for project in &java_projects {
895 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
896 &import_info.imported_path,
897 Some(&project.package_name),
898 );
899 if matches!(import_info.import_type, ImportType::Internal) {
901 reclassified = true;
902 break;
903 }
904 }
905 if !reclassified {
907 import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
908 &import_info.imported_path,
909 None,
910 );
911 }
912 }
913
914 if !matches!(import_info.import_type, ImportType::Internal) {
916 continue;
917 }
918
919 let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
921 if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
923 &import_info.imported_path,
924 &php_psr4_mappings,
925 ) {
926 match dep_index.get_file_id_by_path(&resolved_path) {
928 Ok(Some(id)) => {
929 log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
930 import_info.imported_path, resolved_path, id);
931 Some(id)
932 }
933 Ok(None) => {
934 log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
935 import_info.imported_path, resolved_path);
936 None
937 }
938 Err(e) => {
939 log::debug!("Skipping PHP dependency resolution for '{}': {}", resolved_path, e);
940 None
941 }
942 }
943 } else {
944 log::trace!("Could not resolve PHP namespace using PSR-4: {}",
945 import_info.imported_path);
946 None
947 }
948 } else if file_path.ends_with(".py") && !python_packages.is_empty() {
949 if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
951 &import_info.imported_path,
952 &python_packages,
953 Some(&file_path),
954 ) {
955 match dep_index.get_file_id_by_path(&resolved_path) {
957 Ok(Some(id)) => {
958 log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
959 import_info.imported_path, resolved_path, id);
960 Some(id)
961 }
962 Ok(None) => {
963 log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
964 import_info.imported_path, resolved_path);
965 None
966 }
967 Err(e) => {
968 log::debug!("Skipping Python dependency resolution for '{}': {}", resolved_path, e);
969 None
970 }
971 }
972 } else {
973 log::trace!("Could not resolve Python import: {}", import_info.imported_path);
974 None
975 }
976 } else if file_path.ends_with(".go") && !go_modules.is_empty() {
977 if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
979 &import_info.imported_path,
980 &go_modules,
981 Some(&file_path),
982 ) {
983 match dep_index.get_file_id_by_path(&resolved_path) {
985 Ok(Some(id)) => {
986 log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
987 import_info.imported_path, resolved_path, id);
988 Some(id)
989 }
990 Ok(None) => {
991 log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
992 import_info.imported_path, resolved_path);
993 None
994 }
995 Err(e) => {
996 log::debug!("Skipping Go dependency resolution for '{}': {}", resolved_path, e);
997 None
998 }
999 }
1000 } else {
1001 log::trace!("Could not resolve Go import: {}", import_info.imported_path);
1002 None
1003 }
1004 } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1005 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1006 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1007 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
1008 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1010 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1011 &import_info.imported_path,
1012 Some(&file_path),
1013 alias_map,
1014 ) {
1015 let candidates: Vec<&str> = candidates_str.split('|').collect();
1017
1018 let mut resolved_id = None;
1020 for candidate_path in candidates {
1021 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1024 rel_path.to_string_lossy().to_string()
1025 } else {
1026 candidate_path.to_string()
1028 };
1029
1030 log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1031 match dep_index.get_file_id_by_path(&normalized_candidate) {
1032 Ok(Some(id)) => {
1033 log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1034 import_info.imported_path, normalized_candidate, id);
1035 resolved_id = Some(id);
1036 break; }
1038 Ok(None) => {
1039 log::trace!("TS/JS candidate not in index: {}", candidate_path);
1040 }
1041 Err(e) => {
1042 log::debug!("Skipping TS/JS dependency resolution for '{}': {}", normalized_candidate, e);
1043 }
1044 }
1045 }
1046
1047 if resolved_id.is_none() {
1048 log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1049 candidates_str);
1050 }
1051
1052 resolved_id
1053 } else {
1054 log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1055 None
1056 }
1057 } else if file_path.ends_with(".rs") {
1058 if let Some(resolved_path) = crate::parsers::rust::resolve_rust_use_to_path(
1060 &import_info.imported_path,
1061 Some(&file_path),
1062 Some(root.to_str().unwrap_or("")),
1063 ) {
1064 match dep_index.get_file_id_by_path(&resolved_path) {
1066 Ok(Some(id)) => {
1067 log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1068 import_info.imported_path, resolved_path, id);
1069 Some(id)
1070 }
1071 Ok(None) => {
1072 log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1073 import_info.imported_path, resolved_path);
1074 None
1075 }
1076 Err(e) => {
1077 log::debug!("Skipping Rust dependency resolution for '{}': {}", resolved_path, e);
1078 None
1079 }
1080 }
1081 } else {
1082 log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1083 None
1084 }
1085 } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1086 if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1088 &import_info.imported_path,
1089 &java_projects,
1090 Some(&file_path),
1091 ) {
1092 match dep_index.get_file_id_by_path(&resolved_path) {
1094 Ok(Some(id)) => {
1095 log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1096 import_info.imported_path, resolved_path, id);
1097 Some(id)
1098 }
1099 Ok(None) => {
1100 log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1101 import_info.imported_path, resolved_path);
1102 None
1103 }
1104 Err(e) => {
1105 log::debug!("Skipping Java dependency resolution for '{}': {}", resolved_path, e);
1106 None
1107 }
1108 }
1109 } else {
1110 log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1111 None
1112 }
1113 } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1114 if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1116 &import_info.imported_path,
1117 &java_projects,
1118 Some(&file_path),
1119 ) {
1120 match dep_index.get_file_id_by_path(&resolved_path) {
1122 Ok(Some(id)) => {
1123 log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1124 import_info.imported_path, resolved_path, id);
1125 Some(id)
1126 }
1127 Ok(None) => {
1128 log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1129 import_info.imported_path, resolved_path);
1130 None
1131 }
1132 Err(e) => {
1133 log::debug!("Skipping Kotlin dependency resolution for '{}': {}", resolved_path, e);
1134 None
1135 }
1136 }
1137 } else {
1138 log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1139 None
1140 }
1141 } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1142 if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1144 &import_info.imported_path,
1145 &ruby_projects,
1146 Some(&file_path),
1147 ) {
1148 match dep_index.get_file_id_by_path(&resolved_path) {
1150 Ok(Some(id)) => {
1151 log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1152 import_info.imported_path, resolved_path, id);
1153 Some(id)
1154 }
1155 Ok(None) => {
1156 log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1157 import_info.imported_path, resolved_path);
1158 None
1159 }
1160 Err(e) => {
1161 log::debug!("Skipping Ruby dependency resolution for '{}': {}", resolved_path, e);
1162 None
1163 }
1164 }
1165 } else {
1166 log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1167 None
1168 }
1169 } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1170 if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1172 &import_info.imported_path,
1173 Some(&file_path),
1174 ) {
1175 match dep_index.get_file_id_by_path(&resolved_path) {
1177 Ok(Some(id)) => {
1178 log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1179 import_info.imported_path, resolved_path, id);
1180 Some(id)
1181 }
1182 Ok(None) => {
1183 log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1184 import_info.imported_path, resolved_path);
1185 None
1186 }
1187 Err(e) => {
1188 log::debug!("Skipping C dependency resolution for '{}': {}", resolved_path, e);
1189 None
1190 }
1191 }
1192 } else {
1193 log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1194 None
1195 }
1196 } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1197 || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1198 || file_path.ends_with(".C") || file_path.ends_with(".H") {
1199 if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1201 &import_info.imported_path,
1202 Some(&file_path),
1203 ) {
1204 match dep_index.get_file_id_by_path(&resolved_path) {
1206 Ok(Some(id)) => {
1207 log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1208 import_info.imported_path, resolved_path, id);
1209 Some(id)
1210 }
1211 Ok(None) => {
1212 log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1213 import_info.imported_path, resolved_path);
1214 None
1215 }
1216 Err(e) => {
1217 log::debug!("Skipping C++ dependency resolution for '{}': {}", resolved_path, e);
1218 None
1219 }
1220 }
1221 } else {
1222 log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1223 None
1224 }
1225 } else if file_path.ends_with(".cs") {
1226 if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1228 &import_info.imported_path,
1229 Some(&file_path),
1230 ) {
1231 match dep_index.get_file_id_by_path(&resolved_path) {
1233 Ok(Some(id)) => {
1234 log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1235 import_info.imported_path, resolved_path, id);
1236 Some(id)
1237 }
1238 Ok(None) => {
1239 log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1240 import_info.imported_path, resolved_path);
1241 None
1242 }
1243 Err(e) => {
1244 log::debug!("Skipping C# dependency resolution for '{}': {}", resolved_path, e);
1245 None
1246 }
1247 }
1248 } else {
1249 log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1250 None
1251 }
1252 } else if file_path.ends_with(".zig") {
1253 if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1255 &import_info.imported_path,
1256 Some(&file_path),
1257 ) {
1258 match dep_index.get_file_id_by_path(&resolved_path) {
1260 Ok(Some(id)) => {
1261 log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1262 import_info.imported_path, resolved_path, id);
1263 Some(id)
1264 }
1265 Ok(None) => {
1266 log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1267 import_info.imported_path, resolved_path);
1268 None
1269 }
1270 Err(e) => {
1271 log::debug!("Skipping Zig dependency resolution for '{}': {}", resolved_path, e);
1272 None
1273 }
1274 }
1275 } else {
1276 log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1277 None
1278 }
1279 } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1280 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1282 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1283 &import_info.imported_path,
1284 Some(&file_path),
1285 alias_map,
1286 ) {
1287 let candidates: Vec<&str> = candidates_str.split('|').collect();
1289
1290 let mut resolved_id = None;
1292 for candidate_path in candidates {
1293 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1296 rel_path.to_string_lossy().to_string()
1297 } else {
1298 candidate_path.to_string()
1300 };
1301
1302 match dep_index.get_file_id_by_path(&normalized_candidate) {
1303 Ok(Some(id)) => {
1304 log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1305 import_info.imported_path, candidate_path, id);
1306 resolved_id = Some(id);
1307 break; }
1309 Ok(None) => {
1310 log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1311 }
1312 Err(e) => {
1313 log::debug!("Skipping Vue/Svelte dependency resolution for '{}': {}", normalized_candidate, e);
1314 }
1315 }
1316 }
1317
1318 if resolved_id.is_none() {
1319 log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1320 candidates_str);
1321 }
1322
1323 resolved_id
1324 } else {
1325 log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1326 None
1327 }
1328 } else {
1329 None
1330 };
1331
1332 resolved_deps.push(Dependency {
1335 file_id,
1336 imported_path: import_info.imported_path.clone(),
1337 resolved_file_id,
1338 import_type: import_info.import_type,
1339 line_number: import_info.line_number,
1340 imported_symbols: import_info.imported_symbols.clone(),
1341 });
1342 }
1343
1344 dep_index.clear_dependencies(file_id)?;
1346
1347 if !resolved_deps.is_empty() {
1349 dep_index.batch_insert_dependencies(&resolved_deps)?;
1350 total_deps_inserted += resolved_deps.len();
1351 }
1352 }
1353
1354 log::info!("Extracted {} dependencies", total_deps_inserted);
1355 }
1356
1357 if !all_exports.is_empty() {
1359 *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1360 if show_progress {
1361 pb.set_message("Extracting exports...".to_string());
1362 }
1363
1364 let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1366 .unwrap_or_else(|e| {
1367 log::warn!("Failed to parse tsconfig.json files: {}", e);
1368 HashMap::new()
1369 });
1370
1371 let cache_for_exports = CacheManager::new(root);
1373 let dep_index = DependencyIndex::new(cache_for_exports);
1374
1375 let mut total_exports_inserted = 0;
1376
1377 for (file_path, export_infos) in all_exports {
1379 let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1381 Some(id) => id,
1382 None => {
1383 log::warn!("File not found in database (skipping exports): {}", file_path);
1384 continue;
1385 }
1386 };
1387
1388 for export_info in export_infos {
1390 let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1392 || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1393 || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1394 || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1395 || file_path.ends_with(".vue") {
1396 let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1398 if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1399 &export_info.source_path,
1400 Some(&file_path),
1401 alias_map,
1402 ) {
1403 let candidates: Vec<&str> = candidates_str.split('|').collect();
1405
1406 let mut resolved_id = None;
1408 for candidate_path in candidates {
1409 let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1411 rel_path.to_string_lossy().to_string()
1412 } else {
1413 candidate_path.to_string()
1414 };
1415
1416 match dep_index.get_file_id_by_path(&normalized_candidate) {
1417 Ok(Some(id)) => {
1418 log::trace!("Resolved export source: {} -> {} (file_id={})",
1419 export_info.source_path, normalized_candidate, id);
1420 resolved_id = Some(id);
1421 break; }
1423 Ok(None) => {
1424 log::trace!("Export source candidate not in index: {}", candidate_path);
1425 }
1426 Err(e) => {
1427 log::debug!("Skipping export source resolution for '{}': {}", normalized_candidate, e);
1428 }
1429 }
1430 }
1431
1432 if resolved_id.is_none() {
1433 log::trace!("Export source: no matching file found in database for any candidate: {}",
1434 candidates_str);
1435 }
1436
1437 resolved_id
1438 } else {
1439 log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1440 None
1441 }
1442 } else {
1443 None
1444 };
1445
1446 dep_index.insert_export(
1448 file_id,
1449 export_info.exported_symbol,
1450 export_info.source_path,
1451 resolved_source_id,
1452 export_info.line_number,
1453 )?;
1454
1455 total_exports_inserted += 1;
1456 }
1457 }
1458
1459 log::info!("Extracted {} exports", total_exports_inserted);
1460 }
1461
1462 log::info!("Indexed {} files", files_indexed);
1463
1464 *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1466 if show_progress {
1467 pb.set_message("Writing trigram index...".to_string());
1468 }
1469 let trigrams_path = self.cache.path().join("trigrams.bin");
1470 log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1471 trigram_index.trigram_count());
1472
1473 trigram_index.write(&trigrams_path)
1474 .context("Failed to write trigram index")?;
1475 log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1476
1477 *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1479 if show_progress {
1480 pb.set_message("Finalizing content store...".to_string());
1481 }
1482 content_writer.finalize_if_needed()
1483 .context("Failed to finalize content store")?;
1484 log::info!("Wrote {} files ({} bytes) to content.bin",
1485 content_writer.file_count(), content_writer.content_size());
1486
1487 *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1489 if show_progress {
1490 pb.set_message("Updating statistics...".to_string());
1491 }
1492 self.cache.update_stats(&branch)?;
1494
1495 self.cache.update_schema_hash()?;
1497
1498 pb.finish_with_message("Indexing complete");
1499
1500 let stats = self.cache.stats()?;
1502 log::info!("Indexing complete: {} files",
1503 stats.total_files);
1504
1505 Ok(stats)
1506 }
1507
1508 fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1510 let mut files = Vec::new();
1511
1512 let walker = WalkBuilder::new(root)
1517 .follow_links(self.config.follow_symlinks)
1518 .git_ignore(true) .git_global(false) .git_exclude(false) .build();
1522
1523 for entry in walker {
1524 let entry = entry?;
1525 let path = entry.path();
1526
1527 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1529 continue;
1530 }
1531
1532 if self.should_index(path) {
1534 files.push(path.to_path_buf());
1535 }
1536 }
1537
1538 Ok(files)
1539 }
1540
1541 fn should_index(&self, path: &Path) -> bool {
1543 let ext = match path.extension() {
1545 Some(ext) => ext.to_string_lossy(),
1546 None => return false,
1547 };
1548
1549 let lang = Language::from_extension(&ext);
1550
1551 if !lang.is_supported() {
1553 if !matches!(lang, Language::Unknown) {
1554 log::debug!("Skipping {} ({:?} parser not yet implemented)",
1555 path.display(), lang);
1556 }
1557 return false;
1558 }
1559
1560 if !self.config.languages.is_empty() && !self.config.languages.contains(&lang) {
1562 log::debug!("Skipping {} ({:?} not in configured languages)", path.display(), lang);
1563 return false;
1564 }
1565
1566 if let Ok(metadata) = std::fs::metadata(path) {
1568 if metadata.len() > self.config.max_file_size as u64 {
1569 log::debug!("Skipping {} (too large: {} bytes)",
1570 path.display(), metadata.len());
1571 return false;
1572 }
1573 }
1574
1575 true
1579 }
1580
1581 fn hash_content(&self, content: &[u8]) -> String {
1583 let hash = blake3::hash(content);
1584 hash.to_hex().to_string()
1585 }
1586
1587 fn check_disk_space(&self, root: &Path) -> Result<()> {
1592 let cache_path = self.cache.path();
1594
1595 #[cfg(unix)]
1597 {
1598 let test_file = cache_path.join(".space_check");
1601 match std::fs::write(&test_file, b"test") {
1602 Ok(_) => {
1603 let _ = std::fs::remove_file(&test_file);
1604
1605 if let Ok(output) = std::process::Command::new("df")
1607 .arg("-k")
1608 .arg(cache_path.parent().unwrap_or(root))
1609 .output()
1610 {
1611 if let Ok(df_output) = String::from_utf8(output.stdout) {
1612 if let Some(line) = df_output.lines().nth(1) {
1614 let parts: Vec<&str> = line.split_whitespace().collect();
1615 if parts.len() >= 4 {
1616 if let Ok(available_kb) = parts[3].parse::<u64>() {
1617 let available_mb = available_kb / 1024;
1618
1619 if available_mb < 100 {
1621 log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1622 output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1623 } else {
1624 log::debug!("Available disk space: {}MB", available_mb);
1625 }
1626 }
1627 }
1628 }
1629 }
1630 }
1631
1632 Ok(())
1633 }
1634 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1635 anyhow::bail!(
1636 "Permission denied writing to cache directory: {}. Check file permissions.",
1637 cache_path.display()
1638 )
1639 }
1640 Err(e) => {
1641 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1643 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1644 }
1645 }
1646 }
1647
1648 #[cfg(not(unix))]
1649 {
1650 let test_file = cache_path.join(".space_check");
1652 match std::fs::write(&test_file, b"test") {
1653 Ok(_) => {
1654 let _ = std::fs::remove_file(&test_file);
1655 Ok(())
1656 }
1657 Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1658 anyhow::bail!(
1659 "Permission denied writing to cache directory: {}. Check file permissions.",
1660 cache_path.display()
1661 )
1662 }
1663 Err(e) => {
1664 log::warn!("Failed to write test file (possible disk space issue): {}", e);
1665 Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1666 }
1667 }
1668 }
1669 }
1670}
1671
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // ---- Shared fixtures ----

    /// Build an indexer whose cache is rooted at `root`, using `config`.
    fn indexer_with(root: &Path, config: IndexConfig) -> Indexer {
        Indexer::new(CacheManager::new(root), config)
    }

    /// Build an indexer whose cache is rooted at `root`, using the default config.
    fn default_indexer(root: &Path) -> Indexer {
        indexer_with(root, IndexConfig::default())
    }

    /// Create an empty `project` directory inside `temp` and return its path.
    fn new_project(temp: &TempDir) -> PathBuf {
        let dir = temp.path().join("project");
        fs::create_dir(&dir).unwrap();
        dir
    }

    /// Write `contents` to `dir/name` and return the resulting path.
    fn write_file(dir: &Path, name: &str, contents: &str) -> PathBuf {
        let path = dir.join(name);
        fs::write(&path, contents).unwrap();
        path
    }

    // ---- Construction and hashing ----

    #[test]
    fn test_indexer_creation() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        assert!(indexer.cache.path().ends_with(".reflex"));
    }

    #[test]
    fn test_hash_content() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let first = indexer.hash_content(b"hello world");
        let repeat = indexer.hash_content(b"hello world");
        let other = indexer.hash_content(b"different content");

        // Equal input gives equal hashes; different input gives different hashes.
        assert_eq!(first, repeat);
        assert_ne!(first, other);

        // The hex digest is 64 characters long.
        assert_eq!(first.len(), 64);
    }

    // ---- should_index ----

    #[test]
    fn test_should_index_rust_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let rust_file = write_file(temp.path(), "test.rs", "fn main() {}");

        assert!(indexer.should_index(&rust_file));
    }

    #[test]
    fn test_should_index_unsupported_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let unsupported_file = write_file(temp.path(), "test.txt", "plain text");

        assert!(!indexer.should_index(&unsupported_file));
    }

    #[test]
    fn test_should_index_no_extension() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let no_ext_file = write_file(temp.path(), "Makefile", "all:\n\techo hello");

        assert!(!indexer.should_index(&no_ext_file));
    }

    #[test]
    fn test_should_index_size_limit() {
        let temp = TempDir::new().unwrap();

        let mut config = IndexConfig::default();
        config.max_file_size = 100;
        let indexer = indexer_with(temp.path(), config);

        // Below the 100-byte limit.
        let small_file = write_file(temp.path(), "small.rs", "fn main() {}");
        assert!(indexer.should_index(&small_file));

        // 150 bytes: above the limit.
        let large_file = write_file(temp.path(), "large.rs", &"a".repeat(150));
        assert!(!indexer.should_index(&large_file));
    }

    // ---- discover_files ----

    #[test]
    fn test_discover_files_empty_dir() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let files = indexer.discover_files(temp.path()).unwrap();

        assert_eq!(files.len(), 0);
    }

    #[test]
    fn test_discover_files_single_file() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        write_file(temp.path(), "main.rs", "fn main() {}");

        let files = indexer.discover_files(temp.path()).unwrap();

        assert_eq!(files.len(), 1);
        assert!(files[0].ends_with("main.rs"));
    }

    #[test]
    fn test_discover_files_multiple_languages() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        write_file(temp.path(), "main.rs", "fn main() {}");
        write_file(temp.path(), "script.py", "print('hello')");
        write_file(temp.path(), "app.js", "console.log('hi')");
        write_file(temp.path(), "README.md", "# Project");

        let files = indexer.discover_files(temp.path()).unwrap();

        // Four files were written, but only three are expected to be discovered.
        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_subdirectories() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let src_dir = temp.path().join("src");
        fs::create_dir(&src_dir).unwrap();
        write_file(&src_dir, "main.rs", "fn main() {}");
        write_file(&src_dir, "lib.rs", "pub mod test {}");

        let tests_dir = temp.path().join("tests");
        fs::create_dir(&tests_dir).unwrap();
        write_file(&tests_dir, "test.rs", "#[test] fn test() {}");

        let files = indexer.discover_files(temp.path()).unwrap();

        assert_eq!(files.len(), 3);
    }

    #[test]
    fn test_discover_files_respects_gitignore() {
        let temp = TempDir::new().unwrap();

        // The directory is turned into a git repository before the walk.
        std::process::Command::new("git")
            .arg("init")
            .current_dir(temp.path())
            .output()
            .expect("Failed to initialize git repo");

        let indexer = default_indexer(temp.path());

        write_file(temp.path(), ".gitignore", "ignored/\n");
        write_file(temp.path(), "included.rs", "fn main() {}");
        write_file(temp.path(), "also_included.py", "print('hi')");

        let ignored_dir = temp.path().join("ignored");
        fs::create_dir(&ignored_dir).unwrap();
        write_file(&ignored_dir, "excluded.rs", "fn test() {}");

        let files = indexer.discover_files(temp.path()).unwrap();
        let found = |name: &str| files.iter().any(|f| f.ends_with(name));

        assert!(found("included.rs"), "Should find included.rs");
        assert!(found("also_included.py"), "Should find also_included.py");

        let leaked_ignored = files
            .iter()
            .any(|f| f.to_string_lossy().contains("ignored") && f.ends_with("excluded.rs"));
        assert!(
            !leaked_ignored,
            "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)"
        );

        assert_eq!(
            files.len(),
            2,
            "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)"
        );
    }

    // ---- index: basic runs ----

    #[test]
    fn test_index_empty_directory() {
        let temp = TempDir::new().unwrap();
        let indexer = default_indexer(temp.path());

        let stats = indexer.index(temp.path(), false).unwrap();

        assert_eq!(stats.total_files, 0);
    }

    #[test]
    fn test_index_single_rust_file() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() { println!(\"Hello\"); }");

        let stats = indexer.index(&project, false).unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.files_by_language.contains_key("Rust"));
    }

    #[test]
    fn test_index_multiple_files() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");
        write_file(&project, "lib.rs", "pub fn test() {}");
        write_file(&project, "script.py", "def main(): pass");

        let stats = indexer.index(&project, false).unwrap();

        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
    }

    #[test]
    fn test_index_creates_trigram_index() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");
        indexer.index(&project, false).unwrap();

        assert!(project.join(".reflex/trigrams.bin").exists());
    }

    #[test]
    fn test_index_creates_content_store() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");
        indexer.index(&project, false).unwrap();

        assert!(project.join(".reflex/content.bin").exists());
    }

    // ---- index: repeated runs ----

    #[test]
    fn test_index_incremental_no_changes() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");

        let first_run = indexer.index(&project, false).unwrap();
        assert_eq!(first_run.total_files, 1);

        // Second run over an unchanged tree.
        let second_run = indexer.index(&project, false).unwrap();
        assert_eq!(second_run.total_files, 1);
    }

    #[test]
    fn test_index_incremental_with_changes() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        let main_path = write_file(&project, "main.rs", "fn main() {}");
        indexer.index(&project, false).unwrap();

        // Rewrite the file with different content, then index again.
        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();

        let stats = indexer.index(&project, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_incremental_new_file() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");

        let first_run = indexer.index(&project, false).unwrap();
        assert_eq!(first_run.total_files, 1);

        // Add a second file, then index again.
        write_file(&project, "lib.rs", "pub fn test() {}");

        let second_run = indexer.index(&project, false).unwrap();
        assert_eq!(second_run.total_files, 2);
    }

    // ---- index: configuration ----

    #[test]
    fn test_index_parallel_threads_config() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 2;
        let indexer = indexer_with(&project, config);

        write_file(&project, "main.rs", "fn main() {}");

        let stats = indexer.index(&project, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_parallel_threads_auto() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);

        let mut config = IndexConfig::default();
        config.parallel_threads = 0;
        let indexer = indexer_with(&project, config);

        write_file(&project, "main.rs", "fn main() {}");

        let stats = indexer.index(&project, false).unwrap();
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_respects_size_limit() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);

        let mut config = IndexConfig::default();
        config.max_file_size = 50;
        let indexer = indexer_with(&project, config);

        // 9 bytes: under the 50-byte limit.
        write_file(&project, "small.rs", "fn a() {}");

        // 130 bytes: over the limit.
        write_file(&project, "large.rs", &"fn main() {}\n".repeat(10));

        let stats = indexer.index(&project, false).unwrap();

        // Only the small file is counted.
        assert_eq!(stats.total_files, 1);
    }

    #[test]
    fn test_index_mixed_languages() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");
        write_file(&project, "test.py", "def test(): pass");
        write_file(&project, "app.js", "function main() {}");
        write_file(&project, "lib.go", "func main() {}");

        let stats = indexer.index(&project, false).unwrap();

        assert_eq!(stats.total_files, 4);
        for language in ["Rust", "Python", "JavaScript", "Go"] {
            assert!(stats.files_by_language.contains_key(language));
        }
    }

    #[test]
    fn test_index_updates_cache_stats() {
        let temp = TempDir::new().unwrap();
        let project = new_project(&temp);
        let indexer = default_indexer(&project);

        write_file(&project, "main.rs", "fn main() {}");
        indexer.index(&project, false).unwrap();

        // Read the statistics back through a fresh cache handle.
        let cache = CacheManager::new(&project);
        let stats = cache.stats().unwrap();

        assert_eq!(stats.total_files, 1);
        assert!(stats.index_size_bytes > 0);
    }
}