reflex/
indexer.rs

1//! Indexing engine for parsing source code
2//!
3//! The indexer scans the project directory, parses source files using Tree-sitter,
4//! and builds the symbol/token cache for fast querying.
5
6use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::ContentWriter;
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Shared progress-reporting hook.
///
/// Invoked as `callback(current_file_count, total_file_count, status_message)`.
/// The hook lives behind an `Arc` so a handle can be cloned and moved into another
/// thread; the `Send + Sync` bounds make that sharing legal, and the `'static` bound
/// (already implied for `Arc<dyn ...>`) is written out to show that the closure must
/// own everything it captures.
pub type ProgressCallback = Arc<dyn Fn(usize, usize, String) + Send + Sync + 'static>;
41
42/// Result of processing a single file (used for parallel processing)
43struct FileProcessingResult {
44    path: PathBuf,
45    path_str: String,
46    hash: String,
47    content: String,
48    language: Language,
49    line_count: usize,
50    dependencies: Vec<ImportInfo>,
51    exports: Vec<ExportInfo>,
52}
53
54/// Find the nearest tsconfig.json for a given source file
55///
56/// Walks up the directory tree from the source file to find the nearest tsconfig directory.
57/// Returns a reference to the PathAliasMap if found.
58fn find_nearest_tsconfig<'a>(
59    file_path: &str,
60    root: &Path,
61    tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63    // Convert file_path to absolute path (relative to root)
64    let abs_file_path = if Path::new(file_path).is_absolute() {
65        PathBuf::from(file_path)
66    } else {
67        root.join(file_path)
68    };
69
70    // Start from the file's directory and walk up
71    let mut current_dir = abs_file_path.parent()?;
72
73    loop {
74        // Check if we have a tsconfig for this directory
75        if let Some(alias_map) = tsconfigs.get(current_dir) {
76            return Some(alias_map);
77        }
78
79        // Move up one directory
80        current_dir = current_dir.parent()?;
81
82        // Stop if we've reached the root
83        if current_dir == root || !current_dir.starts_with(root) {
84            break;
85        }
86    }
87
88    None
89}
90
91/// Manages the indexing process
92pub struct Indexer {
93    cache: CacheManager,
94    config: IndexConfig,
95}
96
97impl Indexer {
98    /// Create a new indexer with the given cache manager and config
99    pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
100        Self { cache, config }
101    }
102
103    /// Build or update the index for the given root directory
104    pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
105        self.index_with_callback(root, show_progress, None)
106    }
107
108    /// Build or update the index with progress callback support
109    pub fn index_with_callback(
110        &self,
111        root: impl AsRef<Path>,
112        show_progress: bool,
113        progress_callback: Option<ProgressCallback>,
114    ) -> Result<IndexStats> {
115        let root = root.as_ref();
116        log::info!("Indexing directory: {:?}", root);
117
118        // Get git state (if in git repo)
119        let git_state = crate::git::get_git_state_optional(root)?;
120        let branch = git_state
121            .as_ref()
122            .map(|s| s.branch.clone())
123            .unwrap_or_else(|| "_default".to_string());
124
125        if let Some(ref state) = git_state {
126            log::info!(
127                "Git state: branch='{}', commit='{}', dirty={}",
128                state.branch,
129                state.commit,
130                state.dirty
131            );
132        } else {
133            log::info!("Not a git repository, using default branch");
134        }
135
136        // Configure thread pool for parallel processing
137        // 0 = auto (use 80% of available cores to avoid locking the system)
138        let num_threads = if self.config.parallel_threads == 0 {
139            let available_cores = std::thread::available_parallelism()
140                .map(|n| n.get())
141                .unwrap_or(4);
142            // Use 80% of available cores (minimum 1, maximum 8)
143            // Cap at 8 to prevent diminishing returns from cache contention on high-core systems
144            ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145        } else {
146            self.config.parallel_threads
147        };
148
149        log::info!("Using {} threads for parallel indexing (out of {} available)",
150                   num_threads,
151                   std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153        // Ensure cache is initialized
154        self.cache.init()?;
155
156        // Check available disk space after cache is initialized
157        self.check_disk_space(root)?;
158
159        // Load existing hashes for incremental indexing (for current branch)
160        let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161        log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163        // Step 1: Walk directory tree and collect files
164        let files = self.discover_files(root)?;
165        let total_files = files.len();
166        log::info!("Discovered {} files to index", total_files);
167
168        // Step 1.4: Parse tsconfig.json files for TypeScript/Vue path alias resolution
169        // Must be done before parallel processing so it's available during dependency extraction
170        let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171            .unwrap_or_else(|e| {
172                log::warn!("Failed to parse tsconfig.json files: {}", e);
173                HashMap::new()
174            });
175        if !tsconfigs.is_empty() {
176            log::info!("Found {} tsconfig.json files", tsconfigs.len());
177            for (config_dir, alias_map) in &tsconfigs {
178                log::debug!("  {} (base_url: {:?}, {} aliases)",
179                           config_dir.display(),
180                           alias_map.base_url,
181                           alias_map.aliases.len());
182            }
183        }
184
185        // Step 1.5: Quick incremental check - are all files unchanged?
186        // If yes, skip expensive rebuild entirely and return cached stats
187        if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188            // Same number of files - check if any changed by comparing hashes
189            let mut any_changed = false;
190
191            for file_path in &files {
192                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
193                let path_str = file_path.to_string_lossy().to_string();
194                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195                    // Convert absolute path to relative
196                    rel_path.to_string_lossy().to_string()
197                } else {
198                    // Already relative, just strip ./ prefix
199                    path_str.trim_start_matches("./").to_string()
200                };
201
202                // Check if file exists in cache
203                if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204                    // Read and hash file to check if changed
205                    match std::fs::read_to_string(file_path) {
206                        Ok(content) => {
207                            let current_hash = self.hash_content(content.as_bytes());
208                            if &current_hash != existing_hash {
209                                any_changed = true;
210                                log::debug!("File changed: {}", path_str);
211                                break; // Early exit - we know we need to rebuild
212                            }
213                        }
214                        Err(_) => {
215                            any_changed = true;
216                            break;
217                        }
218                    }
219                } else {
220                    // File not in cache - something changed
221                    any_changed = true;
222                    break;
223                }
224            }
225
226            if !any_changed {
227                log::info!("No files changed - skipping index rebuild");
228                let stats = self.cache.stats()?;
229                return Ok(stats);
230            }
231        } else if total_files != existing_hashes.len() {
232            log::info!("File count changed ({} -> {}) - full reindex required",
233                       existing_hashes.len(), total_files);
234        }
235
236        // Step 2: Build trigram index + content store
237        let mut new_hashes = HashMap::new();
238        let mut files_indexed = 0;
239        let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); // For batch SQLite update
240        let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); // For batch dependency insertion
241        let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); // For batch export insertion
242
243        // Initialize trigram index and content store
244        let mut trigram_index = TrigramIndex::new();
245        let mut content_writer = ContentWriter::new();
246
247        // Enable batch-flush mode for trigram index if we have lots of files
248        if total_files > 10000 {
249            let temp_dir = self.cache.path().join("trigram_temp");
250            trigram_index.enable_batch_flush(temp_dir)
251                .context("Failed to enable batch-flush mode for trigram index")?;
252            log::info!("Enabled batch-flush mode for {} files", total_files);
253        }
254
255        // Initialize content writer to start streaming writes immediately
256        let content_path = self.cache.path().join("content.bin");
257        content_writer.init(content_path.clone())
258            .context("Failed to initialize content writer")?;
259
260        // Create progress bar (only if requested via --progress flag)
261        let pb = if show_progress {
262            let pb = ProgressBar::new(total_files as u64);
263            pb.set_draw_target(ProgressDrawTarget::stderr());
264            pb.set_style(
265                ProgressStyle::default_bar()
266                    .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
267                    .unwrap()
268                    .progress_chars("=>-")
269            );
270            // Force updates every 100ms to ensure progress is visible
271            pb.enable_steady_tick(std::time::Duration::from_millis(100));
272            pb
273        } else {
274            ProgressBar::hidden()
275        };
276
277        // Atomic counter for thread-safe progress updates
278        let progress_counter = Arc::new(AtomicU64::new(0));
279        // Shared status message for progress callback
280        let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
281
282        let _start_time = Instant::now();
283
284        // Spawn a background thread to update progress bar and call callback during parallel processing
285        let counter_for_thread = Arc::clone(&progress_counter);
286        let status_for_thread = Arc::clone(&progress_status);
287        let pb_clone = pb.clone();
288        let callback_for_thread = progress_callback.clone();
289        let total_files_for_thread = total_files;
290        let progress_thread = if show_progress || callback_for_thread.is_some() {
291            Some(std::thread::spawn(move || {
292                loop {
293                    let count = counter_for_thread.load(Ordering::Relaxed);
294                    pb_clone.set_position(count);
295
296                    // Call progress callback if provided
297                    if let Some(ref callback) = callback_for_thread {
298                        let status = status_for_thread.lock().unwrap().clone();
299                        callback(count as usize, total_files_for_thread, status);
300                    }
301
302                    if count >= total_files_for_thread as u64 {
303                        break;
304                    }
305                    std::thread::sleep(std::time::Duration::from_millis(50));
306                }
307            }))
308        } else {
309            None
310        };
311
312        // Build a custom thread pool with limited threads
313        let pool = rayon::ThreadPoolBuilder::new()
314            .num_threads(num_threads)
315            .build()
316            .context("Failed to create thread pool")?;
317
318        // Process files in batches to avoid OOM on huge codebases
319        // Batch size: process 5000 files at a time to limit memory usage
320        const BATCH_SIZE: usize = 5000;
321        let num_batches = total_files.div_ceil(BATCH_SIZE);
322        log::info!("Processing {} files in {} batches of up to {} files",
323                   total_files, num_batches, BATCH_SIZE);
324
325        for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
326            log::info!("Processing batch {}/{} ({} files)",
327                       batch_idx + 1, num_batches, batch_files.len());
328
329            // Process files in parallel using rayon with custom thread pool
330            let counter_clone = Arc::clone(&progress_counter);
331            let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
332                batch_files
333                    .par_iter()
334                    .map(|file_path| {
335                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
336                let path_str = file_path.to_string_lossy().to_string();
337                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
338                    // Convert absolute path to relative
339                    rel_path.to_string_lossy().to_string()
340                } else {
341                    // Already relative, just strip ./ prefix
342                    path_str.trim_start_matches("./").to_string()
343                };
344
345                // Read file content once (used for hashing, trigrams, and parsing)
346                let content = match std::fs::read_to_string(&file_path) {
347                    Ok(c) => c,
348                    Err(e) => {
349                        log::warn!("Failed to read {}: {}", path_str, e);
350                        // Update progress
351                        counter_clone.fetch_add(1, Ordering::Relaxed);
352                        return None;
353                    }
354                };
355
356                // Compute hash from content (no duplicate file read!)
357                let hash = self.hash_content(content.as_bytes());
358
359                // Detect language
360                let ext = file_path.extension()
361                    .and_then(|e| e.to_str())
362                    .unwrap_or("");
363                let language = Language::from_extension(ext);
364
365                // Count lines in the file
366                let line_count = content.lines().count();
367
368                // Extract dependencies and exports for supported languages
369                let dependencies = match language {
370                    Language::Rust => {
371                        match RustDependencyExtractor::extract_dependencies(&content) {
372                            Ok(deps) => deps,
373                            Err(e) => {
374                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
375                                Vec::new()
376                            }
377                        }
378                    }
379                    Language::Python => {
380                        match PythonDependencyExtractor::extract_dependencies(&content) {
381                            Ok(deps) => deps,
382                            Err(e) => {
383                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
384                                Vec::new()
385                            }
386                        }
387                    }
388                    Language::TypeScript | Language::JavaScript => {
389                        // Find nearest tsconfig for path alias resolution
390                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
391                        match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
392                            Ok(deps) => deps,
393                            Err(e) => {
394                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
395                                Vec::new()
396                            }
397                        }
398                    }
399                    Language::Go => {
400                        match GoDependencyExtractor::extract_dependencies(&content) {
401                            Ok(deps) => deps,
402                            Err(e) => {
403                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
404                                Vec::new()
405                            }
406                        }
407                    }
408                    Language::Java => {
409                        match JavaDependencyExtractor::extract_dependencies(&content) {
410                            Ok(deps) => deps,
411                            Err(e) => {
412                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
413                                Vec::new()
414                            }
415                        }
416                    }
417                    Language::C => {
418                        match CDependencyExtractor::extract_dependencies(&content) {
419                            Ok(deps) => deps,
420                            Err(e) => {
421                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
422                                Vec::new()
423                            }
424                        }
425                    }
426                    Language::Cpp => {
427                        match CppDependencyExtractor::extract_dependencies(&content) {
428                            Ok(deps) => deps,
429                            Err(e) => {
430                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
431                                Vec::new()
432                            }
433                        }
434                    }
435                    Language::CSharp => {
436                        match CSharpDependencyExtractor::extract_dependencies(&content) {
437                            Ok(deps) => deps,
438                            Err(e) => {
439                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
440                                Vec::new()
441                            }
442                        }
443                    }
444                    Language::PHP => {
445                        match PhpDependencyExtractor::extract_dependencies(&content) {
446                            Ok(deps) => deps,
447                            Err(e) => {
448                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
449                                Vec::new()
450                            }
451                        }
452                    }
453                    Language::Ruby => {
454                        match RubyDependencyExtractor::extract_dependencies(&content) {
455                            Ok(deps) => deps,
456                            Err(e) => {
457                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
458                                Vec::new()
459                            }
460                        }
461                    }
462                    Language::Kotlin => {
463                        match KotlinDependencyExtractor::extract_dependencies(&content) {
464                            Ok(deps) => deps,
465                            Err(e) => {
466                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
467                                Vec::new()
468                            }
469                        }
470                    }
471                    Language::Zig => {
472                        match ZigDependencyExtractor::extract_dependencies(&content) {
473                            Ok(deps) => deps,
474                            Err(e) => {
475                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
476                                Vec::new()
477                            }
478                        }
479                    }
480                    Language::Vue => {
481                        // Find nearest tsconfig for path alias resolution
482                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
483                        match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
484                            Ok(deps) => deps,
485                            Err(e) => {
486                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
487                                Vec::new()
488                            }
489                        }
490                    }
491                    Language::Svelte => {
492                        match SvelteDependencyExtractor::extract_dependencies(&content) {
493                            Ok(deps) => deps,
494                            Err(e) => {
495                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
496                                Vec::new()
497                            }
498                        }
499                    }
500                    // Other languages not yet implemented
501                    _ => Vec::new(),
502                };
503
504                // Extract exports (for barrel export tracking)
505                let exports = match language {
506                    Language::TypeScript | Language::JavaScript => {
507                        // Find nearest tsconfig for path alias resolution
508                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
509                        match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
510                            Ok(exports) => exports,
511                            Err(e) => {
512                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
513                                Vec::new()
514                            }
515                        }
516                    }
517                    Language::Vue => {
518                        // Find nearest tsconfig for path alias resolution
519                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
520                        match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
521                            Ok(exports) => exports,
522                            Err(e) => {
523                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
524                                Vec::new()
525                            }
526                        }
527                    }
528                    // Other languages not yet implemented for export tracking
529                    _ => Vec::new(),
530                };
531
532                // Update progress atomically
533                counter_clone.fetch_add(1, Ordering::Relaxed);
534
535                Some(FileProcessingResult {
536                    path: file_path.clone(),
537                    path_str: normalized_path.to_string(),
538                    hash,
539                    content,
540                    language,
541                    line_count,
542                    dependencies,
543                    exports,
544                })
545                })
546                .collect()
547            });
548
549            // Process batch results immediately (streaming approach to minimize memory)
550            for result in results.into_iter().flatten() {
551                // Add file to trigram index (get file_id)
552                let file_id = trigram_index.add_file(result.path.clone());
553
554                // Index file content directly (avoid accumulating all trigrams)
555                trigram_index.index_file(file_id, &result.content);
556
557                // Add to content store
558                content_writer.add_file(result.path.clone(), &result.content);
559
560                files_indexed += 1;
561
562                // Prepare file metadata for batch database update
563                file_metadata.push((
564                    result.path_str.clone(),
565                    result.hash.clone(),
566                    format!("{:?}", result.language),
567                    result.line_count
568                ));
569
570                // Collect dependencies for batch insertion (if any)
571                if !result.dependencies.is_empty() {
572                    all_dependencies.push((result.path_str.clone(), result.dependencies));
573                }
574
575                // Collect exports for batch insertion (if any)
576                if !result.exports.is_empty() {
577                    all_exports.push((result.path_str.clone(), result.exports));
578                }
579
580                new_hashes.insert(result.path_str, result.hash);
581            }
582
583            // Flush trigram index batch to disk if batch-flush mode is enabled
584            if total_files > 10000 {
585                let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
586                if show_progress {
587                    pb.set_message(flush_msg.clone());
588                }
589                *progress_status.lock().unwrap() = flush_msg;
590                trigram_index.flush_batch()
591                    .context("Failed to flush trigram batch")?;
592            }
593        }
594
595        // Wait for progress thread to finish
596        if let Some(thread) = progress_thread {
597            let _ = thread.join();
598        }
599
600        // Update progress bar to final count
601        if show_progress {
602            let final_count = progress_counter.load(Ordering::Relaxed);
603            pb.set_position(final_count);
604        }
605
606        // Finalize trigram index (sort and deduplicate posting lists)
607        *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
608        if show_progress {
609            pb.set_message("Finalizing trigram index...".to_string());
610        }
611        trigram_index.finalize();
612
613        // Update progress bar message for post-processing
614        *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
615        if show_progress {
616            pb.set_message("Writing file metadata to database...".to_string());
617        }
618
619        // Batch write file metadata AND branch hashes in a SINGLE atomic transaction
620        // This ensures that if files are inserted, their hashes are guaranteed to be inserted too
621        if !file_metadata.is_empty() {
622            // Prepare files data (path, language, line_count)
623            let files_without_hash: Vec<(String, String, usize)> = file_metadata
624                .iter()
625                .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
626                .collect();
627
628        // Record files for this branch (for branch-aware indexing)
629        *progress_status.lock().unwrap() = "Recording branch files...".to_string();
630        if show_progress {
631            pb.set_message("Recording branch files...".to_string());
632        }
633
634            // Prepare branch files data (path, hash)
635            let branch_files: Vec<(String, String)> = file_metadata
636                .iter()
637                .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
638                .collect();
639
640            // Use atomic method that combines both operations
641            self.cache.batch_update_files_and_branch(
642                &files_without_hash,
643                &branch_files,
644                &branch,
645                git_state.as_ref().map(|s| s.commit.as_str()),
646            ).context("Failed to batch update files and branch hashes")?;
647
648            log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
649        }
650
651        // Update branch metadata
652        self.cache.update_branch_metadata(
653            &branch,
654            git_state.as_ref().map(|s| s.commit.as_str()),
655            file_metadata.len(),
656            git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
657        )?;
658
659        // Force WAL checkpoint to ensure background processes see all committed data
660        // This is critical when spawning background symbol indexer immediately after
661        self.cache.checkpoint_wal()
662            .context("Failed to checkpoint WAL")?;
663        log::debug!("WAL checkpoint completed - database is fully synced");
664
665        // Step 2.5: Insert dependencies (after files are inserted and have IDs)
666        if !all_dependencies.is_empty() {
667            *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
668            if show_progress {
669                pb.set_message("Extracting dependencies...".to_string());
670            }
671
672            // Find and parse all go.mod files for Go projects (monorepo support)
673            let go_modules = crate::parsers::go::parse_all_go_modules(root)
674                .unwrap_or_else(|e| {
675                    log::warn!("Failed to parse go.mod files: {}", e);
676                    Vec::new()
677                });
678            if !go_modules.is_empty() {
679                log::info!("Found {} Go modules", go_modules.len());
680                for module in &go_modules {
681                    log::debug!("  {} (project: {})", module.name, module.project_root);
682                }
683            }
684
685            // Find and parse all pom.xml/build.gradle files for Java projects (monorepo support)
686            let java_projects = crate::parsers::java::parse_all_java_projects(root)
687                .unwrap_or_else(|e| {
688                    log::warn!("Failed to parse Java project configs: {}", e);
689                    Vec::new()
690                });
691            if !java_projects.is_empty() {
692                log::info!("Found {} Java projects", java_projects.len());
693                for project in &java_projects {
694                    log::debug!("  {} (project: {})", project.package_name, project.project_root);
695                }
696            }
697
698            // Find and parse all Python package configs for Python projects (monorepo support)
699            let python_packages = crate::parsers::python::parse_all_python_packages(root)
700                .unwrap_or_else(|e| {
701                    log::warn!("Failed to parse Python package configs: {}", e);
702                    Vec::new()
703                });
704            if !python_packages.is_empty() {
705                log::info!("Found {} Python packages", python_packages.len());
706                for package in &python_packages {
707                    log::debug!("  {} (project: {})", package.name, package.project_root);
708                }
709            }
710
711            // Find and parse *.gemspec files for Ruby projects (monorepo support)
712            let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
713                .unwrap_or_else(|e| {
714                    log::warn!("Failed to parse Ruby project configs: {}", e);
715                    Vec::new()
716                });
717            if !ruby_projects.is_empty() {
718                log::info!("Found {} Ruby projects", ruby_projects.len());
719                for project in &ruby_projects {
720                    log::debug!("  {} (project: {})", project.gem_name, project.project_root);
721                }
722            }
723
724            // Note: Kotlin projects use the same java_projects above (same build systems: Maven/Gradle)
725
726            // Find and parse all composer.json files for PHP projects (monorepo support)
727            let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
728                .unwrap_or_else(|e| {
729                    log::warn!("Failed to parse composer.json files: {}", e);
730                    Vec::new()
731                });
732            if !php_psr4_mappings.is_empty() {
733                log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
734                for mapping in &php_psr4_mappings {
735                    log::debug!("  {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
736                }
737            }
738
739            // Find and parse all tsconfig.json files for TypeScript/Vue projects (monorepo support)
740            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
741                .unwrap_or_else(|e| {
742                    log::warn!("Failed to parse tsconfig.json files: {}", e);
743                    HashMap::new()
744                });
745            if !tsconfigs.is_empty() {
746                log::info!("Found {} tsconfig.json files", tsconfigs.len());
747                for (config_dir, alias_map) in &tsconfigs {
748                    log::debug!("  {} (base_url: {:?}, {} aliases)",
749                               config_dir.display(),
750                               alias_map.base_url,
751                               alias_map.aliases.len());
752                }
753            }
754
755            // Create dependency index to resolve paths and insert dependencies
756            let cache_for_deps = CacheManager::new(root);
757            let dep_index = DependencyIndex::new(cache_for_deps);
758
759            let mut total_deps_inserted = 0;
760
761            // Process each file's dependencies
762            for (file_path, import_infos) in all_dependencies {
763                // Get file ID from database
764                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
765                    Some(id) => id,
766                    None => {
767                        log::warn!("File not found in database (skipping dependencies): {}", file_path);
768                        continue;
769                    }
770                };
771
772                // Reclassify and filter dependencies
773                let mut resolved_deps = Vec::new();
774
775                for mut import_info in import_infos {
776                    // Reclassify Go imports using module names (if Go project)
777                    if file_path.ends_with(".go") {
778                        // Check if the import matches any Go module
779                        let mut reclassified = false;
780                        for module in &go_modules {
781                            import_info.import_type = crate::parsers::go::reclassify_go_import(
782                                &import_info.imported_path,
783                                Some(&module.name),
784                            );
785                            // If it's internal, we've found the right module
786                            if matches!(import_info.import_type, ImportType::Internal) {
787                                reclassified = true;
788                                break;
789                            }
790                        }
791                        // If no module matched, use base classification
792                        if !reclassified {
793                            import_info.import_type = crate::parsers::go::reclassify_go_import(
794                                &import_info.imported_path,
795                                None,
796                            );
797                        }
798                    }
799
800                    // Reclassify Java imports using package names (if Java project)
801                    if file_path.ends_with(".java") {
802                        // Check if the import matches any Java project
803                        let mut reclassified = false;
804                        for project in &java_projects {
805                            import_info.import_type = crate::parsers::java::reclassify_java_import(
806                                &import_info.imported_path,
807                                Some(&project.package_name),
808                            );
809                            // If it's internal, we've found the right project
810                            if matches!(import_info.import_type, ImportType::Internal) {
811                                reclassified = true;
812                                break;
813                            }
814                        }
815                        // If no project matched, use base classification
816                        if !reclassified {
817                            import_info.import_type = crate::parsers::java::reclassify_java_import(
818                                &import_info.imported_path,
819                                None,
820                            );
821                        }
822                    }
823
824                    // Reclassify Python imports using package names (if Python project)
825                    if file_path.ends_with(".py") {
826                        // Check if the import matches any Python package
827                        let mut reclassified = false;
828                        for package in &python_packages {
829                            import_info.import_type = crate::parsers::python::reclassify_python_import(
830                                &import_info.imported_path,
831                                Some(&package.name),
832                            );
833                            // If it's internal, we've found the right package
834                            if matches!(import_info.import_type, ImportType::Internal) {
835                                reclassified = true;
836                                break;
837                            }
838                        }
839                        // If no package matched, use base classification
840                        if !reclassified {
841                            import_info.import_type = crate::parsers::python::reclassify_python_import(
842                                &import_info.imported_path,
843                                None,
844                            );
845                        }
846                    }
847
848                    // Reclassify Ruby imports using gem names (if Ruby project)
849                    if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
850                        // Check if the import matches any Ruby project
851                        let mut reclassified = false;
852                        for project in &ruby_projects {
853                            let gem_names = vec![project.gem_name.clone()];
854                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
855                                &import_info.imported_path,
856                                &gem_names,
857                            );
858                            // If it's internal, we've found the right project
859                            if matches!(import_info.import_type, ImportType::Internal) {
860                                reclassified = true;
861                                break;
862                            }
863                        }
864                        // If no project matched, use base classification (will be External or Stdlib)
865                        if !reclassified {
866                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
867                                &import_info.imported_path,
868                                &[],
869                            );
870                        }
871                    }
872
873                    // Reclassify Kotlin imports using package names (if Kotlin project)
874                    if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
875                        // Check if the import matches any Java/Kotlin project (same build systems)
876                        let mut reclassified = false;
877                        for project in &java_projects {
878                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
879                                &import_info.imported_path,
880                                Some(&project.package_name),
881                            );
882                            // If it's internal, we've found the right project
883                            if matches!(import_info.import_type, ImportType::Internal) {
884                                reclassified = true;
885                                break;
886                            }
887                        }
888                        // If no project matched, use base classification
889                        if !reclassified {
890                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
891                                &import_info.imported_path,
892                                None,
893                            );
894                        }
895                    }
896
897                    // ONLY insert Internal dependencies - skip External and Stdlib
898                    if !matches!(import_info.import_type, ImportType::Internal) {
899                        continue;
900                    }
901
902                    // Resolve PHP dependencies using PSR-4 (deterministic)
903                    let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
904                        // Use PSR-4 to resolve namespace to file path
905                        if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
906                            &import_info.imported_path,
907                            &php_psr4_mappings,
908                        ) {
909                            // Look up file ID in database using exact match
910                            match dep_index.get_file_id_by_path(&resolved_path)? {
911                                Some(id) => {
912                                    log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
913                                               import_info.imported_path, resolved_path, id);
914                                    Some(id)
915                                }
916                                None => {
917                                    log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
918                                               import_info.imported_path, resolved_path);
919                                    None
920                                }
921                            }
922                        } else {
923                            log::trace!("Could not resolve PHP namespace using PSR-4: {}",
924                                       import_info.imported_path);
925                            None
926                        }
927                    } else if file_path.ends_with(".py") && !python_packages.is_empty() {
928                        // Resolve Python dependencies using package mappings
929                        if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
930                            &import_info.imported_path,
931                            &python_packages,
932                            Some(&file_path),
933                        ) {
934                            // Look up file ID in database using exact match
935                            match dep_index.get_file_id_by_path(&resolved_path)? {
936                                Some(id) => {
937                                    log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
938                                               import_info.imported_path, resolved_path, id);
939                                    Some(id)
940                                }
941                                None => {
942                                    log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
943                                               import_info.imported_path, resolved_path);
944                                    None
945                                }
946                            }
947                        } else {
948                            log::trace!("Could not resolve Python import: {}", import_info.imported_path);
949                            None
950                        }
951                    } else if file_path.ends_with(".go") && !go_modules.is_empty() {
952                        // Resolve Go dependencies using module mappings
953                        if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
954                            &import_info.imported_path,
955                            &go_modules,
956                            Some(&file_path),
957                        ) {
958                            // Look up file ID in database using exact match
959                            match dep_index.get_file_id_by_path(&resolved_path)? {
960                                Some(id) => {
961                                    log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
962                                               import_info.imported_path, resolved_path, id);
963                                    Some(id)
964                                }
965                                None => {
966                                    log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
967                                               import_info.imported_path, resolved_path);
968                                    None
969                                }
970                            }
971                        } else {
972                            log::trace!("Could not resolve Go import: {}", import_info.imported_path);
973                            None
974                        }
975                    } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
976                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
977                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
978                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
979                        // Resolve TypeScript/JavaScript dependencies (relative imports and path aliases)
980                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
981                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
982                            &import_info.imported_path,
983                            Some(&file_path),
984                            alias_map,
985                        ) {
986                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
987                            let candidates: Vec<&str> = candidates_str.split('|').collect();
988
989                            // Try each candidate in order until we find one in the database
990                            let mut resolved_id = None;
991                            for candidate_path in candidates {
992                                // Normalize path to be relative to project root
993                                // Convert absolute paths to relative (without requiring file to exist)
994                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
995                                    rel_path.to_string_lossy().to_string()
996                                } else {
997                                    // Not an absolute path or not under root - use as-is
998                                    candidate_path.to_string()
999                                };
1000
1001                                log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1002                                match dep_index.get_file_id_by_path(&normalized_candidate)? {
1003                                    Some(id) => {
1004                                        log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1005                                                   import_info.imported_path, normalized_candidate, id);
1006                                        resolved_id = Some(id);
1007                                        break; // Found a match, stop trying
1008                                    }
1009                                    None => {
1010                                        log::trace!("TS/JS candidate not in index: {}", candidate_path);
1011                                    }
1012                                }
1013                            }
1014
1015                            if resolved_id.is_none() {
1016                                log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1017                                           candidates_str);
1018                            }
1019
1020                            resolved_id
1021                        } else {
1022                            log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1023                            None
1024                        }
1025                    } else if file_path.ends_with(".rs") {
1026                        // Resolve Rust dependencies (crate::, super::, self::, mod declarations)
1027                        if let Some(resolved_path) = crate::parsers::rust::resolve_rust_use_to_path(
1028                            &import_info.imported_path,
1029                            Some(&file_path),
1030                            Some(root.to_str().unwrap_or("")),
1031                        ) {
1032                            // Look up file ID in database using exact match
1033                            match dep_index.get_file_id_by_path(&resolved_path)? {
1034                                Some(id) => {
1035                                    log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1036                                               import_info.imported_path, resolved_path, id);
1037                                    Some(id)
1038                                }
1039                                None => {
1040                                    log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1041                                               import_info.imported_path, resolved_path);
1042                                    None
1043                                }
1044                            }
1045                        } else {
1046                            log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1047                            None
1048                        }
1049                    } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1050                        // Resolve Java dependencies using project mappings
1051                        if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1052                            &import_info.imported_path,
1053                            &java_projects,
1054                            Some(&file_path),
1055                        ) {
1056                            // Look up file ID in database using exact match
1057                            match dep_index.get_file_id_by_path(&resolved_path)? {
1058                                Some(id) => {
1059                                    log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1060                                               import_info.imported_path, resolved_path, id);
1061                                    Some(id)
1062                                }
1063                                None => {
1064                                    log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1065                                               import_info.imported_path, resolved_path);
1066                                    None
1067                                }
1068                            }
1069                        } else {
1070                            log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1071                            None
1072                        }
1073                    } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1074                        // Resolve Kotlin dependencies using project mappings (same build systems as Java)
1075                        if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1076                            &import_info.imported_path,
1077                            &java_projects,
1078                            Some(&file_path),
1079                        ) {
1080                            // Look up file ID in database using exact match
1081                            match dep_index.get_file_id_by_path(&resolved_path)? {
1082                                Some(id) => {
1083                                    log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1084                                               import_info.imported_path, resolved_path, id);
1085                                    Some(id)
1086                                }
1087                                None => {
1088                                    log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1089                                               import_info.imported_path, resolved_path);
1090                                    None
1091                                }
1092                            }
1093                        } else {
1094                            log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1095                            None
1096                        }
1097                    } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1098                        // Resolve Ruby dependencies using project mappings
1099                        if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1100                            &import_info.imported_path,
1101                            &ruby_projects,
1102                            Some(&file_path),
1103                        ) {
1104                            // Look up file ID in database using exact match
1105                            match dep_index.get_file_id_by_path(&resolved_path)? {
1106                                Some(id) => {
1107                                    log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1108                                               import_info.imported_path, resolved_path, id);
1109                                    Some(id)
1110                                }
1111                                None => {
1112                                    log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1113                                               import_info.imported_path, resolved_path);
1114                                    None
1115                                }
1116                            }
1117                        } else {
1118                            log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1119                            None
1120                        }
1121                    } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1122                        // Resolve C dependencies (relative #include paths)
1123                        if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1124                            &import_info.imported_path,
1125                            Some(&file_path),
1126                        ) {
1127                            // Look up file ID in database using exact match
1128                            match dep_index.get_file_id_by_path(&resolved_path)? {
1129                                Some(id) => {
1130                                    log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1131                                               import_info.imported_path, resolved_path, id);
1132                                    Some(id)
1133                                }
1134                                None => {
1135                                    log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1136                                               import_info.imported_path, resolved_path);
1137                                    None
1138                                }
1139                            }
1140                        } else {
1141                            log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1142                            None
1143                        }
1144                    } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1145                           || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1146                           || file_path.ends_with(".C") || file_path.ends_with(".H") {
1147                        // Resolve C++ dependencies (relative #include paths)
1148                        if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1149                            &import_info.imported_path,
1150                            Some(&file_path),
1151                        ) {
1152                            // Look up file ID in database using exact match
1153                            match dep_index.get_file_id_by_path(&resolved_path)? {
1154                                Some(id) => {
1155                                    log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1156                                               import_info.imported_path, resolved_path, id);
1157                                    Some(id)
1158                                }
1159                                None => {
1160                                    log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1161                                               import_info.imported_path, resolved_path);
1162                                    None
1163                                }
1164                            }
1165                        } else {
1166                            log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1167                            None
1168                        }
1169                    } else if file_path.ends_with(".cs") {
1170                        // Resolve C# dependencies (using namespace-to-path mapping)
1171                        if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1172                            &import_info.imported_path,
1173                            Some(&file_path),
1174                        ) {
1175                            // Look up file ID in database using exact match
1176                            match dep_index.get_file_id_by_path(&resolved_path)? {
1177                                Some(id) => {
1178                                    log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1179                                               import_info.imported_path, resolved_path, id);
1180                                    Some(id)
1181                                }
1182                                None => {
1183                                    log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1184                                               import_info.imported_path, resolved_path);
1185                                    None
1186                                }
1187                            }
1188                        } else {
1189                            log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1190                            None
1191                        }
1192                    } else if file_path.ends_with(".zig") {
1193                        // Resolve Zig dependencies (relative @import paths)
1194                        if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1195                            &import_info.imported_path,
1196                            Some(&file_path),
1197                        ) {
1198                            // Look up file ID in database using exact match
1199                            match dep_index.get_file_id_by_path(&resolved_path)? {
1200                                Some(id) => {
1201                                    log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1202                                               import_info.imported_path, resolved_path, id);
1203                                    Some(id)
1204                                }
1205                                None => {
1206                                    log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1207                                               import_info.imported_path, resolved_path);
1208                                    None
1209                                }
1210                            }
1211                        } else {
1212                            log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1213                            None
1214                        }
1215                    } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1216                        // Resolve Vue/Svelte dependencies (use TypeScript/JavaScript resolver for imports in <script> blocks)
1217                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1218                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1219                            &import_info.imported_path,
1220                            Some(&file_path),
1221                            alias_map,
1222                        ) {
1223                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
1224                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1225
1226                            // Try each candidate in order until we find one in the database
1227                            let mut resolved_id = None;
1228                            for candidate_path in candidates {
1229                                // Normalize path to be relative to project root
1230                                // Convert absolute paths to relative (without requiring file to exist)
1231                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1232                                    rel_path.to_string_lossy().to_string()
1233                                } else {
1234                                    // Not an absolute path or not under root - use as-is
1235                                    candidate_path.to_string()
1236                                };
1237
1238                                match dep_index.get_file_id_by_path(&normalized_candidate)? {
1239                                    Some(id) => {
1240                                        log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1241                                                   import_info.imported_path, candidate_path, id);
1242                                        resolved_id = Some(id);
1243                                        break; // Found a match, stop trying
1244                                    }
1245                                    None => {
1246                                        log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1247                                    }
1248                                }
1249                            }
1250
1251                            if resolved_id.is_none() {
1252                                log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1253                                           candidates_str);
1254                            }
1255
1256                            resolved_id
1257                        } else {
1258                            log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1259                            None
1260                        }
1261                    } else {
1262                        None
1263                    };
1264
1265                    // resolved_file_id will be populated using deterministic language-specific resolution
1266                    // All language resolvers have been implemented!
1267                    resolved_deps.push(Dependency {
1268                        file_id,
1269                        imported_path: import_info.imported_path.clone(),
1270                        resolved_file_id,
1271                        import_type: import_info.import_type,
1272                        line_number: import_info.line_number,
1273                        imported_symbols: import_info.imported_symbols.clone(),
1274                    });
1275                }
1276
1277                // Clear existing dependencies for this file (incremental reindex)
1278                dep_index.clear_dependencies(file_id)?;
1279
1280                // Batch insert dependencies
1281                if !resolved_deps.is_empty() {
1282                    dep_index.batch_insert_dependencies(&resolved_deps)?;
1283                    total_deps_inserted += resolved_deps.len();
1284                }
1285            }
1286
1287            log::info!("Extracted {} dependencies", total_deps_inserted);
1288        }
1289
1290        // Step 2.6: Insert exports (after files are inserted and have IDs)
1291        if !all_exports.is_empty() {
1292            *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1293            if show_progress {
1294                pb.set_message("Extracting exports...".to_string());
1295            }
1296
1297            // Reuse the tsconfigs parsed earlier for TypeScript/Vue path alias resolution
1298            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1299                .unwrap_or_else(|e| {
1300                    log::warn!("Failed to parse tsconfig.json files: {}", e);
1301                    HashMap::new()
1302                });
1303
1304            // Create dependency index to resolve paths and insert exports
1305            let cache_for_exports = CacheManager::new(root);
1306            let dep_index = DependencyIndex::new(cache_for_exports);
1307
1308            let mut total_exports_inserted = 0;
1309
1310            // Process each file's exports
1311            for (file_path, export_infos) in all_exports {
1312                // Get file ID from database
1313                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1314                    Some(id) => id,
1315                    None => {
1316                        log::warn!("File not found in database (skipping exports): {}", file_path);
1317                        continue;
1318                    }
1319                };
1320
1321                // Resolve export source paths and insert
1322                for export_info in export_infos {
1323                    // Resolve export source path (same logic as imports)
1324                    let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1325                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1326                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1327                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1328                            || file_path.ends_with(".vue") {
1329                        // Resolve TypeScript/JavaScript/Vue export paths (relative imports and path aliases)
1330                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1331                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1332                            &export_info.source_path,
1333                            Some(&file_path),
1334                            alias_map,
1335                        ) {
1336                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js|path.vue")
1337                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1338
1339                            // Try each candidate in order until we find one in the database
1340                            let mut resolved_id = None;
1341                            for candidate_path in candidates {
1342                                // Normalize path to be relative to project root
1343                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1344                                    rel_path.to_string_lossy().to_string()
1345                                } else {
1346                                    candidate_path.to_string()
1347                                };
1348
1349                                match dep_index.get_file_id_by_path(&normalized_candidate)? {
1350                                    Some(id) => {
1351                                        log::trace!("Resolved export source: {} -> {} (file_id={})",
1352                                                   export_info.source_path, normalized_candidate, id);
1353                                        resolved_id = Some(id);
1354                                        break; // Found a match, stop trying
1355                                    }
1356                                    None => {
1357                                        log::trace!("Export source candidate not in index: {}", candidate_path);
1358                                    }
1359                                }
1360                            }
1361
1362                            if resolved_id.is_none() {
1363                                log::trace!("Export source: no matching file found in database for any candidate: {}",
1364                                           candidates_str);
1365                            }
1366
1367                            resolved_id
1368                        } else {
1369                            log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1370                            None
1371                        }
1372                    } else {
1373                        None
1374                    };
1375
1376                    // Insert export into database
1377                    dep_index.insert_export(
1378                        file_id,
1379                        export_info.exported_symbol,
1380                        export_info.source_path,
1381                        resolved_source_id,
1382                        export_info.line_number,
1383                    )?;
1384
1385                    total_exports_inserted += 1;
1386                }
1387            }
1388
1389            log::info!("Extracted {} exports", total_exports_inserted);
1390        }
1391
1392        log::info!("Indexed {} files", files_indexed);
1393
1394        // Step 3: Write trigram index
1395        *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1396        if show_progress {
1397            pb.set_message("Writing trigram index...".to_string());
1398        }
1399        let trigrams_path = self.cache.path().join("trigrams.bin");
1400        log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1401                   trigram_index.trigram_count());
1402
1403        trigram_index.write(&trigrams_path)
1404            .context("Failed to write trigram index")?;
1405        log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1406
1407        // Step 4: Finalize content store (already been writing incrementally)
1408        *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1409        if show_progress {
1410            pb.set_message("Finalizing content store...".to_string());
1411        }
1412        content_writer.finalize_if_needed()
1413            .context("Failed to finalize content store")?;
1414        log::info!("Wrote {} files ({} bytes) to content.bin",
1415                   content_writer.file_count(), content_writer.content_size());
1416
1417        // Step 5: Update SQLite statistics from database totals (branch-aware)
1418        *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1419        if show_progress {
1420            pb.set_message("Updating statistics...".to_string());
1421        }
1422        // Update stats for current branch only
1423        self.cache.update_stats(&branch)?;
1424
1425        // Update schema hash to mark cache as compatible with current binary
1426        self.cache.update_schema_hash()?;
1427
1428        pb.finish_with_message("Indexing complete");
1429
1430        // Return stats
1431        let stats = self.cache.stats()?;
1432        log::info!("Indexing complete: {} files",
1433                   stats.total_files);
1434
1435        Ok(stats)
1436    }
1437
1438    /// Discover all indexable files in the directory tree
1439    fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1440        let mut files = Vec::new();
1441
1442        // WalkBuilder from ignore crate automatically respects:
1443        // - .gitignore (when in a git repo)
1444        // - .ignore files
1445        // - Hidden files (can be configured)
1446        let walker = WalkBuilder::new(root)
1447            .follow_links(self.config.follow_symlinks)
1448            .git_ignore(true)  // Explicitly enable gitignore support (enabled by default, but be explicit)
1449            .git_global(false) // Don't use global gitignore
1450            .git_exclude(false) // Don't use .git/info/exclude
1451            .build();
1452
1453        for entry in walker {
1454            let entry = entry?;
1455            let path = entry.path();
1456
1457            // Only process files (not directories)
1458            if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1459                continue;
1460            }
1461
1462            // Check if should be indexed
1463            if self.should_index(path) {
1464                files.push(path.to_path_buf());
1465            }
1466        }
1467
1468        Ok(files)
1469    }
1470
1471    /// Check if a file should be indexed based on config
1472    fn should_index(&self, path: &Path) -> bool {
1473        // Check file extension for supported languages
1474        let ext = match path.extension() {
1475            Some(ext) => ext.to_string_lossy(),
1476            None => return false,
1477        };
1478
1479        let lang = Language::from_extension(&ext);
1480
1481        // Only index files for languages with parser implementations
1482        if !lang.is_supported() {
1483            if !matches!(lang, Language::Unknown) {
1484                log::debug!("Skipping {} ({:?} parser not yet implemented)",
1485                           path.display(), lang);
1486            }
1487            return false;
1488        }
1489
1490        // Check file size limits
1491        if let Ok(metadata) = std::fs::metadata(path) {
1492            if metadata.len() > self.config.max_file_size as u64 {
1493                log::debug!("Skipping {} (too large: {} bytes)",
1494                           path.display(), metadata.len());
1495                return false;
1496            }
1497        }
1498
1499        // TODO: Check include/exclude patterns when glob support is added
1500        // For now, accept all files with supported language extensions
1501
1502        true
1503    }
1504
1505    /// Compute blake3 hash from file contents for change detection
1506    fn hash_content(&self, content: &[u8]) -> String {
1507        let hash = blake3::hash(content);
1508        hash.to_hex().to_string()
1509    }
1510
1511    /// Check available disk space before indexing
1512    ///
1513    /// Ensures there's enough free space to create the index. Warns if disk space is low.
1514    /// This prevents partial index writes and confusing error messages.
1515    fn check_disk_space(&self, root: &Path) -> Result<()> {
1516        // Get available space on the filesystem containing the cache directory
1517        let cache_path = self.cache.path();
1518
1519        // Use statvfs on Unix systems
1520        #[cfg(unix)]
1521        {
1522            // On Linux, we can use statvfs to get available space
1523            // For now, we'll use a simple heuristic: warn if we can't write a test file
1524            let test_file = cache_path.join(".space_check");
1525            match std::fs::write(&test_file, b"test") {
1526                Ok(_) => {
1527                    let _ = std::fs::remove_file(&test_file);
1528
1529                    // Try to estimate available space using df command
1530                    if let Ok(output) = std::process::Command::new("df")
1531                        .arg("-k")
1532                        .arg(cache_path.parent().unwrap_or(root))
1533                        .output()
1534                    {
1535                        if let Ok(df_output) = String::from_utf8(output.stdout) {
1536                            // Parse df output to get available KB
1537                            if let Some(line) = df_output.lines().nth(1) {
1538                                let parts: Vec<&str> = line.split_whitespace().collect();
1539                                if parts.len() >= 4 {
1540                                    if let Ok(available_kb) = parts[3].parse::<u64>() {
1541                                        let available_mb = available_kb / 1024;
1542
1543                                        // Warn if less than 100MB available
1544                                        if available_mb < 100 {
1545                                            log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1546                                            output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1547                                        } else {
1548                                            log::debug!("Available disk space: {}MB", available_mb);
1549                                        }
1550                                    }
1551                                }
1552                            }
1553                        }
1554                    }
1555
1556                    Ok(())
1557                }
1558                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1559                    anyhow::bail!(
1560                        "Permission denied writing to cache directory: {}. Check file permissions.",
1561                        cache_path.display()
1562                    )
1563                }
1564                Err(e) => {
1565                    // If we can't write, it might be a disk space issue
1566                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1567                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1568                }
1569            }
1570        }
1571
1572        #[cfg(not(unix))]
1573        {
1574            // On Windows, try to write a test file
1575            let test_file = cache_path.join(".space_check");
1576            match std::fs::write(&test_file, b"test") {
1577                Ok(_) => {
1578                    let _ = std::fs::remove_file(&test_file);
1579                    Ok(())
1580                }
1581                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1582                    anyhow::bail!(
1583                        "Permission denied writing to cache directory: {}. Check file permissions.",
1584                        cache_path.display()
1585                    )
1586                }
1587                Err(e) => {
1588                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1589                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1590                }
1591            }
1592        }
1593    }
1594}
1595
1596#[cfg(test)]
1597mod tests {
1598    use super::*;
1599    use tempfile::TempDir;
1600    use std::fs;
1601
1602    #[test]
1603    fn test_indexer_creation() {
1604        let temp = TempDir::new().unwrap();
1605        let cache = CacheManager::new(temp.path());
1606        let config = IndexConfig::default();
1607        let indexer = Indexer::new(cache, config);
1608
1609        assert!(indexer.cache.path().ends_with(".reflex"));
1610    }
1611
1612    #[test]
1613    fn test_hash_content() {
1614        let temp = TempDir::new().unwrap();
1615        let cache = CacheManager::new(temp.path());
1616        let config = IndexConfig::default();
1617        let indexer = Indexer::new(cache, config);
1618
1619        let content1 = b"hello world";
1620        let content2 = b"hello world";
1621        let content3 = b"different content";
1622
1623        let hash1 = indexer.hash_content(content1);
1624        let hash2 = indexer.hash_content(content2);
1625        let hash3 = indexer.hash_content(content3);
1626
1627        // Same content should produce same hash
1628        assert_eq!(hash1, hash2);
1629
1630        // Different content should produce different hash
1631        assert_ne!(hash1, hash3);
1632
1633        // Hash should be hex string
1634        assert_eq!(hash1.len(), 64); // blake3 hash is 32 bytes = 64 hex chars
1635    }
1636
1637    #[test]
1638    fn test_should_index_rust_file() {
1639        let temp = TempDir::new().unwrap();
1640        let cache = CacheManager::new(temp.path());
1641        let config = IndexConfig::default();
1642        let indexer = Indexer::new(cache, config);
1643
1644        // Create a small Rust file
1645        let rust_file = temp.path().join("test.rs");
1646        fs::write(&rust_file, "fn main() {}").unwrap();
1647
1648        assert!(indexer.should_index(&rust_file));
1649    }
1650
1651    #[test]
1652    fn test_should_index_unsupported_extension() {
1653        let temp = TempDir::new().unwrap();
1654        let cache = CacheManager::new(temp.path());
1655        let config = IndexConfig::default();
1656        let indexer = Indexer::new(cache, config);
1657
1658        let unsupported_file = temp.path().join("test.txt");
1659        fs::write(&unsupported_file, "plain text").unwrap();
1660
1661        assert!(!indexer.should_index(&unsupported_file));
1662    }
1663
1664    #[test]
1665    fn test_should_index_no_extension() {
1666        let temp = TempDir::new().unwrap();
1667        let cache = CacheManager::new(temp.path());
1668        let config = IndexConfig::default();
1669        let indexer = Indexer::new(cache, config);
1670
1671        let no_ext_file = temp.path().join("Makefile");
1672        fs::write(&no_ext_file, "all:\n\techo hello").unwrap();
1673
1674        assert!(!indexer.should_index(&no_ext_file));
1675    }
1676
1677    #[test]
1678    fn test_should_index_size_limit() {
1679        let temp = TempDir::new().unwrap();
1680        let cache = CacheManager::new(temp.path());
1681
1682        // Config with 100 byte size limit
1683        let mut config = IndexConfig::default();
1684        config.max_file_size = 100;
1685
1686        let indexer = Indexer::new(cache, config);
1687
1688        // Create small file (should be indexed)
1689        let small_file = temp.path().join("small.rs");
1690        fs::write(&small_file, "fn main() {}").unwrap();
1691        assert!(indexer.should_index(&small_file));
1692
1693        // Create large file (should be skipped)
1694        let large_file = temp.path().join("large.rs");
1695        let large_content = "a".repeat(150);
1696        fs::write(&large_file, large_content).unwrap();
1697        assert!(!indexer.should_index(&large_file));
1698    }
1699
1700    #[test]
1701    fn test_discover_files_empty_dir() {
1702        let temp = TempDir::new().unwrap();
1703        let cache = CacheManager::new(temp.path());
1704        let config = IndexConfig::default();
1705        let indexer = Indexer::new(cache, config);
1706
1707        let files = indexer.discover_files(temp.path()).unwrap();
1708        assert_eq!(files.len(), 0);
1709    }
1710
1711    #[test]
1712    fn test_discover_files_single_file() {
1713        let temp = TempDir::new().unwrap();
1714        let cache = CacheManager::new(temp.path());
1715        let config = IndexConfig::default();
1716        let indexer = Indexer::new(cache, config);
1717
1718        // Create a Rust file
1719        let rust_file = temp.path().join("main.rs");
1720        fs::write(&rust_file, "fn main() {}").unwrap();
1721
1722        let files = indexer.discover_files(temp.path()).unwrap();
1723        assert_eq!(files.len(), 1);
1724        assert!(files[0].ends_with("main.rs"));
1725    }
1726
1727    #[test]
1728    fn test_discover_files_multiple_languages() {
1729        let temp = TempDir::new().unwrap();
1730        let cache = CacheManager::new(temp.path());
1731        let config = IndexConfig::default();
1732        let indexer = Indexer::new(cache, config);
1733
1734        // Create files of different languages
1735        fs::write(temp.path().join("main.rs"), "fn main() {}").unwrap();
1736        fs::write(temp.path().join("script.py"), "print('hello')").unwrap();
1737        fs::write(temp.path().join("app.js"), "console.log('hi')").unwrap();
1738        fs::write(temp.path().join("README.md"), "# Project").unwrap(); // Should be skipped
1739
1740        let files = indexer.discover_files(temp.path()).unwrap();
1741        assert_eq!(files.len(), 3); // Only supported languages
1742    }
1743
1744    #[test]
1745    fn test_discover_files_subdirectories() {
1746        let temp = TempDir::new().unwrap();
1747        let cache = CacheManager::new(temp.path());
1748        let config = IndexConfig::default();
1749        let indexer = Indexer::new(cache, config);
1750
1751        // Create nested directory structure
1752        let src_dir = temp.path().join("src");
1753        fs::create_dir(&src_dir).unwrap();
1754        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
1755        fs::write(src_dir.join("lib.rs"), "pub mod test {}").unwrap();
1756
1757        let tests_dir = temp.path().join("tests");
1758        fs::create_dir(&tests_dir).unwrap();
1759        fs::write(tests_dir.join("test.rs"), "#[test] fn test() {}").unwrap();
1760
1761        let files = indexer.discover_files(temp.path()).unwrap();
1762        assert_eq!(files.len(), 3);
1763    }
1764
1765    #[test]
1766    fn test_discover_files_respects_gitignore() {
1767        let temp = TempDir::new().unwrap();
1768
1769        // Initialize git repo (required for .gitignore to work with WalkBuilder)
1770        std::process::Command::new("git")
1771            .arg("init")
1772            .current_dir(temp.path())
1773            .output()
1774            .expect("Failed to initialize git repo");
1775
1776        let cache = CacheManager::new(temp.path());
1777        let config = IndexConfig::default();
1778        let indexer = Indexer::new(cache, config);
1779
1780        // Create .gitignore - use "ignored/" pattern to ignore the directory
1781        // Note: WalkBuilder respects .gitignore ONLY in git repositories
1782        fs::write(temp.path().join(".gitignore"), "ignored/\n").unwrap();
1783
1784        // Create files
1785        fs::write(temp.path().join("included.rs"), "fn main() {}").unwrap();
1786        fs::write(temp.path().join("also_included.py"), "print('hi')").unwrap();
1787
1788        let ignored_dir = temp.path().join("ignored");
1789        fs::create_dir(&ignored_dir).unwrap();
1790        fs::write(ignored_dir.join("excluded.rs"), "fn test() {}").unwrap();
1791
1792        let files = indexer.discover_files(temp.path()).unwrap();
1793
1794        // Verify the expected files are found
1795        assert!(files.iter().any(|f| f.ends_with("included.rs")), "Should find included.rs");
1796        assert!(files.iter().any(|f| f.ends_with("also_included.py")), "Should find also_included.py");
1797
1798        // Verify excluded.rs in ignored/ directory is NOT found
1799        // This is the key test - gitignore should filter it out
1800        assert!(!files.iter().any(|f| {
1801            let path_str = f.to_string_lossy();
1802            path_str.contains("ignored") && f.ends_with("excluded.rs")
1803        }), "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)");
1804
1805        // Should find exactly 2 files (included.rs and also_included.py)
1806        // .gitignore file itself has no supported language extension, so it won't be indexed
1807        assert_eq!(files.len(), 2, "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)");
1808    }
1809
1810    #[test]
1811    fn test_index_empty_directory() {
1812        let temp = TempDir::new().unwrap();
1813        let cache = CacheManager::new(temp.path());
1814        let config = IndexConfig::default();
1815        let indexer = Indexer::new(cache, config);
1816
1817        let stats = indexer.index(temp.path(), false).unwrap();
1818
1819        assert_eq!(stats.total_files, 0);
1820    }
1821
1822    #[test]
1823    fn test_index_single_rust_file() {
1824        let temp = TempDir::new().unwrap();
1825        let project_root = temp.path().join("project");
1826        fs::create_dir(&project_root).unwrap();
1827
1828        let cache = CacheManager::new(&project_root);
1829        let config = IndexConfig::default();
1830        let indexer = Indexer::new(cache, config);
1831
1832        // Create a Rust file
1833        fs::write(
1834            project_root.join("main.rs"),
1835            "fn main() { println!(\"Hello\"); }"
1836        ).unwrap();
1837
1838        let stats = indexer.index(&project_root, false).unwrap();
1839
1840        assert_eq!(stats.total_files, 1);
1841        assert!(stats.files_by_language.get("Rust").is_some());
1842    }
1843
1844    #[test]
1845    fn test_index_multiple_files() {
1846        let temp = TempDir::new().unwrap();
1847        let project_root = temp.path().join("project");
1848        fs::create_dir(&project_root).unwrap();
1849
1850        let cache = CacheManager::new(&project_root);
1851        let config = IndexConfig::default();
1852        let indexer = Indexer::new(cache, config);
1853
1854        // Create multiple files
1855        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1856        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
1857        fs::write(project_root.join("script.py"), "def main(): pass").unwrap();
1858
1859        let stats = indexer.index(&project_root, false).unwrap();
1860
1861        assert_eq!(stats.total_files, 3);
1862        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
1863        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
1864    }
1865
1866    #[test]
1867    fn test_index_creates_trigram_index() {
1868        let temp = TempDir::new().unwrap();
1869        let project_root = temp.path().join("project");
1870        fs::create_dir(&project_root).unwrap();
1871
1872        let cache = CacheManager::new(&project_root);
1873        let config = IndexConfig::default();
1874        let indexer = Indexer::new(cache, config);
1875
1876        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1877
1878        indexer.index(&project_root, false).unwrap();
1879
1880        // Verify trigrams.bin was created
1881        let trigrams_path = project_root.join(".reflex/trigrams.bin");
1882        assert!(trigrams_path.exists());
1883    }
1884
1885    #[test]
1886    fn test_index_creates_content_store() {
1887        let temp = TempDir::new().unwrap();
1888        let project_root = temp.path().join("project");
1889        fs::create_dir(&project_root).unwrap();
1890
1891        let cache = CacheManager::new(&project_root);
1892        let config = IndexConfig::default();
1893        let indexer = Indexer::new(cache, config);
1894
1895        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1896
1897        indexer.index(&project_root, false).unwrap();
1898
1899        // Verify content.bin was created
1900        let content_path = project_root.join(".reflex/content.bin");
1901        assert!(content_path.exists());
1902    }
1903
1904    #[test]
1905    fn test_index_incremental_no_changes() {
1906        let temp = TempDir::new().unwrap();
1907        let project_root = temp.path().join("project");
1908        fs::create_dir(&project_root).unwrap();
1909
1910        let cache = CacheManager::new(&project_root);
1911        let config = IndexConfig::default();
1912        let indexer = Indexer::new(cache, config);
1913
1914        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1915
1916        // First index
1917        let stats1 = indexer.index(&project_root, false).unwrap();
1918        assert_eq!(stats1.total_files, 1);
1919
1920        // Second index without changes
1921        let stats2 = indexer.index(&project_root, false).unwrap();
1922        assert_eq!(stats2.total_files, 1);
1923    }
1924
1925    #[test]
1926    fn test_index_incremental_with_changes() {
1927        let temp = TempDir::new().unwrap();
1928        let project_root = temp.path().join("project");
1929        fs::create_dir(&project_root).unwrap();
1930
1931        let cache = CacheManager::new(&project_root);
1932        let config = IndexConfig::default();
1933        let indexer = Indexer::new(cache, config);
1934
1935        let main_path = project_root.join("main.rs");
1936        fs::write(&main_path, "fn main() {}").unwrap();
1937
1938        // First index
1939        indexer.index(&project_root, false).unwrap();
1940
1941        // Modify file
1942        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();
1943
1944        // Second index should detect change
1945        let stats = indexer.index(&project_root, false).unwrap();
1946        assert_eq!(stats.total_files, 1);
1947    }
1948
1949    #[test]
1950    fn test_index_incremental_new_file() {
1951        let temp = TempDir::new().unwrap();
1952        let project_root = temp.path().join("project");
1953        fs::create_dir(&project_root).unwrap();
1954
1955        let cache = CacheManager::new(&project_root);
1956        let config = IndexConfig::default();
1957        let indexer = Indexer::new(cache, config);
1958
1959        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1960
1961        // First index
1962        let stats1 = indexer.index(&project_root, false).unwrap();
1963        assert_eq!(stats1.total_files, 1);
1964
1965        // Add new file
1966        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
1967
1968        // Second index should include new file
1969        let stats2 = indexer.index(&project_root, false).unwrap();
1970        assert_eq!(stats2.total_files, 2);
1971    }
1972
1973    #[test]
1974    fn test_index_parallel_threads_config() {
1975        let temp = TempDir::new().unwrap();
1976        let project_root = temp.path().join("project");
1977        fs::create_dir(&project_root).unwrap();
1978
1979        let cache = CacheManager::new(&project_root);
1980
1981        // Test with explicit thread count
1982        let mut config = IndexConfig::default();
1983        config.parallel_threads = 2;
1984
1985        let indexer = Indexer::new(cache, config);
1986
1987        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1988
1989        let stats = indexer.index(&project_root, false).unwrap();
1990        assert_eq!(stats.total_files, 1);
1991    }
1992
1993    #[test]
1994    fn test_index_parallel_threads_auto() {
1995        let temp = TempDir::new().unwrap();
1996        let project_root = temp.path().join("project");
1997        fs::create_dir(&project_root).unwrap();
1998
1999        let cache = CacheManager::new(&project_root);
2000
2001        // Test with auto thread count (0 = auto)
2002        let mut config = IndexConfig::default();
2003        config.parallel_threads = 0;
2004
2005        let indexer = Indexer::new(cache, config);
2006
2007        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2008
2009        let stats = indexer.index(&project_root, false).unwrap();
2010        assert_eq!(stats.total_files, 1);
2011    }
2012
2013    #[test]
2014    fn test_index_respects_size_limit() {
2015        let temp = TempDir::new().unwrap();
2016        let project_root = temp.path().join("project");
2017        fs::create_dir(&project_root).unwrap();
2018
2019        let cache = CacheManager::new(&project_root);
2020
2021        // Very small size limit
2022        let mut config = IndexConfig::default();
2023        config.max_file_size = 50;
2024
2025        let indexer = Indexer::new(cache, config);
2026
2027        // Small file (should be indexed)
2028        fs::write(project_root.join("small.rs"), "fn a() {}").unwrap();
2029
2030        // Large file (should be skipped)
2031        let large_content = "fn main() {}\n".repeat(10);
2032        fs::write(project_root.join("large.rs"), large_content).unwrap();
2033
2034        let stats = indexer.index(&project_root, false).unwrap();
2035
2036        // Only small file should be indexed
2037        assert_eq!(stats.total_files, 1);
2038    }
2039
2040    #[test]
2041    fn test_index_mixed_languages() {
2042        let temp = TempDir::new().unwrap();
2043        let project_root = temp.path().join("project");
2044        fs::create_dir(&project_root).unwrap();
2045
2046        let cache = CacheManager::new(&project_root);
2047        let config = IndexConfig::default();
2048        let indexer = Indexer::new(cache, config);
2049
2050        // Create files in multiple languages
2051        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2052        fs::write(project_root.join("test.py"), "def test(): pass").unwrap();
2053        fs::write(project_root.join("app.js"), "function main() {}").unwrap();
2054        fs::write(project_root.join("lib.go"), "func main() {}").unwrap();
2055
2056        let stats = indexer.index(&project_root, false).unwrap();
2057
2058        assert_eq!(stats.total_files, 4);
2059        assert!(stats.files_by_language.contains_key("Rust"));
2060        assert!(stats.files_by_language.contains_key("Python"));
2061        assert!(stats.files_by_language.contains_key("JavaScript"));
2062        assert!(stats.files_by_language.contains_key("Go"));
2063    }
2064
2065    #[test]
2066    fn test_index_updates_cache_stats() {
2067        let temp = TempDir::new().unwrap();
2068        let project_root = temp.path().join("project");
2069        fs::create_dir(&project_root).unwrap();
2070
2071        let cache = CacheManager::new(&project_root);
2072        let config = IndexConfig::default();
2073        let indexer = Indexer::new(cache, config);
2074
2075        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2076
2077        indexer.index(&project_root, false).unwrap();
2078
2079        // Verify cache stats were updated
2080        let cache = CacheManager::new(&project_root);
2081        let stats = cache.stats().unwrap();
2082
2083        assert_eq!(stats.total_files, 1);
2084        assert!(stats.index_size_bytes > 0);
2085    }
2086}