Skip to main content

reflex/
indexer.rs

1//! Indexing engine for parsing source code
2//!
3//! The indexer scans the project directory, parses source files using Tree-sitter,
4//! and builds the symbol/token cache for fast querying.
5
6use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::{ContentReader, ContentWriter};
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Callback used to report indexing progress.
///
/// Arguments, in order: `current_file_count`, `total_file_count`, `status_message`.
/// The callback lives behind an [`Arc`] (and must be `Send + Sync`) so it can be
/// cloned and invoked from a thread other than the one that supplied it.
pub type ProgressCallback = Arc<dyn Fn(usize, usize, String) + Send + Sync>;
41
42/// Result of processing a single file (used for parallel processing)
43struct FileProcessingResult {
44    path: PathBuf,
45    path_str: String,
46    hash: String,
47    content: String,
48    language: Language,
49    line_count: usize,
50    dependencies: Vec<ImportInfo>,
51    exports: Vec<ExportInfo>,
52}
53
54/// Find the nearest tsconfig.json for a given source file
55///
56/// Walks up the directory tree from the source file to find the nearest tsconfig directory.
57/// Returns a reference to the PathAliasMap if found.
58fn find_nearest_tsconfig<'a>(
59    file_path: &str,
60    root: &Path,
61    tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63    // Convert file_path to absolute path (relative to root)
64    let abs_file_path = if Path::new(file_path).is_absolute() {
65        PathBuf::from(file_path)
66    } else {
67        root.join(file_path)
68    };
69
70    // Start from the file's directory and walk up
71    let mut current_dir = abs_file_path.parent()?;
72
73    loop {
74        // Check if we have a tsconfig for this directory
75        if let Some(alias_map) = tsconfigs.get(current_dir) {
76            return Some(alias_map);
77        }
78
79        // Move up one directory
80        current_dir = current_dir.parent()?;
81
82        // Stop if we've reached the root
83        if current_dir == root || !current_dir.starts_with(root) {
84            break;
85        }
86    }
87
88    None
89}
90
91/// Manages the indexing process
92pub struct Indexer {
93    cache: CacheManager,
94    config: IndexConfig,
95}
96
97impl Indexer {
98    /// Create a new indexer with the given cache manager and config
99    pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
100        Self { cache, config }
101    }
102
103    /// Build or update the index for the given root directory
104    pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
105        self.index_with_callback(root, show_progress, None)
106    }
107
108    /// Build or update the index with progress callback support
109    pub fn index_with_callback(
110        &self,
111        root: impl AsRef<Path>,
112        show_progress: bool,
113        progress_callback: Option<ProgressCallback>,
114    ) -> Result<IndexStats> {
115        let root = root.as_ref();
116        log::info!("Indexing directory: {:?}", root);
117
118        // Get git state (if in git repo)
119        let git_state = crate::git::get_git_state_optional(root)?;
120        let branch = git_state
121            .as_ref()
122            .map(|s| s.branch.clone())
123            .unwrap_or_else(|| "_default".to_string());
124
125        if let Some(ref state) = git_state {
126            log::info!(
127                "Git state: branch='{}', commit='{}', dirty={}",
128                state.branch,
129                state.commit,
130                state.dirty
131            );
132        } else {
133            log::info!("Not a git repository, using default branch");
134        }
135
136        // Configure thread pool for parallel processing
137        // 0 = auto (use 80% of available cores to avoid locking the system)
138        let num_threads = if self.config.parallel_threads == 0 {
139            let available_cores = std::thread::available_parallelism()
140                .map(|n| n.get())
141                .unwrap_or(4);
142            // Use 80% of available cores (minimum 1, maximum 8)
143            // Cap at 8 to prevent diminishing returns from cache contention on high-core systems
144            ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145        } else {
146            self.config.parallel_threads
147        };
148
149        log::info!("Using {} threads for parallel indexing (out of {} available)",
150                   num_threads,
151                   std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153        // Ensure cache is initialized
154        self.cache.init()?;
155
156        // Check available disk space after cache is initialized
157        self.check_disk_space(root)?;
158
159        // Load existing hashes for incremental indexing (for current branch)
160        let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161        log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163        // Step 1: Walk directory tree and collect files
164        let files = self.discover_files(root)?;
165        let total_files = files.len();
166        log::info!("Discovered {} files to index", total_files);
167
168        // Step 1.4: Parse tsconfig.json files for TypeScript/Vue path alias resolution
169        // Must be done before parallel processing so it's available during dependency extraction
170        let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171            .unwrap_or_else(|e| {
172                log::warn!("Failed to parse tsconfig.json files: {}", e);
173                HashMap::new()
174            });
175        if !tsconfigs.is_empty() {
176            log::info!("Found {} tsconfig.json files", tsconfigs.len());
177            for (config_dir, alias_map) in &tsconfigs {
178                log::debug!("  {} (base_url: {:?}, {} aliases)",
179                           config_dir.display(),
180                           alias_map.base_url,
181                           alias_map.aliases.len());
182            }
183        }
184
185        // Step 1.5: Quick incremental check - are all files unchanged?
186        // If yes, skip expensive rebuild entirely and return cached stats
187        if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188            // Same number of files - check if any changed by comparing hashes
189            let mut any_changed = false;
190
191            for file_path in &files {
192                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
193                let path_str = file_path.to_string_lossy().to_string();
194                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195                    // Convert absolute path to relative
196                    rel_path.to_string_lossy().to_string()
197                } else {
198                    // Already relative, just strip ./ prefix
199                    path_str.trim_start_matches("./").to_string()
200                };
201
202                // Check if file exists in cache
203                if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204                    // Read and hash file to check if changed
205                    match std::fs::read_to_string(file_path) {
206                        Ok(content) => {
207                            let current_hash = self.hash_content(content.as_bytes());
208                            if &current_hash != existing_hash {
209                                any_changed = true;
210                                log::debug!("File changed: {}", path_str);
211                                break; // Early exit - we know we need to rebuild
212                            }
213                        }
214                        Err(_) => {
215                            any_changed = true;
216                            break;
217                        }
218                    }
219                } else {
220                    // File not in cache - something changed
221                    any_changed = true;
222                    break;
223                }
224            }
225
226            if !any_changed {
227                let content_path = self.cache.path().join("content.bin");
228                let trigrams_path = self.cache.path().join("trigrams.bin");
229
230                // Check if schema hash matches - if not, we need a full rebuild
231                // even though file contents haven't changed (binary format may differ)
232                let schema_ok = self.cache.check_schema_hash().unwrap_or(false);
233
234                if schema_ok && content_path.exists() && trigrams_path.exists() {
235                    if let Ok(reader) = ContentReader::open(&content_path) {
236                        if reader.file_count() > 0 {
237                            log::info!("No files changed - skipping index rebuild");
238                            return Ok(self.cache.stats()?);
239                        }
240                    }
241                    log::warn!("content.bin invalid despite hashes matching - forcing rebuild");
242                } else if !schema_ok {
243                    log::info!("Schema hash changed - forcing full rebuild");
244                } else {
245                    log::warn!("Binary index files missing - forcing rebuild");
246                }
247            }
248        } else if total_files != existing_hashes.len() {
249            log::info!("File count changed ({} -> {}) - full reindex required",
250                       existing_hashes.len(), total_files);
251        }
252
253        // Step 2: Build trigram index + content store
254        let mut new_hashes = HashMap::new();
255        let mut files_indexed = 0;
256        let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); // For batch SQLite update
257        let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); // For batch dependency insertion
258        let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); // For batch export insertion
259
260        // Initialize trigram index and content store
261        let mut trigram_index = TrigramIndex::new();
262        let mut content_writer = ContentWriter::new();
263
264        // Enable batch-flush mode for trigram index if we have lots of files
265        if total_files > 10000 {
266            let temp_dir = self.cache.path().join("trigram_temp");
267            trigram_index.enable_batch_flush(temp_dir)
268                .context("Failed to enable batch-flush mode for trigram index")?;
269            log::info!("Enabled batch-flush mode for {} files", total_files);
270        }
271
272        // Initialize content writer to start streaming writes immediately
273        let content_path = self.cache.path().join("content.bin");
274        content_writer.init(content_path.clone())
275            .context("Failed to initialize content writer")?;
276
277        // Create progress bar (only if requested via --progress flag)
278        let pb = if show_progress {
279            let pb = ProgressBar::new(total_files as u64);
280            pb.set_draw_target(ProgressDrawTarget::stderr());
281            pb.set_style(
282                ProgressStyle::default_bar()
283                    .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
284                    .unwrap()
285                    .progress_chars("=>-")
286            );
287            // Force updates every 100ms to ensure progress is visible
288            pb.enable_steady_tick(std::time::Duration::from_millis(100));
289            pb
290        } else {
291            ProgressBar::hidden()
292        };
293
294        // Atomic counter for thread-safe progress updates
295        let progress_counter = Arc::new(AtomicU64::new(0));
296        // Shared status message for progress callback
297        let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
298
299        let _start_time = Instant::now();
300
301        // Spawn a background thread to update progress bar and call callback during parallel processing
302        let counter_for_thread = Arc::clone(&progress_counter);
303        let status_for_thread = Arc::clone(&progress_status);
304        let pb_clone = pb.clone();
305        let callback_for_thread = progress_callback.clone();
306        let total_files_for_thread = total_files;
307        let progress_thread = if show_progress || callback_for_thread.is_some() {
308            Some(std::thread::spawn(move || {
309                loop {
310                    let count = counter_for_thread.load(Ordering::Relaxed);
311                    pb_clone.set_position(count);
312
313                    // Call progress callback if provided
314                    if let Some(ref callback) = callback_for_thread {
315                        let status = status_for_thread.lock().unwrap().clone();
316                        callback(count as usize, total_files_for_thread, status);
317                    }
318
319                    if count >= total_files_for_thread as u64 {
320                        break;
321                    }
322                    std::thread::sleep(std::time::Duration::from_millis(50));
323                }
324            }))
325        } else {
326            None
327        };
328
329        // Build a custom thread pool with limited threads
330        let pool = rayon::ThreadPoolBuilder::new()
331            .num_threads(num_threads)
332            .build()
333            .context("Failed to create thread pool")?;
334
335        // Process files in batches to avoid OOM on huge codebases
336        // Batch size: process 5000 files at a time to limit memory usage
337        const BATCH_SIZE: usize = 5000;
338        let num_batches = total_files.div_ceil(BATCH_SIZE);
339        log::info!("Processing {} files in {} batches of up to {} files",
340                   total_files, num_batches, BATCH_SIZE);
341
342        for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
343            log::info!("Processing batch {}/{} ({} files)",
344                       batch_idx + 1, num_batches, batch_files.len());
345
346            // Process files in parallel using rayon with custom thread pool
347            let counter_clone = Arc::clone(&progress_counter);
348            let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
349                batch_files
350                    .par_iter()
351                    .map(|file_path| {
352                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
353                let path_str = file_path.to_string_lossy().to_string();
354                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
355                    // Convert absolute path to relative
356                    rel_path.to_string_lossy().to_string()
357                } else {
358                    // Already relative, just strip ./ prefix
359                    path_str.trim_start_matches("./").to_string()
360                };
361
362                // Read file content once (used for hashing, trigrams, and parsing)
363                let content = match std::fs::read_to_string(&file_path) {
364                    Ok(c) => c,
365                    Err(e) => {
366                        log::warn!("Failed to read {}: {}", path_str, e);
367                        // Update progress
368                        counter_clone.fetch_add(1, Ordering::Relaxed);
369                        return None;
370                    }
371                };
372
373                // Compute hash from content (no duplicate file read!)
374                let hash = self.hash_content(content.as_bytes());
375
376                // Detect language
377                let ext = file_path.extension()
378                    .and_then(|e| e.to_str())
379                    .unwrap_or("");
380                let language = Language::from_extension(ext);
381
382                // Count lines in the file
383                let line_count = content.lines().count();
384
385                // Extract dependencies and exports for supported languages
386                let dependencies = match language {
387                    Language::Rust => {
388                        match RustDependencyExtractor::extract_dependencies(&content) {
389                            Ok(deps) => deps,
390                            Err(e) => {
391                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
392                                Vec::new()
393                            }
394                        }
395                    }
396                    Language::Python => {
397                        match PythonDependencyExtractor::extract_dependencies(&content) {
398                            Ok(deps) => deps,
399                            Err(e) => {
400                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
401                                Vec::new()
402                            }
403                        }
404                    }
405                    Language::TypeScript | Language::JavaScript => {
406                        // Find nearest tsconfig for path alias resolution
407                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
408                        match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
409                            Ok(deps) => deps,
410                            Err(e) => {
411                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
412                                Vec::new()
413                            }
414                        }
415                    }
416                    Language::Go => {
417                        match GoDependencyExtractor::extract_dependencies(&content) {
418                            Ok(deps) => deps,
419                            Err(e) => {
420                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
421                                Vec::new()
422                            }
423                        }
424                    }
425                    Language::Java => {
426                        match JavaDependencyExtractor::extract_dependencies(&content) {
427                            Ok(deps) => deps,
428                            Err(e) => {
429                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
430                                Vec::new()
431                            }
432                        }
433                    }
434                    Language::C => {
435                        match CDependencyExtractor::extract_dependencies(&content) {
436                            Ok(deps) => deps,
437                            Err(e) => {
438                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
439                                Vec::new()
440                            }
441                        }
442                    }
443                    Language::Cpp => {
444                        match CppDependencyExtractor::extract_dependencies(&content) {
445                            Ok(deps) => deps,
446                            Err(e) => {
447                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
448                                Vec::new()
449                            }
450                        }
451                    }
452                    Language::CSharp => {
453                        match CSharpDependencyExtractor::extract_dependencies(&content) {
454                            Ok(deps) => deps,
455                            Err(e) => {
456                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
457                                Vec::new()
458                            }
459                        }
460                    }
461                    Language::PHP => {
462                        match PhpDependencyExtractor::extract_dependencies(&content) {
463                            Ok(deps) => deps,
464                            Err(e) => {
465                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
466                                Vec::new()
467                            }
468                        }
469                    }
470                    Language::Ruby => {
471                        match RubyDependencyExtractor::extract_dependencies(&content) {
472                            Ok(deps) => deps,
473                            Err(e) => {
474                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
475                                Vec::new()
476                            }
477                        }
478                    }
479                    Language::Kotlin => {
480                        match KotlinDependencyExtractor::extract_dependencies(&content) {
481                            Ok(deps) => deps,
482                            Err(e) => {
483                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
484                                Vec::new()
485                            }
486                        }
487                    }
488                    Language::Zig => {
489                        match ZigDependencyExtractor::extract_dependencies(&content) {
490                            Ok(deps) => deps,
491                            Err(e) => {
492                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
493                                Vec::new()
494                            }
495                        }
496                    }
497                    Language::Vue => {
498                        // Find nearest tsconfig for path alias resolution
499                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
500                        match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
501                            Ok(deps) => deps,
502                            Err(e) => {
503                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
504                                Vec::new()
505                            }
506                        }
507                    }
508                    Language::Svelte => {
509                        match SvelteDependencyExtractor::extract_dependencies(&content) {
510                            Ok(deps) => deps,
511                            Err(e) => {
512                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
513                                Vec::new()
514                            }
515                        }
516                    }
517                    // Other languages not yet implemented
518                    _ => Vec::new(),
519                };
520
521                // Extract exports (for barrel export tracking)
522                let exports = match language {
523                    Language::TypeScript | Language::JavaScript => {
524                        // Find nearest tsconfig for path alias resolution
525                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
526                        match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
527                            Ok(exports) => exports,
528                            Err(e) => {
529                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
530                                Vec::new()
531                            }
532                        }
533                    }
534                    Language::Vue => {
535                        // Find nearest tsconfig for path alias resolution
536                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
537                        match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
538                            Ok(exports) => exports,
539                            Err(e) => {
540                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
541                                Vec::new()
542                            }
543                        }
544                    }
545                    // Other languages not yet implemented for export tracking
546                    _ => Vec::new(),
547                };
548
549                // Update progress atomically
550                counter_clone.fetch_add(1, Ordering::Relaxed);
551
552                Some(FileProcessingResult {
553                    path: file_path.clone(),
554                    path_str: normalized_path.to_string(),
555                    hash,
556                    content,
557                    language,
558                    line_count,
559                    dependencies,
560                    exports,
561                })
562                })
563                .collect()
564            });
565
566            // Process batch results immediately (streaming approach to minimize memory)
567            for result in results.into_iter().flatten() {
568                // Add file to trigram index (get file_id)
569                let file_id = trigram_index.add_file(result.path.clone());
570
571                // Index file content directly (avoid accumulating all trigrams)
572                trigram_index.index_file(file_id, &result.content);
573
574                // Add to content store
575                content_writer.add_file(result.path.clone(), &result.content);
576
577                files_indexed += 1;
578
579                // Prepare file metadata for batch database update
580                file_metadata.push((
581                    result.path_str.clone(),
582                    result.hash.clone(),
583                    format!("{:?}", result.language),
584                    result.line_count
585                ));
586
587                // Collect dependencies for batch insertion (if any)
588                if !result.dependencies.is_empty() {
589                    all_dependencies.push((result.path_str.clone(), result.dependencies));
590                }
591
592                // Collect exports for batch insertion (if any)
593                if !result.exports.is_empty() {
594                    all_exports.push((result.path_str.clone(), result.exports));
595                }
596
597                new_hashes.insert(result.path_str, result.hash);
598            }
599
600            // Flush trigram index batch to disk if batch-flush mode is enabled
601            if total_files > 10000 {
602                let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
603                if show_progress {
604                    pb.set_message(flush_msg.clone());
605                }
606                *progress_status.lock().unwrap() = flush_msg;
607                trigram_index.flush_batch()
608                    .context("Failed to flush trigram batch")?;
609            }
610        }
611
612        // Wait for progress thread to finish
613        if let Some(thread) = progress_thread {
614            let _ = thread.join();
615        }
616
617        // Update progress bar to final count
618        if show_progress {
619            let final_count = progress_counter.load(Ordering::Relaxed);
620            pb.set_position(final_count);
621        }
622
623        // Finalize trigram index (sort and deduplicate posting lists)
624        *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
625        if show_progress {
626            pb.set_message("Finalizing trigram index...".to_string());
627        }
628        trigram_index.finalize();
629
630        // Update progress bar message for post-processing
631        *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
632        if show_progress {
633            pb.set_message("Writing file metadata to database...".to_string());
634        }
635
636        // Batch write file metadata AND branch hashes in a SINGLE atomic transaction
637        // This ensures that if files are inserted, their hashes are guaranteed to be inserted too
638        if !file_metadata.is_empty() {
639            // Prepare files data (path, language, line_count)
640            let files_without_hash: Vec<(String, String, usize)> = file_metadata
641                .iter()
642                .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
643                .collect();
644
645        // Record files for this branch (for branch-aware indexing)
646        *progress_status.lock().unwrap() = "Recording branch files...".to_string();
647        if show_progress {
648            pb.set_message("Recording branch files...".to_string());
649        }
650
651            // Prepare branch files data (path, hash)
652            let branch_files: Vec<(String, String)> = file_metadata
653                .iter()
654                .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
655                .collect();
656
657            // Use atomic method that combines both operations
658            self.cache.batch_update_files_and_branch(
659                &files_without_hash,
660                &branch_files,
661                &branch,
662                git_state.as_ref().map(|s| s.commit.as_str()),
663            ).context("Failed to batch update files and branch hashes")?;
664
665            log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
666        }
667
668        // Update branch metadata
669        self.cache.update_branch_metadata(
670            &branch,
671            git_state.as_ref().map(|s| s.commit.as_str()),
672            file_metadata.len(),
673            git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
674        )?;
675
676        // Force WAL checkpoint to ensure background processes see all committed data
677        // This is critical when spawning background symbol indexer immediately after
678        self.cache.checkpoint_wal()
679            .context("Failed to checkpoint WAL")?;
680        log::debug!("WAL checkpoint completed - database is fully synced");
681
682        // Step 2.5: Insert dependencies (after files are inserted and have IDs)
683        if !all_dependencies.is_empty() {
684            *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
685            if show_progress {
686                pb.set_message("Extracting dependencies...".to_string());
687            }
688
689            // Find and parse all go.mod files for Go projects (monorepo support)
690            let go_modules = crate::parsers::go::parse_all_go_modules(root)
691                .unwrap_or_else(|e| {
692                    log::warn!("Failed to parse go.mod files: {}", e);
693                    Vec::new()
694                });
695            if !go_modules.is_empty() {
696                log::info!("Found {} Go modules", go_modules.len());
697                for module in &go_modules {
698                    log::debug!("  {} (project: {})", module.name, module.project_root);
699                }
700            }
701
702            // Find and parse all pom.xml/build.gradle files for Java projects (monorepo support)
703            let java_projects = crate::parsers::java::parse_all_java_projects(root)
704                .unwrap_or_else(|e| {
705                    log::warn!("Failed to parse Java project configs: {}", e);
706                    Vec::new()
707                });
708            if !java_projects.is_empty() {
709                log::info!("Found {} Java projects", java_projects.len());
710                for project in &java_projects {
711                    log::debug!("  {} (project: {})", project.package_name, project.project_root);
712                }
713            }
714
715            // Find and parse all Python package configs for Python projects (monorepo support)
716            let python_packages = crate::parsers::python::parse_all_python_packages(root)
717                .unwrap_or_else(|e| {
718                    log::warn!("Failed to parse Python package configs: {}", e);
719                    Vec::new()
720                });
721            if !python_packages.is_empty() {
722                log::info!("Found {} Python packages", python_packages.len());
723                for package in &python_packages {
724                    log::debug!("  {} (project: {})", package.name, package.project_root);
725                }
726            }
727
728            // Find and parse *.gemspec files for Ruby projects (monorepo support)
729            let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
730                .unwrap_or_else(|e| {
731                    log::warn!("Failed to parse Ruby project configs: {}", e);
732                    Vec::new()
733                });
734            if !ruby_projects.is_empty() {
735                log::info!("Found {} Ruby projects", ruby_projects.len());
736                for project in &ruby_projects {
737                    log::debug!("  {} (project: {})", project.gem_name, project.project_root);
738                }
739            }
740
741            // Note: Kotlin projects use the same java_projects above (same build systems: Maven/Gradle)
742
743            // Find and parse all composer.json files for PHP projects (monorepo support)
744            let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
745                .unwrap_or_else(|e| {
746                    log::warn!("Failed to parse composer.json files: {}", e);
747                    Vec::new()
748                });
749            if !php_psr4_mappings.is_empty() {
750                log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
751                for mapping in &php_psr4_mappings {
752                    log::debug!("  {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
753                }
754            }
755
756            // Find and parse all tsconfig.json files for TypeScript/Vue projects (monorepo support)
757            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
758                .unwrap_or_else(|e| {
759                    log::warn!("Failed to parse tsconfig.json files: {}", e);
760                    HashMap::new()
761                });
762            if !tsconfigs.is_empty() {
763                log::info!("Found {} tsconfig.json files", tsconfigs.len());
764                for (config_dir, alias_map) in &tsconfigs {
765                    log::debug!("  {} (base_url: {:?}, {} aliases)",
766                               config_dir.display(),
767                               alias_map.base_url,
768                               alias_map.aliases.len());
769                }
770            }
771
772            // Create dependency index to resolve paths and insert dependencies
773            let cache_for_deps = CacheManager::new(root);
774            let dep_index = DependencyIndex::new(cache_for_deps);
775
776            let mut total_deps_inserted = 0;
777
778            // Process each file's dependencies
779            for (file_path, import_infos) in all_dependencies {
780                // Get file ID from database
781                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
782                    Some(id) => id,
783                    None => {
784                        log::warn!("File not found in database (skipping dependencies): {}", file_path);
785                        continue;
786                    }
787                };
788
789                // Reclassify and filter dependencies
790                let mut resolved_deps = Vec::new();
791
792                for mut import_info in import_infos {
793                    // Reclassify Go imports using module names (if Go project)
794                    if file_path.ends_with(".go") {
795                        // Check if the import matches any Go module
796                        let mut reclassified = false;
797                        for module in &go_modules {
798                            import_info.import_type = crate::parsers::go::reclassify_go_import(
799                                &import_info.imported_path,
800                                Some(&module.name),
801                            );
802                            // If it's internal, we've found the right module
803                            if matches!(import_info.import_type, ImportType::Internal) {
804                                reclassified = true;
805                                break;
806                            }
807                        }
808                        // If no module matched, use base classification
809                        if !reclassified {
810                            import_info.import_type = crate::parsers::go::reclassify_go_import(
811                                &import_info.imported_path,
812                                None,
813                            );
814                        }
815                    }
816
817                    // Reclassify Java imports using package names (if Java project)
818                    if file_path.ends_with(".java") {
819                        // Check if the import matches any Java project
820                        let mut reclassified = false;
821                        for project in &java_projects {
822                            import_info.import_type = crate::parsers::java::reclassify_java_import(
823                                &import_info.imported_path,
824                                Some(&project.package_name),
825                            );
826                            // If it's internal, we've found the right project
827                            if matches!(import_info.import_type, ImportType::Internal) {
828                                reclassified = true;
829                                break;
830                            }
831                        }
832                        // If no project matched, use base classification
833                        if !reclassified {
834                            import_info.import_type = crate::parsers::java::reclassify_java_import(
835                                &import_info.imported_path,
836                                None,
837                            );
838                        }
839                    }
840
841                    // Reclassify Python imports using package names (if Python project)
842                    if file_path.ends_with(".py") {
843                        // Check if the import matches any Python package
844                        let mut reclassified = false;
845                        for package in &python_packages {
846                            import_info.import_type = crate::parsers::python::reclassify_python_import(
847                                &import_info.imported_path,
848                                Some(&package.name),
849                            );
850                            // If it's internal, we've found the right package
851                            if matches!(import_info.import_type, ImportType::Internal) {
852                                reclassified = true;
853                                break;
854                            }
855                        }
856                        // If no package matched, use base classification
857                        if !reclassified {
858                            import_info.import_type = crate::parsers::python::reclassify_python_import(
859                                &import_info.imported_path,
860                                None,
861                            );
862                        }
863                    }
864
865                    // Reclassify Ruby imports using gem names (if Ruby project)
866                    if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
867                        // Check if the import matches any Ruby project
868                        let mut reclassified = false;
869                        for project in &ruby_projects {
870                            let gem_names = vec![project.gem_name.clone()];
871                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
872                                &import_info.imported_path,
873                                &gem_names,
874                            );
875                            // If it's internal, we've found the right project
876                            if matches!(import_info.import_type, ImportType::Internal) {
877                                reclassified = true;
878                                break;
879                            }
880                        }
881                        // If no project matched, use base classification (will be External or Stdlib)
882                        if !reclassified {
883                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
884                                &import_info.imported_path,
885                                &[],
886                            );
887                        }
888                    }
889
890                    // Reclassify Kotlin imports using package names (if Kotlin project)
891                    if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
892                        // Check if the import matches any Java/Kotlin project (same build systems)
893                        let mut reclassified = false;
894                        for project in &java_projects {
895                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
896                                &import_info.imported_path,
897                                Some(&project.package_name),
898                            );
899                            // If it's internal, we've found the right project
900                            if matches!(import_info.import_type, ImportType::Internal) {
901                                reclassified = true;
902                                break;
903                            }
904                        }
905                        // If no project matched, use base classification
906                        if !reclassified {
907                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
908                                &import_info.imported_path,
909                                None,
910                            );
911                        }
912                    }
913
914                    // ONLY insert Internal dependencies - skip External and Stdlib
915                    if !matches!(import_info.import_type, ImportType::Internal) {
916                        continue;
917                    }
918
919                    // Resolve PHP dependencies using PSR-4 (deterministic)
920                    let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
921                        // Use PSR-4 to resolve namespace to file path
922                        if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
923                            &import_info.imported_path,
924                            &php_psr4_mappings,
925                        ) {
926                            // Look up file ID in database using exact match
927                            match dep_index.get_file_id_by_path(&resolved_path) {
928                                Ok(Some(id)) => {
929                                    log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
930                                               import_info.imported_path, resolved_path, id);
931                                    Some(id)
932                                }
933                                Ok(None) => {
934                                    log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
935                                               import_info.imported_path, resolved_path);
936                                    None
937                                }
938                                Err(e) => {
939                                    log::debug!("Skipping PHP dependency resolution for '{}': {}", resolved_path, e);
940                                    None
941                                }
942                            }
943                        } else {
944                            log::trace!("Could not resolve PHP namespace using PSR-4: {}",
945                                       import_info.imported_path);
946                            None
947                        }
948                    } else if file_path.ends_with(".py") && !python_packages.is_empty() {
949                        // Resolve Python dependencies using package mappings
950                        if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
951                            &import_info.imported_path,
952                            &python_packages,
953                            Some(&file_path),
954                        ) {
955                            // Look up file ID in database using exact match
956                            match dep_index.get_file_id_by_path(&resolved_path) {
957                                Ok(Some(id)) => {
958                                    log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
959                                               import_info.imported_path, resolved_path, id);
960                                    Some(id)
961                                }
962                                Ok(None) => {
963                                    log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
964                                               import_info.imported_path, resolved_path);
965                                    None
966                                }
967                                Err(e) => {
968                                    log::debug!("Skipping Python dependency resolution for '{}': {}", resolved_path, e);
969                                    None
970                                }
971                            }
972                        } else {
973                            log::trace!("Could not resolve Python import: {}", import_info.imported_path);
974                            None
975                        }
976                    } else if file_path.ends_with(".go") && !go_modules.is_empty() {
977                        // Resolve Go dependencies using module mappings
978                        if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
979                            &import_info.imported_path,
980                            &go_modules,
981                            Some(&file_path),
982                        ) {
983                            // Look up file ID in database using exact match
984                            match dep_index.get_file_id_by_path(&resolved_path) {
985                                Ok(Some(id)) => {
986                                    log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
987                                               import_info.imported_path, resolved_path, id);
988                                    Some(id)
989                                }
990                                Ok(None) => {
991                                    log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
992                                               import_info.imported_path, resolved_path);
993                                    None
994                                }
995                                Err(e) => {
996                                    log::debug!("Skipping Go dependency resolution for '{}': {}", resolved_path, e);
997                                    None
998                                }
999                            }
1000                        } else {
1001                            log::trace!("Could not resolve Go import: {}", import_info.imported_path);
1002                            None
1003                        }
1004                    } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1005                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1006                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1007                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
1008                        // Resolve TypeScript/JavaScript dependencies (relative imports and path aliases)
1009                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1010                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1011                            &import_info.imported_path,
1012                            Some(&file_path),
1013                            alias_map,
1014                        ) {
1015                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
1016                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1017
1018                            // Try each candidate in order until we find one in the database
1019                            let mut resolved_id = None;
1020                            for candidate_path in candidates {
1021                                // Normalize path to be relative to project root
1022                                // Convert absolute paths to relative (without requiring file to exist)
1023                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1024                                    rel_path.to_string_lossy().to_string()
1025                                } else {
1026                                    // Not an absolute path or not under root - use as-is
1027                                    candidate_path.to_string()
1028                                };
1029
1030                                log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1031                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1032                                    Ok(Some(id)) => {
1033                                        log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1034                                                   import_info.imported_path, normalized_candidate, id);
1035                                        resolved_id = Some(id);
1036                                        break; // Found a match, stop trying
1037                                    }
1038                                    Ok(None) => {
1039                                        log::trace!("TS/JS candidate not in index: {}", candidate_path);
1040                                    }
1041                                    Err(e) => {
1042                                        log::debug!("Skipping TS/JS dependency resolution for '{}': {}", normalized_candidate, e);
1043                                    }
1044                                }
1045                            }
1046
1047                            if resolved_id.is_none() {
1048                                log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1049                                           candidates_str);
1050                            }
1051
1052                            resolved_id
1053                        } else {
1054                            log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1055                            None
1056                        }
1057                    } else if file_path.ends_with(".rs") {
1058                        // Resolve Rust dependencies (crate::, super::, self::, mod declarations)
1059                        if let Some(resolved_path) = crate::parsers::rust::resolve_rust_use_to_path(
1060                            &import_info.imported_path,
1061                            Some(&file_path),
1062                            Some(root.to_str().unwrap_or("")),
1063                        ) {
1064                            // Look up file ID in database using exact match
1065                            match dep_index.get_file_id_by_path(&resolved_path) {
1066                                Ok(Some(id)) => {
1067                                    log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1068                                               import_info.imported_path, resolved_path, id);
1069                                    Some(id)
1070                                }
1071                                Ok(None) => {
1072                                    log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1073                                               import_info.imported_path, resolved_path);
1074                                    None
1075                                }
1076                                Err(e) => {
1077                                    log::debug!("Skipping Rust dependency resolution for '{}': {}", resolved_path, e);
1078                                    None
1079                                }
1080                            }
1081                        } else {
1082                            log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1083                            None
1084                        }
1085                    } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1086                        // Resolve Java dependencies using project mappings
1087                        if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1088                            &import_info.imported_path,
1089                            &java_projects,
1090                            Some(&file_path),
1091                        ) {
1092                            // Look up file ID in database using exact match
1093                            match dep_index.get_file_id_by_path(&resolved_path) {
1094                                Ok(Some(id)) => {
1095                                    log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1096                                               import_info.imported_path, resolved_path, id);
1097                                    Some(id)
1098                                }
1099                                Ok(None) => {
1100                                    log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1101                                               import_info.imported_path, resolved_path);
1102                                    None
1103                                }
1104                                Err(e) => {
1105                                    log::debug!("Skipping Java dependency resolution for '{}': {}", resolved_path, e);
1106                                    None
1107                                }
1108                            }
1109                        } else {
1110                            log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1111                            None
1112                        }
1113                    } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1114                        // Resolve Kotlin dependencies using project mappings (same build systems as Java)
1115                        if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1116                            &import_info.imported_path,
1117                            &java_projects,
1118                            Some(&file_path),
1119                        ) {
1120                            // Look up file ID in database using exact match
1121                            match dep_index.get_file_id_by_path(&resolved_path) {
1122                                Ok(Some(id)) => {
1123                                    log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1124                                               import_info.imported_path, resolved_path, id);
1125                                    Some(id)
1126                                }
1127                                Ok(None) => {
1128                                    log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1129                                               import_info.imported_path, resolved_path);
1130                                    None
1131                                }
1132                                Err(e) => {
1133                                    log::debug!("Skipping Kotlin dependency resolution for '{}': {}", resolved_path, e);
1134                                    None
1135                                }
1136                            }
1137                        } else {
1138                            log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1139                            None
1140                        }
1141                    } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1142                        // Resolve Ruby dependencies using project mappings
1143                        if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1144                            &import_info.imported_path,
1145                            &ruby_projects,
1146                            Some(&file_path),
1147                        ) {
1148                            // Look up file ID in database using exact match
1149                            match dep_index.get_file_id_by_path(&resolved_path) {
1150                                Ok(Some(id)) => {
1151                                    log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1152                                               import_info.imported_path, resolved_path, id);
1153                                    Some(id)
1154                                }
1155                                Ok(None) => {
1156                                    log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1157                                               import_info.imported_path, resolved_path);
1158                                    None
1159                                }
1160                                Err(e) => {
1161                                    log::debug!("Skipping Ruby dependency resolution for '{}': {}", resolved_path, e);
1162                                    None
1163                                }
1164                            }
1165                        } else {
1166                            log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1167                            None
1168                        }
1169                    } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1170                        // Resolve C dependencies (relative #include paths)
1171                        if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1172                            &import_info.imported_path,
1173                            Some(&file_path),
1174                        ) {
1175                            // Look up file ID in database using exact match
1176                            match dep_index.get_file_id_by_path(&resolved_path) {
1177                                Ok(Some(id)) => {
1178                                    log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1179                                               import_info.imported_path, resolved_path, id);
1180                                    Some(id)
1181                                }
1182                                Ok(None) => {
1183                                    log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1184                                               import_info.imported_path, resolved_path);
1185                                    None
1186                                }
1187                                Err(e) => {
1188                                    log::debug!("Skipping C dependency resolution for '{}': {}", resolved_path, e);
1189                                    None
1190                                }
1191                            }
1192                        } else {
1193                            log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1194                            None
1195                        }
1196                    } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1197                           || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1198                           || file_path.ends_with(".C") || file_path.ends_with(".H") {
1199                        // Resolve C++ dependencies (relative #include paths)
1200                        if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1201                            &import_info.imported_path,
1202                            Some(&file_path),
1203                        ) {
1204                            // Look up file ID in database using exact match
1205                            match dep_index.get_file_id_by_path(&resolved_path) {
1206                                Ok(Some(id)) => {
1207                                    log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1208                                               import_info.imported_path, resolved_path, id);
1209                                    Some(id)
1210                                }
1211                                Ok(None) => {
1212                                    log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1213                                               import_info.imported_path, resolved_path);
1214                                    None
1215                                }
1216                                Err(e) => {
1217                                    log::debug!("Skipping C++ dependency resolution for '{}': {}", resolved_path, e);
1218                                    None
1219                                }
1220                            }
1221                        } else {
1222                            log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1223                            None
1224                        }
1225                    } else if file_path.ends_with(".cs") {
1226                        // Resolve C# dependencies (using namespace-to-path mapping)
1227                        if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1228                            &import_info.imported_path,
1229                            Some(&file_path),
1230                        ) {
1231                            // Look up file ID in database using exact match
1232                            match dep_index.get_file_id_by_path(&resolved_path) {
1233                                Ok(Some(id)) => {
1234                                    log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1235                                               import_info.imported_path, resolved_path, id);
1236                                    Some(id)
1237                                }
1238                                Ok(None) => {
1239                                    log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1240                                               import_info.imported_path, resolved_path);
1241                                    None
1242                                }
1243                                Err(e) => {
1244                                    log::debug!("Skipping C# dependency resolution for '{}': {}", resolved_path, e);
1245                                    None
1246                                }
1247                            }
1248                        } else {
1249                            log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1250                            None
1251                        }
1252                    } else if file_path.ends_with(".zig") {
1253                        // Resolve Zig dependencies (relative @import paths)
1254                        if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1255                            &import_info.imported_path,
1256                            Some(&file_path),
1257                        ) {
1258                            // Look up file ID in database using exact match
1259                            match dep_index.get_file_id_by_path(&resolved_path) {
1260                                Ok(Some(id)) => {
1261                                    log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1262                                               import_info.imported_path, resolved_path, id);
1263                                    Some(id)
1264                                }
1265                                Ok(None) => {
1266                                    log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1267                                               import_info.imported_path, resolved_path);
1268                                    None
1269                                }
1270                                Err(e) => {
1271                                    log::debug!("Skipping Zig dependency resolution for '{}': {}", resolved_path, e);
1272                                    None
1273                                }
1274                            }
1275                        } else {
1276                            log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1277                            None
1278                        }
1279                    } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1280                        // Resolve Vue/Svelte dependencies (use TypeScript/JavaScript resolver for imports in <script> blocks)
1281                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1282                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1283                            &import_info.imported_path,
1284                            Some(&file_path),
1285                            alias_map,
1286                        ) {
1287                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
1288                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1289
1290                            // Try each candidate in order until we find one in the database
1291                            let mut resolved_id = None;
1292                            for candidate_path in candidates {
1293                                // Normalize path to be relative to project root
1294                                // Convert absolute paths to relative (without requiring file to exist)
1295                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1296                                    rel_path.to_string_lossy().to_string()
1297                                } else {
1298                                    // Not an absolute path or not under root - use as-is
1299                                    candidate_path.to_string()
1300                                };
1301
1302                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1303                                    Ok(Some(id)) => {
1304                                        log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1305                                                   import_info.imported_path, candidate_path, id);
1306                                        resolved_id = Some(id);
1307                                        break; // Found a match, stop trying
1308                                    }
1309                                    Ok(None) => {
1310                                        log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1311                                    }
1312                                    Err(e) => {
1313                                        log::debug!("Skipping Vue/Svelte dependency resolution for '{}': {}", normalized_candidate, e);
1314                                    }
1315                                }
1316                            }
1317
1318                            if resolved_id.is_none() {
1319                                log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1320                                           candidates_str);
1321                            }
1322
1323                            resolved_id
1324                        } else {
1325                            log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1326                            None
1327                        }
1328                    } else {
1329                        None
1330                    };
1331
1332                    // resolved_file_id will be populated using deterministic language-specific resolution
1333                    // All language resolvers have been implemented!
1334                    resolved_deps.push(Dependency {
1335                        file_id,
1336                        imported_path: import_info.imported_path.clone(),
1337                        resolved_file_id,
1338                        import_type: import_info.import_type,
1339                        line_number: import_info.line_number,
1340                        imported_symbols: import_info.imported_symbols.clone(),
1341                    });
1342                }
1343
1344                // Clear existing dependencies for this file (incremental reindex)
1345                dep_index.clear_dependencies(file_id)?;
1346
1347                // Batch insert dependencies
1348                if !resolved_deps.is_empty() {
1349                    dep_index.batch_insert_dependencies(&resolved_deps)?;
1350                    total_deps_inserted += resolved_deps.len();
1351                }
1352            }
1353
1354            log::info!("Extracted {} dependencies", total_deps_inserted);
1355        }
1356
1357        // Step 2.6: Insert exports (after files are inserted and have IDs)
1358        if !all_exports.is_empty() {
1359            *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1360            if show_progress {
1361                pb.set_message("Extracting exports...".to_string());
1362            }
1363
1364            // Reuse the tsconfigs parsed earlier for TypeScript/Vue path alias resolution
1365            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1366                .unwrap_or_else(|e| {
1367                    log::warn!("Failed to parse tsconfig.json files: {}", e);
1368                    HashMap::new()
1369                });
1370
1371            // Create dependency index to resolve paths and insert exports
1372            let cache_for_exports = CacheManager::new(root);
1373            let dep_index = DependencyIndex::new(cache_for_exports);
1374
1375            let mut total_exports_inserted = 0;
1376
1377            // Process each file's exports
1378            for (file_path, export_infos) in all_exports {
1379                // Get file ID from database
1380                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1381                    Some(id) => id,
1382                    None => {
1383                        log::warn!("File not found in database (skipping exports): {}", file_path);
1384                        continue;
1385                    }
1386                };
1387
1388                // Resolve export source paths and insert
1389                for export_info in export_infos {
1390                    // Resolve export source path (same logic as imports)
1391                    let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1392                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1393                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1394                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1395                            || file_path.ends_with(".vue") {
1396                        // Resolve TypeScript/JavaScript/Vue export paths (relative imports and path aliases)
1397                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1398                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1399                            &export_info.source_path,
1400                            Some(&file_path),
1401                            alias_map,
1402                        ) {
1403                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js|path.vue")
1404                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1405
1406                            // Try each candidate in order until we find one in the database
1407                            let mut resolved_id = None;
1408                            for candidate_path in candidates {
1409                                // Normalize path to be relative to project root
1410                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1411                                    rel_path.to_string_lossy().to_string()
1412                                } else {
1413                                    candidate_path.to_string()
1414                                };
1415
1416                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1417                                    Ok(Some(id)) => {
1418                                        log::trace!("Resolved export source: {} -> {} (file_id={})",
1419                                                   export_info.source_path, normalized_candidate, id);
1420                                        resolved_id = Some(id);
1421                                        break; // Found a match, stop trying
1422                                    }
1423                                    Ok(None) => {
1424                                        log::trace!("Export source candidate not in index: {}", candidate_path);
1425                                    }
1426                                    Err(e) => {
1427                                        log::debug!("Skipping export source resolution for '{}': {}", normalized_candidate, e);
1428                                    }
1429                                }
1430                            }
1431
1432                            if resolved_id.is_none() {
1433                                log::trace!("Export source: no matching file found in database for any candidate: {}",
1434                                           candidates_str);
1435                            }
1436
1437                            resolved_id
1438                        } else {
1439                            log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1440                            None
1441                        }
1442                    } else {
1443                        None
1444                    };
1445
1446                    // Insert export into database
1447                    dep_index.insert_export(
1448                        file_id,
1449                        export_info.exported_symbol,
1450                        export_info.source_path,
1451                        resolved_source_id,
1452                        export_info.line_number,
1453                    )?;
1454
1455                    total_exports_inserted += 1;
1456                }
1457            }
1458
1459            log::info!("Extracted {} exports", total_exports_inserted);
1460        }
1461
1462        log::info!("Indexed {} files", files_indexed);
1463
1464        // Step 3: Write trigram index
1465        *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1466        if show_progress {
1467            pb.set_message("Writing trigram index...".to_string());
1468        }
1469        let trigrams_path = self.cache.path().join("trigrams.bin");
1470        log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1471                   trigram_index.trigram_count());
1472
1473        trigram_index.write(&trigrams_path)
1474            .context("Failed to write trigram index")?;
1475        log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1476
1477        // Step 4: Finalize content store (already been writing incrementally)
1478        *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1479        if show_progress {
1480            pb.set_message("Finalizing content store...".to_string());
1481        }
1482        content_writer.finalize_if_needed()
1483            .context("Failed to finalize content store")?;
1484        log::info!("Wrote {} files ({} bytes) to content.bin",
1485                   content_writer.file_count(), content_writer.content_size());
1486
1487        // Step 5: Update SQLite statistics from database totals (branch-aware)
1488        *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1489        if show_progress {
1490            pb.set_message("Updating statistics...".to_string());
1491        }
1492        // Update stats for current branch only
1493        self.cache.update_stats(&branch)?;
1494
1495        // Update schema hash to mark cache as compatible with current binary
1496        self.cache.update_schema_hash()?;
1497
1498        pb.finish_with_message("Indexing complete");
1499
1500        // Return stats
1501        let stats = self.cache.stats()?;
1502        log::info!("Indexing complete: {} files",
1503                   stats.total_files);
1504
1505        Ok(stats)
1506    }
1507
1508    /// Discover all indexable files in the directory tree
1509    fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1510        let mut files = Vec::new();
1511
1512        // WalkBuilder from ignore crate automatically respects:
1513        // - .gitignore (when in a git repo)
1514        // - .ignore files
1515        // - Hidden files (can be configured)
1516        let walker = WalkBuilder::new(root)
1517            .follow_links(self.config.follow_symlinks)
1518            .git_ignore(true)  // Explicitly enable gitignore support (enabled by default, but be explicit)
1519            .git_global(false) // Don't use global gitignore
1520            .git_exclude(false) // Don't use .git/info/exclude
1521            .build();
1522
1523        for entry in walker {
1524            let entry = entry?;
1525            let path = entry.path();
1526
1527            // Only process files (not directories)
1528            if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1529                continue;
1530            }
1531
1532            // Check if should be indexed
1533            if self.should_index(path) {
1534                files.push(path.to_path_buf());
1535            }
1536        }
1537
1538        Ok(files)
1539    }
1540
1541    /// Check if a file should be indexed based on config
1542    fn should_index(&self, path: &Path) -> bool {
1543        // Check file extension for supported languages
1544        let ext = match path.extension() {
1545            Some(ext) => ext.to_string_lossy(),
1546            None => return false,
1547        };
1548
1549        let lang = Language::from_extension(&ext);
1550
1551        // Only index files for languages with parser implementations
1552        if !lang.is_supported() {
1553            if !matches!(lang, Language::Unknown) {
1554                log::debug!("Skipping {} ({:?} parser not yet implemented)",
1555                           path.display(), lang);
1556            }
1557            return false;
1558        }
1559
1560        // If specific languages are configured, only index those
1561        if !self.config.languages.is_empty() && !self.config.languages.contains(&lang) {
1562            log::debug!("Skipping {} ({:?} not in configured languages)", path.display(), lang);
1563            return false;
1564        }
1565
1566        // Check file size limits
1567        if let Ok(metadata) = std::fs::metadata(path) {
1568            if metadata.len() > self.config.max_file_size as u64 {
1569                log::debug!("Skipping {} (too large: {} bytes)",
1570                           path.display(), metadata.len());
1571                return false;
1572            }
1573        }
1574
1575        // TODO: Check include/exclude patterns when glob support is added
1576        // For now, accept all files with supported language extensions
1577
1578        true
1579    }
1580
1581    /// Compute blake3 hash from file contents for change detection
1582    fn hash_content(&self, content: &[u8]) -> String {
1583        let hash = blake3::hash(content);
1584        hash.to_hex().to_string()
1585    }
1586
1587    /// Check available disk space before indexing
1588    ///
1589    /// Ensures there's enough free space to create the index. Warns if disk space is low.
1590    /// This prevents partial index writes and confusing error messages.
1591    fn check_disk_space(&self, root: &Path) -> Result<()> {
1592        // Get available space on the filesystem containing the cache directory
1593        let cache_path = self.cache.path();
1594
1595        // Use statvfs on Unix systems
1596        #[cfg(unix)]
1597        {
1598            // On Linux, we can use statvfs to get available space
1599            // For now, we'll use a simple heuristic: warn if we can't write a test file
1600            let test_file = cache_path.join(".space_check");
1601            match std::fs::write(&test_file, b"test") {
1602                Ok(_) => {
1603                    let _ = std::fs::remove_file(&test_file);
1604
1605                    // Try to estimate available space using df command
1606                    if let Ok(output) = std::process::Command::new("df")
1607                        .arg("-k")
1608                        .arg(cache_path.parent().unwrap_or(root))
1609                        .output()
1610                    {
1611                        if let Ok(df_output) = String::from_utf8(output.stdout) {
1612                            // Parse df output to get available KB
1613                            if let Some(line) = df_output.lines().nth(1) {
1614                                let parts: Vec<&str> = line.split_whitespace().collect();
1615                                if parts.len() >= 4 {
1616                                    if let Ok(available_kb) = parts[3].parse::<u64>() {
1617                                        let available_mb = available_kb / 1024;
1618
1619                                        // Warn if less than 100MB available
1620                                        if available_mb < 100 {
1621                                            log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1622                                            output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1623                                        } else {
1624                                            log::debug!("Available disk space: {}MB", available_mb);
1625                                        }
1626                                    }
1627                                }
1628                            }
1629                        }
1630                    }
1631
1632                    Ok(())
1633                }
1634                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1635                    anyhow::bail!(
1636                        "Permission denied writing to cache directory: {}. Check file permissions.",
1637                        cache_path.display()
1638                    )
1639                }
1640                Err(e) => {
1641                    // If we can't write, it might be a disk space issue
1642                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1643                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1644                }
1645            }
1646        }
1647
1648        #[cfg(not(unix))]
1649        {
1650            // On Windows, try to write a test file
1651            let test_file = cache_path.join(".space_check");
1652            match std::fs::write(&test_file, b"test") {
1653                Ok(_) => {
1654                    let _ = std::fs::remove_file(&test_file);
1655                    Ok(())
1656                }
1657                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1658                    anyhow::bail!(
1659                        "Permission denied writing to cache directory: {}. Check file permissions.",
1660                        cache_path.display()
1661                    )
1662                }
1663                Err(e) => {
1664                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1665                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1666                }
1667            }
1668        }
1669    }
1670}
1671
1672#[cfg(test)]
1673mod tests {
1674    use super::*;
1675    use tempfile::TempDir;
1676    use std::fs;
1677
1678    #[test]
1679    fn test_indexer_creation() {
1680        let temp = TempDir::new().unwrap();
1681        let cache = CacheManager::new(temp.path());
1682        let config = IndexConfig::default();
1683        let indexer = Indexer::new(cache, config);
1684
1685        assert!(indexer.cache.path().ends_with(".reflex"));
1686    }
1687
1688    #[test]
1689    fn test_hash_content() {
1690        let temp = TempDir::new().unwrap();
1691        let cache = CacheManager::new(temp.path());
1692        let config = IndexConfig::default();
1693        let indexer = Indexer::new(cache, config);
1694
1695        let content1 = b"hello world";
1696        let content2 = b"hello world";
1697        let content3 = b"different content";
1698
1699        let hash1 = indexer.hash_content(content1);
1700        let hash2 = indexer.hash_content(content2);
1701        let hash3 = indexer.hash_content(content3);
1702
1703        // Same content should produce same hash
1704        assert_eq!(hash1, hash2);
1705
1706        // Different content should produce different hash
1707        assert_ne!(hash1, hash3);
1708
1709        // Hash should be hex string
1710        assert_eq!(hash1.len(), 64); // blake3 hash is 32 bytes = 64 hex chars
1711    }
1712
1713    #[test]
1714    fn test_should_index_rust_file() {
1715        let temp = TempDir::new().unwrap();
1716        let cache = CacheManager::new(temp.path());
1717        let config = IndexConfig::default();
1718        let indexer = Indexer::new(cache, config);
1719
1720        // Create a small Rust file
1721        let rust_file = temp.path().join("test.rs");
1722        fs::write(&rust_file, "fn main() {}").unwrap();
1723
1724        assert!(indexer.should_index(&rust_file));
1725    }
1726
1727    #[test]
1728    fn test_should_index_unsupported_extension() {
1729        let temp = TempDir::new().unwrap();
1730        let cache = CacheManager::new(temp.path());
1731        let config = IndexConfig::default();
1732        let indexer = Indexer::new(cache, config);
1733
1734        let unsupported_file = temp.path().join("test.txt");
1735        fs::write(&unsupported_file, "plain text").unwrap();
1736
1737        assert!(!indexer.should_index(&unsupported_file));
1738    }
1739
1740    #[test]
1741    fn test_should_index_no_extension() {
1742        let temp = TempDir::new().unwrap();
1743        let cache = CacheManager::new(temp.path());
1744        let config = IndexConfig::default();
1745        let indexer = Indexer::new(cache, config);
1746
1747        let no_ext_file = temp.path().join("Makefile");
1748        fs::write(&no_ext_file, "all:\n\techo hello").unwrap();
1749
1750        assert!(!indexer.should_index(&no_ext_file));
1751    }
1752
1753    #[test]
1754    fn test_should_index_size_limit() {
1755        let temp = TempDir::new().unwrap();
1756        let cache = CacheManager::new(temp.path());
1757
1758        // Config with 100 byte size limit
1759        let mut config = IndexConfig::default();
1760        config.max_file_size = 100;
1761
1762        let indexer = Indexer::new(cache, config);
1763
1764        // Create small file (should be indexed)
1765        let small_file = temp.path().join("small.rs");
1766        fs::write(&small_file, "fn main() {}").unwrap();
1767        assert!(indexer.should_index(&small_file));
1768
1769        // Create large file (should be skipped)
1770        let large_file = temp.path().join("large.rs");
1771        let large_content = "a".repeat(150);
1772        fs::write(&large_file, large_content).unwrap();
1773        assert!(!indexer.should_index(&large_file));
1774    }
1775
1776    #[test]
1777    fn test_discover_files_empty_dir() {
1778        let temp = TempDir::new().unwrap();
1779        let cache = CacheManager::new(temp.path());
1780        let config = IndexConfig::default();
1781        let indexer = Indexer::new(cache, config);
1782
1783        let files = indexer.discover_files(temp.path()).unwrap();
1784        assert_eq!(files.len(), 0);
1785    }
1786
1787    #[test]
1788    fn test_discover_files_single_file() {
1789        let temp = TempDir::new().unwrap();
1790        let cache = CacheManager::new(temp.path());
1791        let config = IndexConfig::default();
1792        let indexer = Indexer::new(cache, config);
1793
1794        // Create a Rust file
1795        let rust_file = temp.path().join("main.rs");
1796        fs::write(&rust_file, "fn main() {}").unwrap();
1797
1798        let files = indexer.discover_files(temp.path()).unwrap();
1799        assert_eq!(files.len(), 1);
1800        assert!(files[0].ends_with("main.rs"));
1801    }
1802
1803    #[test]
1804    fn test_discover_files_multiple_languages() {
1805        let temp = TempDir::new().unwrap();
1806        let cache = CacheManager::new(temp.path());
1807        let config = IndexConfig::default();
1808        let indexer = Indexer::new(cache, config);
1809
1810        // Create files of different languages
1811        fs::write(temp.path().join("main.rs"), "fn main() {}").unwrap();
1812        fs::write(temp.path().join("script.py"), "print('hello')").unwrap();
1813        fs::write(temp.path().join("app.js"), "console.log('hi')").unwrap();
1814        fs::write(temp.path().join("README.md"), "# Project").unwrap(); // Should be skipped
1815
1816        let files = indexer.discover_files(temp.path()).unwrap();
1817        assert_eq!(files.len(), 3); // Only supported languages
1818    }
1819
1820    #[test]
1821    fn test_discover_files_subdirectories() {
1822        let temp = TempDir::new().unwrap();
1823        let cache = CacheManager::new(temp.path());
1824        let config = IndexConfig::default();
1825        let indexer = Indexer::new(cache, config);
1826
1827        // Create nested directory structure
1828        let src_dir = temp.path().join("src");
1829        fs::create_dir(&src_dir).unwrap();
1830        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
1831        fs::write(src_dir.join("lib.rs"), "pub mod test {}").unwrap();
1832
1833        let tests_dir = temp.path().join("tests");
1834        fs::create_dir(&tests_dir).unwrap();
1835        fs::write(tests_dir.join("test.rs"), "#[test] fn test() {}").unwrap();
1836
1837        let files = indexer.discover_files(temp.path()).unwrap();
1838        assert_eq!(files.len(), 3);
1839    }
1840
1841    #[test]
1842    fn test_discover_files_respects_gitignore() {
1843        let temp = TempDir::new().unwrap();
1844
1845        // Initialize git repo (required for .gitignore to work with WalkBuilder)
1846        std::process::Command::new("git")
1847            .arg("init")
1848            .current_dir(temp.path())
1849            .output()
1850            .expect("Failed to initialize git repo");
1851
1852        let cache = CacheManager::new(temp.path());
1853        let config = IndexConfig::default();
1854        let indexer = Indexer::new(cache, config);
1855
1856        // Create .gitignore - use "ignored/" pattern to ignore the directory
1857        // Note: WalkBuilder respects .gitignore ONLY in git repositories
1858        fs::write(temp.path().join(".gitignore"), "ignored/\n").unwrap();
1859
1860        // Create files
1861        fs::write(temp.path().join("included.rs"), "fn main() {}").unwrap();
1862        fs::write(temp.path().join("also_included.py"), "print('hi')").unwrap();
1863
1864        let ignored_dir = temp.path().join("ignored");
1865        fs::create_dir(&ignored_dir).unwrap();
1866        fs::write(ignored_dir.join("excluded.rs"), "fn test() {}").unwrap();
1867
1868        let files = indexer.discover_files(temp.path()).unwrap();
1869
1870        // Verify the expected files are found
1871        assert!(files.iter().any(|f| f.ends_with("included.rs")), "Should find included.rs");
1872        assert!(files.iter().any(|f| f.ends_with("also_included.py")), "Should find also_included.py");
1873
1874        // Verify excluded.rs in ignored/ directory is NOT found
1875        // This is the key test - gitignore should filter it out
1876        assert!(!files.iter().any(|f| {
1877            let path_str = f.to_string_lossy();
1878            path_str.contains("ignored") && f.ends_with("excluded.rs")
1879        }), "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)");
1880
1881        // Should find exactly 2 files (included.rs and also_included.py)
1882        // .gitignore file itself has no supported language extension, so it won't be indexed
1883        assert_eq!(files.len(), 2, "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)");
1884    }
1885
1886    #[test]
1887    fn test_index_empty_directory() {
1888        let temp = TempDir::new().unwrap();
1889        let cache = CacheManager::new(temp.path());
1890        let config = IndexConfig::default();
1891        let indexer = Indexer::new(cache, config);
1892
1893        let stats = indexer.index(temp.path(), false).unwrap();
1894
1895        assert_eq!(stats.total_files, 0);
1896    }
1897
1898    #[test]
1899    fn test_index_single_rust_file() {
1900        let temp = TempDir::new().unwrap();
1901        let project_root = temp.path().join("project");
1902        fs::create_dir(&project_root).unwrap();
1903
1904        let cache = CacheManager::new(&project_root);
1905        let config = IndexConfig::default();
1906        let indexer = Indexer::new(cache, config);
1907
1908        // Create a Rust file
1909        fs::write(
1910            project_root.join("main.rs"),
1911            "fn main() { println!(\"Hello\"); }"
1912        ).unwrap();
1913
1914        let stats = indexer.index(&project_root, false).unwrap();
1915
1916        assert_eq!(stats.total_files, 1);
1917        assert!(stats.files_by_language.get("Rust").is_some());
1918    }
1919
1920    #[test]
1921    fn test_index_multiple_files() {
1922        let temp = TempDir::new().unwrap();
1923        let project_root = temp.path().join("project");
1924        fs::create_dir(&project_root).unwrap();
1925
1926        let cache = CacheManager::new(&project_root);
1927        let config = IndexConfig::default();
1928        let indexer = Indexer::new(cache, config);
1929
1930        // Create multiple files
1931        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1932        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
1933        fs::write(project_root.join("script.py"), "def main(): pass").unwrap();
1934
1935        let stats = indexer.index(&project_root, false).unwrap();
1936
1937        assert_eq!(stats.total_files, 3);
1938        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
1939        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
1940    }
1941
1942    #[test]
1943    fn test_index_creates_trigram_index() {
1944        let temp = TempDir::new().unwrap();
1945        let project_root = temp.path().join("project");
1946        fs::create_dir(&project_root).unwrap();
1947
1948        let cache = CacheManager::new(&project_root);
1949        let config = IndexConfig::default();
1950        let indexer = Indexer::new(cache, config);
1951
1952        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1953
1954        indexer.index(&project_root, false).unwrap();
1955
1956        // Verify trigrams.bin was created
1957        let trigrams_path = project_root.join(".reflex/trigrams.bin");
1958        assert!(trigrams_path.exists());
1959    }
1960
1961    #[test]
1962    fn test_index_creates_content_store() {
1963        let temp = TempDir::new().unwrap();
1964        let project_root = temp.path().join("project");
1965        fs::create_dir(&project_root).unwrap();
1966
1967        let cache = CacheManager::new(&project_root);
1968        let config = IndexConfig::default();
1969        let indexer = Indexer::new(cache, config);
1970
1971        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1972
1973        indexer.index(&project_root, false).unwrap();
1974
1975        // Verify content.bin was created
1976        let content_path = project_root.join(".reflex/content.bin");
1977        assert!(content_path.exists());
1978    }
1979
1980    #[test]
1981    fn test_index_incremental_no_changes() {
1982        let temp = TempDir::new().unwrap();
1983        let project_root = temp.path().join("project");
1984        fs::create_dir(&project_root).unwrap();
1985
1986        let cache = CacheManager::new(&project_root);
1987        let config = IndexConfig::default();
1988        let indexer = Indexer::new(cache, config);
1989
1990        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1991
1992        // First index
1993        let stats1 = indexer.index(&project_root, false).unwrap();
1994        assert_eq!(stats1.total_files, 1);
1995
1996        // Second index without changes
1997        let stats2 = indexer.index(&project_root, false).unwrap();
1998        assert_eq!(stats2.total_files, 1);
1999    }
2000
2001    #[test]
2002    fn test_index_incremental_with_changes() {
2003        let temp = TempDir::new().unwrap();
2004        let project_root = temp.path().join("project");
2005        fs::create_dir(&project_root).unwrap();
2006
2007        let cache = CacheManager::new(&project_root);
2008        let config = IndexConfig::default();
2009        let indexer = Indexer::new(cache, config);
2010
2011        let main_path = project_root.join("main.rs");
2012        fs::write(&main_path, "fn main() {}").unwrap();
2013
2014        // First index
2015        indexer.index(&project_root, false).unwrap();
2016
2017        // Modify file
2018        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();
2019
2020        // Second index should detect change
2021        let stats = indexer.index(&project_root, false).unwrap();
2022        assert_eq!(stats.total_files, 1);
2023    }
2024
2025    #[test]
2026    fn test_index_incremental_new_file() {
2027        let temp = TempDir::new().unwrap();
2028        let project_root = temp.path().join("project");
2029        fs::create_dir(&project_root).unwrap();
2030
2031        let cache = CacheManager::new(&project_root);
2032        let config = IndexConfig::default();
2033        let indexer = Indexer::new(cache, config);
2034
2035        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2036
2037        // First index
2038        let stats1 = indexer.index(&project_root, false).unwrap();
2039        assert_eq!(stats1.total_files, 1);
2040
2041        // Add new file
2042        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
2043
2044        // Second index should include new file
2045        let stats2 = indexer.index(&project_root, false).unwrap();
2046        assert_eq!(stats2.total_files, 2);
2047    }
2048
2049    #[test]
2050    fn test_index_parallel_threads_config() {
2051        let temp = TempDir::new().unwrap();
2052        let project_root = temp.path().join("project");
2053        fs::create_dir(&project_root).unwrap();
2054
2055        let cache = CacheManager::new(&project_root);
2056
2057        // Test with explicit thread count
2058        let mut config = IndexConfig::default();
2059        config.parallel_threads = 2;
2060
2061        let indexer = Indexer::new(cache, config);
2062
2063        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2064
2065        let stats = indexer.index(&project_root, false).unwrap();
2066        assert_eq!(stats.total_files, 1);
2067    }
2068
2069    #[test]
2070    fn test_index_parallel_threads_auto() {
2071        let temp = TempDir::new().unwrap();
2072        let project_root = temp.path().join("project");
2073        fs::create_dir(&project_root).unwrap();
2074
2075        let cache = CacheManager::new(&project_root);
2076
2077        // Test with auto thread count (0 = auto)
2078        let mut config = IndexConfig::default();
2079        config.parallel_threads = 0;
2080
2081        let indexer = Indexer::new(cache, config);
2082
2083        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2084
2085        let stats = indexer.index(&project_root, false).unwrap();
2086        assert_eq!(stats.total_files, 1);
2087    }
2088
2089    #[test]
2090    fn test_index_respects_size_limit() {
2091        let temp = TempDir::new().unwrap();
2092        let project_root = temp.path().join("project");
2093        fs::create_dir(&project_root).unwrap();
2094
2095        let cache = CacheManager::new(&project_root);
2096
2097        // Very small size limit
2098        let mut config = IndexConfig::default();
2099        config.max_file_size = 50;
2100
2101        let indexer = Indexer::new(cache, config);
2102
2103        // Small file (should be indexed)
2104        fs::write(project_root.join("small.rs"), "fn a() {}").unwrap();
2105
2106        // Large file (should be skipped)
2107        let large_content = "fn main() {}\n".repeat(10);
2108        fs::write(project_root.join("large.rs"), large_content).unwrap();
2109
2110        let stats = indexer.index(&project_root, false).unwrap();
2111
2112        // Only small file should be indexed
2113        assert_eq!(stats.total_files, 1);
2114    }
2115
2116    #[test]
2117    fn test_index_mixed_languages() {
2118        let temp = TempDir::new().unwrap();
2119        let project_root = temp.path().join("project");
2120        fs::create_dir(&project_root).unwrap();
2121
2122        let cache = CacheManager::new(&project_root);
2123        let config = IndexConfig::default();
2124        let indexer = Indexer::new(cache, config);
2125
2126        // Create files in multiple languages
2127        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2128        fs::write(project_root.join("test.py"), "def test(): pass").unwrap();
2129        fs::write(project_root.join("app.js"), "function main() {}").unwrap();
2130        fs::write(project_root.join("lib.go"), "func main() {}").unwrap();
2131
2132        let stats = indexer.index(&project_root, false).unwrap();
2133
2134        assert_eq!(stats.total_files, 4);
2135        assert!(stats.files_by_language.contains_key("Rust"));
2136        assert!(stats.files_by_language.contains_key("Python"));
2137        assert!(stats.files_by_language.contains_key("JavaScript"));
2138        assert!(stats.files_by_language.contains_key("Go"));
2139    }
2140
2141    #[test]
2142    fn test_index_updates_cache_stats() {
2143        let temp = TempDir::new().unwrap();
2144        let project_root = temp.path().join("project");
2145        fs::create_dir(&project_root).unwrap();
2146
2147        let cache = CacheManager::new(&project_root);
2148        let config = IndexConfig::default();
2149        let indexer = Indexer::new(cache, config);
2150
2151        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2152
2153        indexer.index(&project_root, false).unwrap();
2154
2155        // Verify cache stats were updated
2156        let cache = CacheManager::new(&project_root);
2157        let stats = cache.stats().unwrap();
2158
2159        assert_eq!(stats.total_files, 1);
2160        assert!(stats.index_size_bytes > 0);
2161    }
2162}