Skip to main content

reflex/
indexer.rs

1//! Indexing engine for parsing source code
2//!
3//! The indexer scans the project directory, parses source files using Tree-sitter,
4//! and builds the symbol/token cache for fast querying.
5
6use anyhow::{Context, Result};
7use ignore::WalkBuilder;
8use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
9use rayon::prelude::*;
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::sync::atomic::{AtomicU64, Ordering};
13use std::sync::{Arc, Mutex};
14use std::time::Instant;
15
16use crate::cache::CacheManager;
17use crate::content_store::{ContentReader, ContentWriter};
18use crate::dependency::DependencyIndex;
19use crate::models::{Dependency, IndexConfig, IndexStats, Language, ImportType};
20use crate::output;
21use crate::parsers::{DependencyExtractor, ImportInfo, ExportInfo};
22use crate::parsers::rust::RustDependencyExtractor;
23use crate::parsers::python::PythonDependencyExtractor;
24use crate::parsers::typescript::TypeScriptDependencyExtractor;
25use crate::parsers::go::GoDependencyExtractor;
26use crate::parsers::java::JavaDependencyExtractor;
27use crate::parsers::c::CDependencyExtractor;
28use crate::parsers::cpp::CppDependencyExtractor;
29use crate::parsers::csharp::CSharpDependencyExtractor;
30use crate::parsers::php::PhpDependencyExtractor;
31use crate::parsers::ruby::RubyDependencyExtractor;
32use crate::parsers::kotlin::KotlinDependencyExtractor;
33use crate::parsers::zig::ZigDependencyExtractor;
34use crate::parsers::vue::VueDependencyExtractor;
35use crate::parsers::svelte::SvelteDependencyExtractor;
36use crate::trigram::TrigramIndex;
37
/// Progress callback type: `(current_file_count, total_file_count, status_message)`.
///
/// The closure is held behind an `Arc` and bounded by `Send + Sync` so the same callback can
/// be cloned and invoked from a different thread than the one that supplied it.
pub type ProgressCallback = Arc<dyn Fn(usize, usize, String) + Send + Sync>;
41
/// Result of processing a single file (used for parallel processing)
///
/// One value is produced per successfully read file by the parallel stage and then consumed
/// sequentially to feed the trigram index, the content store and the batched database writes.
struct FileProcessingResult {
    /// Path exactly as discovered by the directory walk.
    path: PathBuf,
    /// Path normalized to be relative to the index root (no leading `./`).
    path_str: String,
    /// Hash computed from the file's content bytes.
    hash: String,
    /// Full text of the file as read from disk.
    content: String,
    /// Language detected from the file extension.
    language: Language,
    /// Number of lines in `content`.
    line_count: usize,
    /// Imports extracted from the file; empty for languages without an extractor
    /// or when extraction failed.
    dependencies: Vec<ImportInfo>,
    /// Export declarations extracted from the file; empty for languages without
    /// export tracking or when extraction failed.
    exports: Vec<ExportInfo>,
}
53
54/// Find the nearest tsconfig.json for a given source file
55///
56/// Walks up the directory tree from the source file to find the nearest tsconfig directory.
57/// Returns a reference to the PathAliasMap if found.
58fn find_nearest_tsconfig<'a>(
59    file_path: &str,
60    root: &Path,
61    tsconfigs: &'a HashMap<PathBuf, crate::parsers::tsconfig::PathAliasMap>,
62) -> Option<&'a crate::parsers::tsconfig::PathAliasMap> {
63    // Convert file_path to absolute path (relative to root)
64    let abs_file_path = if Path::new(file_path).is_absolute() {
65        PathBuf::from(file_path)
66    } else {
67        root.join(file_path)
68    };
69
70    // Start from the file's directory and walk up
71    let mut current_dir = abs_file_path.parent()?;
72
73    loop {
74        // Check if we have a tsconfig for this directory
75        if let Some(alias_map) = tsconfigs.get(current_dir) {
76            return Some(alias_map);
77        }
78
79        // Move up one directory
80        current_dir = current_dir.parent()?;
81
82        // Stop if we've reached the root
83        if current_dir == root || !current_dir.starts_with(root) {
84            break;
85        }
86    }
87
88    None
89}
90
/// Manages the indexing process
///
/// Owns the cache handle the index is persisted through and the configuration that
/// controls how indexing runs.
pub struct Indexer {
    /// Cache the indexer initializes, reads existing file hashes from, and writes
    /// index files and file metadata to.
    cache: CacheManager,
    /// Indexing settings, e.g. `parallel_threads` (where `0` means "pick automatically").
    config: IndexConfig,
}
96
97impl Indexer {
98    /// Create a new indexer with the given cache manager and config
99    pub fn new(cache: CacheManager, config: IndexConfig) -> Self {
100        Self { cache, config }
101    }
102
103    /// Build or update the index for the given root directory
104    pub fn index(&self, root: impl AsRef<Path>, show_progress: bool) -> Result<IndexStats> {
105        self.index_with_callback(root, show_progress, None)
106    }
107
108    /// Build or update the index with progress callback support
109    pub fn index_with_callback(
110        &self,
111        root: impl AsRef<Path>,
112        show_progress: bool,
113        progress_callback: Option<ProgressCallback>,
114    ) -> Result<IndexStats> {
115        let root = root.as_ref();
116        log::info!("Indexing directory: {:?}", root);
117
118        // Get git state (if in git repo)
119        let git_state = crate::git::get_git_state_optional(root)?;
120        let branch = git_state
121            .as_ref()
122            .map(|s| s.branch.clone())
123            .unwrap_or_else(|| "_default".to_string());
124
125        if let Some(ref state) = git_state {
126            log::info!(
127                "Git state: branch='{}', commit='{}', dirty={}",
128                state.branch,
129                state.commit,
130                state.dirty
131            );
132        } else {
133            log::info!("Not a git repository, using default branch");
134        }
135
136        // Configure thread pool for parallel processing
137        // 0 = auto (use 80% of available cores to avoid locking the system)
138        let num_threads = if self.config.parallel_threads == 0 {
139            let available_cores = std::thread::available_parallelism()
140                .map(|n| n.get())
141                .unwrap_or(4);
142            // Use 80% of available cores (minimum 1, maximum 8)
143            // Cap at 8 to prevent diminishing returns from cache contention on high-core systems
144            ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
145        } else {
146            self.config.parallel_threads
147        };
148
149        log::info!("Using {} threads for parallel indexing (out of {} available)",
150                   num_threads,
151                   std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
152
153        // Ensure cache is initialized
154        self.cache.init()?;
155
156        // Check available disk space after cache is initialized
157        self.check_disk_space(root)?;
158
159        // Load existing hashes for incremental indexing (for current branch)
160        let existing_hashes = self.cache.load_hashes_for_branch(&branch)?;
161        log::debug!("Loaded {} existing file hashes for branch '{}'", existing_hashes.len(), branch);
162
163        // Step 1: Walk directory tree and collect files
164        let files = self.discover_files(root)?;
165        let total_files = files.len();
166        log::info!("Discovered {} files to index", total_files);
167
168        // Step 1.4: Parse tsconfig.json files for TypeScript/Vue path alias resolution
169        // Must be done before parallel processing so it's available during dependency extraction
170        let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
171            .unwrap_or_else(|e| {
172                log::warn!("Failed to parse tsconfig.json files: {}", e);
173                HashMap::new()
174            });
175        if !tsconfigs.is_empty() {
176            log::info!("Found {} tsconfig.json files", tsconfigs.len());
177            for (config_dir, alias_map) in &tsconfigs {
178                log::debug!("  {} (base_url: {:?}, {} aliases)",
179                           config_dir.display(),
180                           alias_map.base_url,
181                           alias_map.aliases.len());
182            }
183        }
184
185        // Step 1.5: Quick incremental check - are all files unchanged?
186        // If yes, skip expensive rebuild entirely and return cached stats
187        if !existing_hashes.is_empty() && total_files == existing_hashes.len() {
188            // Same number of files - check if any changed by comparing hashes
189            let mut any_changed = false;
190
191            for file_path in &files {
192                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
193                let path_str = file_path.to_string_lossy().to_string();
194                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
195                    // Convert absolute path to relative
196                    rel_path.to_string_lossy().to_string()
197                } else {
198                    // Already relative, just strip ./ prefix
199                    path_str.trim_start_matches("./").to_string()
200                };
201
202                // Check if file exists in cache
203                if let Some(existing_hash) = existing_hashes.get(&normalized_path) {
204                    // Read and hash file to check if changed
205                    match std::fs::read_to_string(file_path) {
206                        Ok(content) => {
207                            let current_hash = self.hash_content(content.as_bytes());
208                            if &current_hash != existing_hash {
209                                any_changed = true;
210                                log::debug!("File changed: {}", path_str);
211                                break; // Early exit - we know we need to rebuild
212                            }
213                        }
214                        Err(_) => {
215                            any_changed = true;
216                            break;
217                        }
218                    }
219                } else {
220                    // File not in cache - something changed
221                    any_changed = true;
222                    break;
223                }
224            }
225
226            if !any_changed {
227                let content_path = self.cache.path().join("content.bin");
228                let trigrams_path = self.cache.path().join("trigrams.bin");
229
230                // Check if schema hash matches - if not, we need a full rebuild
231                // even though file contents haven't changed (binary format may differ)
232                let schema_ok = self.cache.check_schema_hash().unwrap_or(false);
233
234                if schema_ok && content_path.exists() && trigrams_path.exists() {
235                    if let Ok(reader) = ContentReader::open(&content_path) {
236                        if reader.file_count() > 0 {
237                            log::info!("No files changed - skipping index rebuild");
238                            return Ok(self.cache.stats()?);
239                        }
240                    }
241                    log::warn!("content.bin invalid despite hashes matching - forcing rebuild");
242                } else if !schema_ok {
243                    log::info!("Schema hash changed - forcing full rebuild");
244                } else {
245                    log::warn!("Binary index files missing - forcing rebuild");
246                }
247            }
248        } else if total_files != existing_hashes.len() {
249            log::info!("File count changed ({} -> {}) - full reindex required",
250                       existing_hashes.len(), total_files);
251        }
252
253        // Step 2: Build trigram index + content store
254        let mut new_hashes = HashMap::new();
255        let mut files_indexed = 0;
256        let mut file_metadata: Vec<(String, String, String, usize)> = Vec::new(); // For batch SQLite update
257        let mut all_dependencies: Vec<(String, Vec<ImportInfo>)> = Vec::new(); // For batch dependency insertion
258        let mut all_exports: Vec<(String, Vec<ExportInfo>)> = Vec::new(); // For batch export insertion
259
260        // Initialize trigram index and content store
261        let mut trigram_index = TrigramIndex::new();
262        let mut content_writer = ContentWriter::new();
263
264        // Enable batch-flush mode for trigram index if we have lots of files
265        if total_files > 10000 {
266            let temp_dir = self.cache.path().join("trigram_temp");
267            trigram_index.enable_batch_flush(temp_dir)
268                .context("Failed to enable batch-flush mode for trigram index")?;
269            log::info!("Enabled batch-flush mode for {} files", total_files);
270        }
271
272        // Initialize content writer to start streaming writes immediately
273        let content_path = self.cache.path().join("content.bin");
274        content_writer.init(content_path.clone())
275            .context("Failed to initialize content writer")?;
276
277        // Create progress bar (only if requested via --progress flag)
278        let pb = if show_progress {
279            let pb = ProgressBar::new(total_files as u64);
280            pb.set_draw_target(ProgressDrawTarget::stderr());
281            pb.set_style(
282                ProgressStyle::default_bar()
283                    .template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} files ({percent}%) {msg}")
284                    .unwrap()
285                    .progress_chars("=>-")
286            );
287            // Force updates every 100ms to ensure progress is visible
288            pb.enable_steady_tick(std::time::Duration::from_millis(100));
289            pb
290        } else {
291            ProgressBar::hidden()
292        };
293
294        // Atomic counter for thread-safe progress updates
295        let progress_counter = Arc::new(AtomicU64::new(0));
296        // Shared status message for progress callback
297        let progress_status = Arc::new(Mutex::new("Indexing files...".to_string()));
298
299        let _start_time = Instant::now();
300
301        // Spawn a background thread to update progress bar and call callback during parallel processing
302        let counter_for_thread = Arc::clone(&progress_counter);
303        let status_for_thread = Arc::clone(&progress_status);
304        let pb_clone = pb.clone();
305        let callback_for_thread = progress_callback.clone();
306        let total_files_for_thread = total_files;
307        let progress_thread = if show_progress || callback_for_thread.is_some() {
308            Some(std::thread::spawn(move || {
309                loop {
310                    let count = counter_for_thread.load(Ordering::Relaxed);
311                    pb_clone.set_position(count);
312
313                    // Call progress callback if provided
314                    if let Some(ref callback) = callback_for_thread {
315                        let status = status_for_thread.lock().unwrap().clone();
316                        callback(count as usize, total_files_for_thread, status);
317                    }
318
319                    if count >= total_files_for_thread as u64 {
320                        break;
321                    }
322                    std::thread::sleep(std::time::Duration::from_millis(50));
323                }
324            }))
325        } else {
326            None
327        };
328
329        // Build a custom thread pool with limited threads
330        let pool = rayon::ThreadPoolBuilder::new()
331            .num_threads(num_threads)
332            .build()
333            .context("Failed to create thread pool")?;
334
335        // Process files in batches to avoid OOM on huge codebases
336        // Batch size: process 5000 files at a time to limit memory usage
337        const BATCH_SIZE: usize = 5000;
338        let num_batches = total_files.div_ceil(BATCH_SIZE);
339        log::info!("Processing {} files in {} batches of up to {} files",
340                   total_files, num_batches, BATCH_SIZE);
341
342        for (batch_idx, batch_files) in files.chunks(BATCH_SIZE).enumerate() {
343            log::info!("Processing batch {}/{} ({} files)",
344                       batch_idx + 1, num_batches, batch_files.len());
345
346            // Process files in parallel using rayon with custom thread pool
347            let counter_clone = Arc::clone(&progress_counter);
348            let results: Vec<Option<FileProcessingResult>> = pool.install(|| {
349                batch_files
350                    .par_iter()
351                    .map(|file_path| {
352                // Normalize path to be relative to root (handles both ./ prefix and absolute paths)
353                let path_str = file_path.to_string_lossy().to_string();
354                let normalized_path = if let Ok(rel_path) = file_path.strip_prefix(root) {
355                    // Convert absolute path to relative
356                    rel_path.to_string_lossy().to_string()
357                } else {
358                    // Already relative, just strip ./ prefix
359                    path_str.trim_start_matches("./").to_string()
360                };
361
362                // Read file content once (used for hashing, trigrams, and parsing)
363                let content = match std::fs::read_to_string(&file_path) {
364                    Ok(c) => c,
365                    Err(e) => {
366                        log::warn!("Failed to read {}: {}", path_str, e);
367                        // Update progress
368                        counter_clone.fetch_add(1, Ordering::Relaxed);
369                        return None;
370                    }
371                };
372
373                // Compute hash from content (no duplicate file read!)
374                let hash = self.hash_content(content.as_bytes());
375
376                // Detect language
377                let ext = file_path.extension()
378                    .and_then(|e| e.to_str())
379                    .unwrap_or("");
380                let language = Language::from_extension(ext);
381
382                // Count lines in the file
383                let line_count = content.lines().count();
384
385                // Extract dependencies and exports for supported languages
386                let dependencies = match language {
387                    Language::Rust => {
388                        match RustDependencyExtractor::extract_dependencies(&content) {
389                            Ok(deps) => deps,
390                            Err(e) => {
391                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
392                                Vec::new()
393                            }
394                        }
395                    }
396                    Language::Python => {
397                        match PythonDependencyExtractor::extract_dependencies(&content) {
398                            Ok(deps) => deps,
399                            Err(e) => {
400                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
401                                Vec::new()
402                            }
403                        }
404                    }
405                    Language::TypeScript | Language::JavaScript => {
406                        // Find nearest tsconfig for path alias resolution
407                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
408                        match TypeScriptDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
409                            Ok(deps) => deps,
410                            Err(e) => {
411                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
412                                Vec::new()
413                            }
414                        }
415                    }
416                    Language::Go => {
417                        match GoDependencyExtractor::extract_dependencies(&content) {
418                            Ok(deps) => deps,
419                            Err(e) => {
420                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
421                                Vec::new()
422                            }
423                        }
424                    }
425                    Language::Java => {
426                        match JavaDependencyExtractor::extract_dependencies(&content) {
427                            Ok(deps) => deps,
428                            Err(e) => {
429                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
430                                Vec::new()
431                            }
432                        }
433                    }
434                    Language::C => {
435                        match CDependencyExtractor::extract_dependencies(&content) {
436                            Ok(deps) => deps,
437                            Err(e) => {
438                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
439                                Vec::new()
440                            }
441                        }
442                    }
443                    Language::Cpp => {
444                        match CppDependencyExtractor::extract_dependencies(&content) {
445                            Ok(deps) => deps,
446                            Err(e) => {
447                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
448                                Vec::new()
449                            }
450                        }
451                    }
452                    Language::CSharp => {
453                        match CSharpDependencyExtractor::extract_dependencies(&content) {
454                            Ok(deps) => deps,
455                            Err(e) => {
456                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
457                                Vec::new()
458                            }
459                        }
460                    }
461                    Language::PHP => {
462                        match PhpDependencyExtractor::extract_dependencies(&content) {
463                            Ok(deps) => deps,
464                            Err(e) => {
465                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
466                                Vec::new()
467                            }
468                        }
469                    }
470                    Language::Ruby => {
471                        match RubyDependencyExtractor::extract_dependencies(&content) {
472                            Ok(deps) => deps,
473                            Err(e) => {
474                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
475                                Vec::new()
476                            }
477                        }
478                    }
479                    Language::Kotlin => {
480                        match KotlinDependencyExtractor::extract_dependencies(&content) {
481                            Ok(deps) => deps,
482                            Err(e) => {
483                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
484                                Vec::new()
485                            }
486                        }
487                    }
488                    Language::Zig => {
489                        match ZigDependencyExtractor::extract_dependencies(&content) {
490                            Ok(deps) => deps,
491                            Err(e) => {
492                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
493                                Vec::new()
494                            }
495                        }
496                    }
497                    Language::Vue => {
498                        // Find nearest tsconfig for path alias resolution
499                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
500                        match VueDependencyExtractor::extract_dependencies_with_alias_map(&content, alias_map) {
501                            Ok(deps) => deps,
502                            Err(e) => {
503                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
504                                Vec::new()
505                            }
506                        }
507                    }
508                    Language::Svelte => {
509                        match SvelteDependencyExtractor::extract_dependencies(&content) {
510                            Ok(deps) => deps,
511                            Err(e) => {
512                                log::warn!("Failed to extract dependencies from {}: {}", path_str, e);
513                                Vec::new()
514                            }
515                        }
516                    }
517                    // Other languages not yet implemented
518                    _ => Vec::new(),
519                };
520
521                // Extract exports (for barrel export tracking)
522                let exports = match language {
523                    Language::TypeScript | Language::JavaScript => {
524                        // Find nearest tsconfig for path alias resolution
525                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
526                        match TypeScriptDependencyExtractor::extract_export_declarations(&content, alias_map) {
527                            Ok(exports) => exports,
528                            Err(e) => {
529                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
530                                Vec::new()
531                            }
532                        }
533                    }
534                    Language::Vue => {
535                        // Find nearest tsconfig for path alias resolution
536                        let alias_map = find_nearest_tsconfig(&path_str, root, &tsconfigs);
537                        match VueDependencyExtractor::extract_export_declarations(&content, alias_map) {
538                            Ok(exports) => exports,
539                            Err(e) => {
540                                log::warn!("Failed to extract exports from {}: {}", path_str, e);
541                                Vec::new()
542                            }
543                        }
544                    }
545                    // Other languages not yet implemented for export tracking
546                    _ => Vec::new(),
547                };
548
549                // Update progress atomically
550                counter_clone.fetch_add(1, Ordering::Relaxed);
551
552                Some(FileProcessingResult {
553                    path: file_path.clone(),
554                    path_str: normalized_path.to_string(),
555                    hash,
556                    content,
557                    language,
558                    line_count,
559                    dependencies,
560                    exports,
561                })
562                })
563                .collect()
564            });
565
566            // Process batch results immediately (streaming approach to minimize memory)
567            for result in results.into_iter().flatten() {
568                // Add file to trigram index (get file_id)
569                let file_id = trigram_index.add_file(result.path.clone());
570
571                // Index file content directly (avoid accumulating all trigrams)
572                trigram_index.index_file(file_id, &result.content);
573
574                // Add to content store
575                content_writer.add_file(result.path.clone(), &result.content);
576
577                files_indexed += 1;
578
579                // Prepare file metadata for batch database update
580                file_metadata.push((
581                    result.path_str.clone(),
582                    result.hash.clone(),
583                    format!("{:?}", result.language),
584                    result.line_count
585                ));
586
587                // Collect dependencies for batch insertion (if any)
588                if !result.dependencies.is_empty() {
589                    all_dependencies.push((result.path_str.clone(), result.dependencies));
590                }
591
592                // Collect exports for batch insertion (if any)
593                if !result.exports.is_empty() {
594                    all_exports.push((result.path_str.clone(), result.exports));
595                }
596
597                new_hashes.insert(result.path_str, result.hash);
598            }
599
600            // Flush trigram index batch to disk if batch-flush mode is enabled
601            if total_files > 10000 {
602                let flush_msg = format!("Flushing batch {}/{}...", batch_idx + 1, num_batches);
603                if show_progress {
604                    pb.set_message(flush_msg.clone());
605                }
606                *progress_status.lock().unwrap() = flush_msg;
607                trigram_index.flush_batch()
608                    .context("Failed to flush trigram batch")?;
609            }
610        }
611
612        // Wait for progress thread to finish
613        if let Some(thread) = progress_thread {
614            let _ = thread.join();
615        }
616
617        // Update progress bar to final count
618        if show_progress {
619            let final_count = progress_counter.load(Ordering::Relaxed);
620            pb.set_position(final_count);
621        }
622
623        // Finalize trigram index (sort and deduplicate posting lists)
624        *progress_status.lock().unwrap() = "Finalizing trigram index...".to_string();
625        if show_progress {
626            pb.set_message("Finalizing trigram index...".to_string());
627        }
628        trigram_index.finalize();
629
630        // Update progress bar message for post-processing
631        *progress_status.lock().unwrap() = "Writing file metadata to database...".to_string();
632        if show_progress {
633            pb.set_message("Writing file metadata to database...".to_string());
634        }
635
636        // Batch write file metadata AND branch hashes in a SINGLE atomic transaction
637        // This ensures that if files are inserted, their hashes are guaranteed to be inserted too
638        if !file_metadata.is_empty() {
639            // Prepare files data (path, language, line_count)
640            let files_without_hash: Vec<(String, String, usize)> = file_metadata
641                .iter()
642                .map(|(path, _hash, lang, lines)| (path.clone(), lang.clone(), *lines))
643                .collect();
644
645        // Record files for this branch (for branch-aware indexing)
646        *progress_status.lock().unwrap() = "Recording branch files...".to_string();
647        if show_progress {
648            pb.set_message("Recording branch files...".to_string());
649        }
650
651            // Prepare branch files data (path, hash)
652            let branch_files: Vec<(String, String)> = file_metadata
653                .iter()
654                .map(|(path, hash, _, _)| (path.clone(), hash.clone()))
655                .collect();
656
657            // Use atomic method that combines both operations
658            self.cache.batch_update_files_and_branch(
659                &files_without_hash,
660                &branch_files,
661                &branch,
662                git_state.as_ref().map(|s| s.commit.as_str()),
663            ).context("Failed to batch update files and branch hashes")?;
664
665            log::info!("Wrote metadata and hashes for {} files to database", file_metadata.len());
666        }
667
668        // Update branch metadata
669        self.cache.update_branch_metadata(
670            &branch,
671            git_state.as_ref().map(|s| s.commit.as_str()),
672            file_metadata.len(),
673            git_state.as_ref().map(|s| s.dirty).unwrap_or(false),
674        )?;
675
676        // Force WAL checkpoint to ensure background processes see all committed data
677        // This is critical when spawning background symbol indexer immediately after
678        self.cache.checkpoint_wal()
679            .context("Failed to checkpoint WAL")?;
680        log::debug!("WAL checkpoint completed - database is fully synced");
681
682        // Step 2.5: Insert dependencies (after files are inserted and have IDs)
683        if !all_dependencies.is_empty() {
684            *progress_status.lock().unwrap() = "Extracting dependencies...".to_string();
685            if show_progress {
686                pb.set_message("Extracting dependencies...".to_string());
687            }
688
689            // Find and parse all go.mod files for Go projects (monorepo support)
690            let go_modules = crate::parsers::go::parse_all_go_modules(root)
691                .unwrap_or_else(|e| {
692                    log::warn!("Failed to parse go.mod files: {}", e);
693                    Vec::new()
694                });
695            if !go_modules.is_empty() {
696                log::info!("Found {} Go modules", go_modules.len());
697                for module in &go_modules {
698                    log::debug!("  {} (project: {})", module.name, module.project_root);
699                }
700            }
701
702            // Find and parse all pom.xml/build.gradle files for Java projects (monorepo support)
703            let java_projects = crate::parsers::java::parse_all_java_projects(root)
704                .unwrap_or_else(|e| {
705                    log::warn!("Failed to parse Java project configs: {}", e);
706                    Vec::new()
707                });
708            if !java_projects.is_empty() {
709                log::info!("Found {} Java projects", java_projects.len());
710                for project in &java_projects {
711                    log::debug!("  {} (project: {})", project.package_name, project.project_root);
712                }
713            }
714
715            // Find and parse all Python package configs for Python projects (monorepo support)
716            let python_packages = crate::parsers::python::parse_all_python_packages(root)
717                .unwrap_or_else(|e| {
718                    log::warn!("Failed to parse Python package configs: {}", e);
719                    Vec::new()
720                });
721            if !python_packages.is_empty() {
722                log::info!("Found {} Python packages", python_packages.len());
723                for package in &python_packages {
724                    log::debug!("  {} (project: {})", package.name, package.project_root);
725                }
726            }
727
728            // Find and parse *.gemspec files for Ruby projects (monorepo support)
729            let ruby_projects = crate::parsers::ruby::parse_all_ruby_projects(root)
730                .unwrap_or_else(|e| {
731                    log::warn!("Failed to parse Ruby project configs: {}", e);
732                    Vec::new()
733                });
734            if !ruby_projects.is_empty() {
735                log::info!("Found {} Ruby projects", ruby_projects.len());
736                for project in &ruby_projects {
737                    log::debug!("  {} (project: {})", project.gem_name, project.project_root);
738                }
739            }
740
741            // Find and parse all Cargo.toml files for Rust workspace support
742            let rust_crates = crate::parsers::rust::parse_all_rust_crates(root)
743                .unwrap_or_else(|e| {
744                    log::warn!("Failed to parse Cargo.toml files: {}", e);
745                    Vec::new()
746                });
747            if !rust_crates.is_empty() {
748                log::info!("Found {} Rust workspace crates", rust_crates.len());
749                for krate in &rust_crates {
750                    log::debug!("  {} (root: {})", krate.name, krate.root_path.display());
751                }
752            }
753
754            // Note: Kotlin projects use the same java_projects above (same build systems: Maven/Gradle)
755
756            // Find and parse all composer.json files for PHP projects (monorepo support)
757            let php_psr4_mappings = crate::parsers::php::parse_all_composer_psr4(root)
758                .unwrap_or_else(|e| {
759                    log::warn!("Failed to parse composer.json files: {}", e);
760                    Vec::new()
761                });
762            if !php_psr4_mappings.is_empty() {
763                log::info!("Found {} PSR-4 mappings from composer.json files", php_psr4_mappings.len());
764                for mapping in &php_psr4_mappings {
765                    log::debug!("  {} => {} (project: {})", mapping.namespace_prefix, mapping.directory, mapping.project_root);
766                }
767            }
768
769            // Find and parse all tsconfig.json files for TypeScript/Vue projects (monorepo support)
770            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
771                .unwrap_or_else(|e| {
772                    log::warn!("Failed to parse tsconfig.json files: {}", e);
773                    HashMap::new()
774                });
775            if !tsconfigs.is_empty() {
776                log::info!("Found {} tsconfig.json files", tsconfigs.len());
777                for (config_dir, alias_map) in &tsconfigs {
778                    log::debug!("  {} (base_url: {:?}, {} aliases)",
779                               config_dir.display(),
780                               alias_map.base_url,
781                               alias_map.aliases.len());
782                }
783            }
784
785            // Create dependency index to resolve paths and insert dependencies
786            let cache_for_deps = CacheManager::new(root);
787            let dep_index = DependencyIndex::new(cache_for_deps);
788
789            let mut total_deps_inserted = 0;
790
791            // Process each file's dependencies
792            for (file_path, import_infos) in all_dependencies {
793                // Get file ID from database
794                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
795                    Some(id) => id,
796                    None => {
797                        log::warn!("File not found in database (skipping dependencies): {}", file_path);
798                        continue;
799                    }
800                };
801
802                // Reclassify and filter dependencies
803                let mut resolved_deps = Vec::new();
804
805                for mut import_info in import_infos {
806                    // Reclassify Go imports using module names (if Go project)
807                    if file_path.ends_with(".go") {
808                        // Check if the import matches any Go module
809                        let mut reclassified = false;
810                        for module in &go_modules {
811                            import_info.import_type = crate::parsers::go::reclassify_go_import(
812                                &import_info.imported_path,
813                                Some(&module.name),
814                            );
815                            // If it's internal, we've found the right module
816                            if matches!(import_info.import_type, ImportType::Internal) {
817                                reclassified = true;
818                                break;
819                            }
820                        }
821                        // If no module matched, use base classification
822                        if !reclassified {
823                            import_info.import_type = crate::parsers::go::reclassify_go_import(
824                                &import_info.imported_path,
825                                None,
826                            );
827                        }
828                    }
829
830                    // Reclassify Java imports using package names (if Java project)
831                    if file_path.ends_with(".java") {
832                        // Check if the import matches any Java project
833                        let mut reclassified = false;
834                        for project in &java_projects {
835                            import_info.import_type = crate::parsers::java::reclassify_java_import(
836                                &import_info.imported_path,
837                                Some(&project.package_name),
838                            );
839                            // If it's internal, we've found the right project
840                            if matches!(import_info.import_type, ImportType::Internal) {
841                                reclassified = true;
842                                break;
843                            }
844                        }
845                        // If no project matched, use base classification
846                        if !reclassified {
847                            import_info.import_type = crate::parsers::java::reclassify_java_import(
848                                &import_info.imported_path,
849                                None,
850                            );
851                        }
852                    }
853
854                    // Reclassify Python imports using package names (if Python project)
855                    if file_path.ends_with(".py") {
856                        // Check if the import matches any Python package
857                        let mut reclassified = false;
858                        for package in &python_packages {
859                            import_info.import_type = crate::parsers::python::reclassify_python_import(
860                                &import_info.imported_path,
861                                Some(&package.name),
862                            );
863                            // If it's internal, we've found the right package
864                            if matches!(import_info.import_type, ImportType::Internal) {
865                                reclassified = true;
866                                break;
867                            }
868                        }
869                        // If no package matched, use base classification
870                        if !reclassified {
871                            import_info.import_type = crate::parsers::python::reclassify_python_import(
872                                &import_info.imported_path,
873                                None,
874                            );
875                        }
876                    }
877
878                    // Reclassify Ruby imports using gem names (if Ruby project)
879                    if file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec") {
880                        // Check if the import matches any Ruby project
881                        let mut reclassified = false;
882                        for project in &ruby_projects {
883                            let gem_names = vec![project.gem_name.clone()];
884                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
885                                &import_info.imported_path,
886                                &gem_names,
887                            );
888                            // If it's internal, we've found the right project
889                            if matches!(import_info.import_type, ImportType::Internal) {
890                                reclassified = true;
891                                break;
892                            }
893                        }
894                        // If no project matched, use base classification (will be External or Stdlib)
895                        if !reclassified {
896                            import_info.import_type = crate::parsers::ruby::reclassify_ruby_import(
897                                &import_info.imported_path,
898                                &[],
899                            );
900                        }
901                    }
902
903                    // Reclassify Kotlin imports using package names (if Kotlin project)
904                    if file_path.ends_with(".kt") || file_path.ends_with(".kts") {
905                        // Check if the import matches any Java/Kotlin project (same build systems)
906                        let mut reclassified = false;
907                        for project in &java_projects {
908                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
909                                &import_info.imported_path,
910                                Some(&project.package_name),
911                            );
912                            // If it's internal, we've found the right project
913                            if matches!(import_info.import_type, ImportType::Internal) {
914                                reclassified = true;
915                                break;
916                            }
917                        }
918                        // If no project matched, use base classification
919                        if !reclassified {
920                            import_info.import_type = crate::parsers::kotlin::reclassify_kotlin_import(
921                                &import_info.imported_path,
922                                None,
923                            );
924                        }
925                    }
926
927                    // Reclassify Rust imports using workspace crates
928                    if file_path.ends_with(".rs") && !rust_crates.is_empty() {
929                        let new_type = crate::parsers::rust::reclassify_rust_import(
930                            &import_info.imported_path,
931                            &rust_crates,
932                        );
933                        if matches!(new_type, ImportType::Internal) {
934                            import_info.import_type = new_type;
935                        }
936                    }
937
938                    // ONLY insert Internal dependencies - skip External and Stdlib
939                    if !matches!(import_info.import_type, ImportType::Internal) {
940                        continue;
941                    }
942
943                    // Resolve PHP dependencies using PSR-4 (deterministic)
944                    let resolved_file_id = if file_path.ends_with(".php") && !php_psr4_mappings.is_empty() {
945                        // Use PSR-4 to resolve namespace to file path
946                        if let Some(resolved_path) = crate::parsers::php::resolve_php_namespace_to_path(
947                            &import_info.imported_path,
948                            &php_psr4_mappings,
949                        ) {
950                            // Look up file ID in database using exact match
951                            match dep_index.get_file_id_by_path(&resolved_path) {
952                                Ok(Some(id)) => {
953                                    log::trace!("Resolved PHP dependency: {} -> {} (file_id={})",
954                                               import_info.imported_path, resolved_path, id);
955                                    Some(id)
956                                }
957                                Ok(None) => {
958                                    log::trace!("PHP dependency resolved to path but file not in index: {} -> {}",
959                                               import_info.imported_path, resolved_path);
960                                    None
961                                }
962                                Err(e) => {
963                                    log::debug!("Skipping PHP dependency resolution for '{}': {}", resolved_path, e);
964                                    None
965                                }
966                            }
967                        } else {
968                            log::trace!("Could not resolve PHP namespace using PSR-4: {}",
969                                       import_info.imported_path);
970                            None
971                        }
972                    } else if file_path.ends_with(".py") && !python_packages.is_empty() {
973                        // Resolve Python dependencies using package mappings
974                        if let Some(resolved_path) = crate::parsers::python::resolve_python_import_to_path(
975                            &import_info.imported_path,
976                            &python_packages,
977                            Some(&file_path),
978                        ) {
979                            // Look up file ID in database using exact match
980                            match dep_index.get_file_id_by_path(&resolved_path) {
981                                Ok(Some(id)) => {
982                                    log::trace!("Resolved Python dependency: {} -> {} (file_id={})",
983                                               import_info.imported_path, resolved_path, id);
984                                    Some(id)
985                                }
986                                Ok(None) => {
987                                    log::trace!("Python dependency resolved to path but file not in index: {} -> {}",
988                                               import_info.imported_path, resolved_path);
989                                    None
990                                }
991                                Err(e) => {
992                                    log::debug!("Skipping Python dependency resolution for '{}': {}", resolved_path, e);
993                                    None
994                                }
995                            }
996                        } else {
997                            log::trace!("Could not resolve Python import: {}", import_info.imported_path);
998                            None
999                        }
1000                    } else if file_path.ends_with(".go") && !go_modules.is_empty() {
1001                        // Resolve Go dependencies using module mappings
1002                        if let Some(resolved_path) = crate::parsers::go::resolve_go_import_to_path(
1003                            &import_info.imported_path,
1004                            &go_modules,
1005                            Some(&file_path),
1006                        ) {
1007                            // Look up file ID in database using exact match
1008                            match dep_index.get_file_id_by_path(&resolved_path) {
1009                                Ok(Some(id)) => {
1010                                    log::trace!("Resolved Go dependency: {} -> {} (file_id={})",
1011                                               import_info.imported_path, resolved_path, id);
1012                                    Some(id)
1013                                }
1014                                Ok(None) => {
1015                                    log::trace!("Go dependency resolved to path but file not in index: {} -> {}",
1016                                               import_info.imported_path, resolved_path);
1017                                    None
1018                                }
1019                                Err(e) => {
1020                                    log::debug!("Skipping Go dependency resolution for '{}': {}", resolved_path, e);
1021                                    None
1022                                }
1023                            }
1024                        } else {
1025                            log::trace!("Could not resolve Go import: {}", import_info.imported_path);
1026                            None
1027                        }
1028                    } else if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1029                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1030                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1031                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs") {
1032                        // Resolve TypeScript/JavaScript dependencies (relative imports and path aliases)
1033                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1034                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1035                            &import_info.imported_path,
1036                            Some(&file_path),
1037                            alias_map,
1038                        ) {
1039                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
1040                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1041
1042                            // Try each candidate in order until we find one in the database
1043                            let mut resolved_id = None;
1044                            for candidate_path in candidates {
1045                                // Normalize path to be relative to project root
1046                                // Convert absolute paths to relative (without requiring file to exist)
1047                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1048                                    rel_path.to_string_lossy().to_string()
1049                                } else {
1050                                    // Not an absolute path or not under root - use as-is
1051                                    candidate_path.to_string()
1052                                };
1053
1054                                log::debug!("Looking up TS/JS candidate: '{}' (from '{}')", normalized_candidate, candidate_path);
1055                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1056                                    Ok(Some(id)) => {
1057                                        log::debug!("Resolved TS/JS dependency: {} -> {} (file_id={})",
1058                                                   import_info.imported_path, normalized_candidate, id);
1059                                        resolved_id = Some(id);
1060                                        break; // Found a match, stop trying
1061                                    }
1062                                    Ok(None) => {
1063                                        log::trace!("TS/JS candidate not in index: {}", candidate_path);
1064                                    }
1065                                    Err(e) => {
1066                                        log::debug!("Skipping TS/JS dependency resolution for '{}': {}", normalized_candidate, e);
1067                                    }
1068                                }
1069                            }
1070
1071                            if resolved_id.is_none() {
1072                                log::trace!("TS/JS dependency: no matching file found in database for any candidate: {}",
1073                                           candidates_str);
1074                            }
1075
1076                            resolved_id
1077                        } else {
1078                            log::trace!("Could not resolve TS/JS import (non-relative or external): {}", import_info.imported_path);
1079                            None
1080                        }
1081                    } else if file_path.ends_with(".rs") {
1082                        // Resolve Rust dependencies (crate::, super::, self::, mod declarations)
1083                        // Falls back to workspace resolution for cross-crate imports
1084                        let resolved_path_opt = crate::parsers::rust::resolve_rust_use_to_path(
1085                            &import_info.imported_path,
1086                            Some(&file_path),
1087                            Some(root.to_str().unwrap_or("")),
1088                        ).or_else(|| {
1089                            crate::parsers::rust::resolve_rust_workspace_path(
1090                                &import_info.imported_path,
1091                                &rust_crates,
1092                            )
1093                        });
1094
1095                        if let Some(resolved_path) = resolved_path_opt {
1096                            // Look up file ID in database using exact match
1097                            match dep_index.get_file_id_by_path(&resolved_path) {
1098                                Ok(Some(id)) => {
1099                                    log::trace!("Resolved Rust dependency: {} -> {} (file_id={})",
1100                                               import_info.imported_path, resolved_path, id);
1101                                    Some(id)
1102                                }
1103                                Ok(None) => {
1104                                    log::trace!("Rust dependency resolved to path but file not in index: {} -> {}",
1105                                               import_info.imported_path, resolved_path);
1106                                    None
1107                                }
1108                                Err(e) => {
1109                                    log::debug!("Skipping Rust dependency resolution for '{}': {}", resolved_path, e);
1110                                    None
1111                                }
1112                            }
1113                        } else {
1114                            log::trace!("Could not resolve Rust import (external or stdlib): {}", import_info.imported_path);
1115                            None
1116                        }
1117                    } else if file_path.ends_with(".java") && !java_projects.is_empty() {
1118                        // Resolve Java dependencies using project mappings
1119                        if let Some(resolved_path) = crate::parsers::java::resolve_java_import_to_path(
1120                            &import_info.imported_path,
1121                            &java_projects,
1122                            Some(&file_path),
1123                        ) {
1124                            // Look up file ID in database using exact match
1125                            match dep_index.get_file_id_by_path(&resolved_path) {
1126                                Ok(Some(id)) => {
1127                                    log::trace!("Resolved Java dependency: {} -> {} (file_id={})",
1128                                               import_info.imported_path, resolved_path, id);
1129                                    Some(id)
1130                                }
1131                                Ok(None) => {
1132                                    log::trace!("Java dependency resolved to path but file not in index: {} -> {}",
1133                                               import_info.imported_path, resolved_path);
1134                                    None
1135                                }
1136                                Err(e) => {
1137                                    log::debug!("Skipping Java dependency resolution for '{}': {}", resolved_path, e);
1138                                    None
1139                                }
1140                            }
1141                        } else {
1142                            log::trace!("Could not resolve Java import: {}", import_info.imported_path);
1143                            None
1144                        }
1145                    } else if (file_path.ends_with(".kt") || file_path.ends_with(".kts")) && !java_projects.is_empty() {
1146                        // Resolve Kotlin dependencies using project mappings (same build systems as Java)
1147                        if let Some(resolved_path) = crate::parsers::java::resolve_kotlin_import_to_path(
1148                            &import_info.imported_path,
1149                            &java_projects,
1150                            Some(&file_path),
1151                        ) {
1152                            // Look up file ID in database using exact match
1153                            match dep_index.get_file_id_by_path(&resolved_path) {
1154                                Ok(Some(id)) => {
1155                                    log::trace!("Resolved Kotlin dependency: {} -> {} (file_id={})",
1156                                               import_info.imported_path, resolved_path, id);
1157                                    Some(id)
1158                                }
1159                                Ok(None) => {
1160                                    log::trace!("Kotlin dependency resolved to path but file not in index: {} -> {}",
1161                                               import_info.imported_path, resolved_path);
1162                                    None
1163                                }
1164                                Err(e) => {
1165                                    log::debug!("Skipping Kotlin dependency resolution for '{}': {}", resolved_path, e);
1166                                    None
1167                                }
1168                            }
1169                        } else {
1170                            log::trace!("Could not resolve Kotlin import: {}", import_info.imported_path);
1171                            None
1172                        }
1173                    } else if (file_path.ends_with(".rb") || file_path.ends_with(".rake") || file_path.ends_with(".gemspec")) && !ruby_projects.is_empty() {
1174                        // Resolve Ruby dependencies using project mappings
1175                        if let Some(resolved_path) = crate::parsers::ruby::resolve_ruby_require_to_path(
1176                            &import_info.imported_path,
1177                            &ruby_projects,
1178                            Some(&file_path),
1179                        ) {
1180                            // Look up file ID in database using exact match
1181                            match dep_index.get_file_id_by_path(&resolved_path) {
1182                                Ok(Some(id)) => {
1183                                    log::trace!("Resolved Ruby dependency: {} -> {} (file_id={})",
1184                                               import_info.imported_path, resolved_path, id);
1185                                    Some(id)
1186                                }
1187                                Ok(None) => {
1188                                    log::trace!("Ruby dependency resolved to path but file not in index: {} -> {}",
1189                                               import_info.imported_path, resolved_path);
1190                                    None
1191                                }
1192                                Err(e) => {
1193                                    log::debug!("Skipping Ruby dependency resolution for '{}': {}", resolved_path, e);
1194                                    None
1195                                }
1196                            }
1197                        } else {
1198                            log::trace!("Could not resolve Ruby require: {}", import_info.imported_path);
1199                            None
1200                        }
1201                    } else if file_path.ends_with(".c") || file_path.ends_with(".h") {
1202                        // Resolve C dependencies (relative #include paths)
1203                        if let Some(resolved_path) = crate::parsers::c::resolve_c_include_to_path(
1204                            &import_info.imported_path,
1205                            Some(&file_path),
1206                        ) {
1207                            // Look up file ID in database using exact match
1208                            match dep_index.get_file_id_by_path(&resolved_path) {
1209                                Ok(Some(id)) => {
1210                                    log::trace!("Resolved C dependency: {} -> {} (file_id={})",
1211                                               import_info.imported_path, resolved_path, id);
1212                                    Some(id)
1213                                }
1214                                Ok(None) => {
1215                                    log::trace!("C dependency resolved to path but file not in index: {} -> {}",
1216                                               import_info.imported_path, resolved_path);
1217                                    None
1218                                }
1219                                Err(e) => {
1220                                    log::debug!("Skipping C dependency resolution for '{}': {}", resolved_path, e);
1221                                    None
1222                                }
1223                            }
1224                        } else {
1225                            log::trace!("Could not resolve C include (system header): {}", import_info.imported_path);
1226                            None
1227                        }
1228                    } else if file_path.ends_with(".cpp") || file_path.ends_with(".cc") || file_path.ends_with(".cxx")
1229                           || file_path.ends_with(".hpp") || file_path.ends_with(".hxx") || file_path.ends_with(".h++")
1230                           || file_path.ends_with(".C") || file_path.ends_with(".H") {
1231                        // Resolve C++ dependencies (relative #include paths)
1232                        if let Some(resolved_path) = crate::parsers::cpp::resolve_cpp_include_to_path(
1233                            &import_info.imported_path,
1234                            Some(&file_path),
1235                        ) {
1236                            // Look up file ID in database using exact match
1237                            match dep_index.get_file_id_by_path(&resolved_path) {
1238                                Ok(Some(id)) => {
1239                                    log::trace!("Resolved C++ dependency: {} -> {} (file_id={})",
1240                                               import_info.imported_path, resolved_path, id);
1241                                    Some(id)
1242                                }
1243                                Ok(None) => {
1244                                    log::trace!("C++ dependency resolved to path but file not in index: {} -> {}",
1245                                               import_info.imported_path, resolved_path);
1246                                    None
1247                                }
1248                                Err(e) => {
1249                                    log::debug!("Skipping C++ dependency resolution for '{}': {}", resolved_path, e);
1250                                    None
1251                                }
1252                            }
1253                        } else {
1254                            log::trace!("Could not resolve C++ include (system header): {}", import_info.imported_path);
1255                            None
1256                        }
1257                    } else if file_path.ends_with(".cs") {
1258                        // Resolve C# dependencies (using namespace-to-path mapping)
1259                        if let Some(resolved_path) = crate::parsers::csharp::resolve_csharp_using_to_path(
1260                            &import_info.imported_path,
1261                            Some(&file_path),
1262                        ) {
1263                            // Look up file ID in database using exact match
1264                            match dep_index.get_file_id_by_path(&resolved_path) {
1265                                Ok(Some(id)) => {
1266                                    log::trace!("Resolved C# dependency: {} -> {} (file_id={})",
1267                                               import_info.imported_path, resolved_path, id);
1268                                    Some(id)
1269                                }
1270                                Ok(None) => {
1271                                    log::trace!("C# dependency resolved to path but file not in index: {} -> {}",
1272                                               import_info.imported_path, resolved_path);
1273                                    None
1274                                }
1275                                Err(e) => {
1276                                    log::debug!("Skipping C# dependency resolution for '{}': {}", resolved_path, e);
1277                                    None
1278                                }
1279                            }
1280                        } else {
1281                            log::trace!("Could not resolve C# using directive: {}", import_info.imported_path);
1282                            None
1283                        }
1284                    } else if file_path.ends_with(".zig") {
1285                        // Resolve Zig dependencies (relative @import paths)
1286                        if let Some(resolved_path) = crate::parsers::zig::resolve_zig_import_to_path(
1287                            &import_info.imported_path,
1288                            Some(&file_path),
1289                        ) {
1290                            // Look up file ID in database using exact match
1291                            match dep_index.get_file_id_by_path(&resolved_path) {
1292                                Ok(Some(id)) => {
1293                                    log::trace!("Resolved Zig dependency: {} -> {} (file_id={})",
1294                                               import_info.imported_path, resolved_path, id);
1295                                    Some(id)
1296                                }
1297                                Ok(None) => {
1298                                    log::trace!("Zig dependency resolved to path but file not in index: {} -> {}",
1299                                               import_info.imported_path, resolved_path);
1300                                    None
1301                                }
1302                                Err(e) => {
1303                                    log::debug!("Skipping Zig dependency resolution for '{}': {}", resolved_path, e);
1304                                    None
1305                                }
1306                            }
1307                        } else {
1308                            log::trace!("Could not resolve Zig import (external or stdlib): {}", import_info.imported_path);
1309                            None
1310                        }
1311                    } else if file_path.ends_with(".vue") || file_path.ends_with(".svelte") {
1312                        // Resolve Vue/Svelte dependencies (use TypeScript/JavaScript resolver for imports in <script> blocks)
1313                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1314                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1315                            &import_info.imported_path,
1316                            Some(&file_path),
1317                            alias_map,
1318                        ) {
1319                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js")
1320                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1321
1322                            // Try each candidate in order until we find one in the database
1323                            let mut resolved_id = None;
1324                            for candidate_path in candidates {
1325                                // Normalize path to be relative to project root
1326                                // Convert absolute paths to relative (without requiring file to exist)
1327                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1328                                    rel_path.to_string_lossy().to_string()
1329                                } else {
1330                                    // Not an absolute path or not under root - use as-is
1331                                    candidate_path.to_string()
1332                                };
1333
1334                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1335                                    Ok(Some(id)) => {
1336                                        log::trace!("Resolved Vue/Svelte dependency: {} -> {} (file_id={})",
1337                                                   import_info.imported_path, candidate_path, id);
1338                                        resolved_id = Some(id);
1339                                        break; // Found a match, stop trying
1340                                    }
1341                                    Ok(None) => {
1342                                        log::trace!("Vue/Svelte candidate not in index: {}", candidate_path);
1343                                    }
1344                                    Err(e) => {
1345                                        log::debug!("Skipping Vue/Svelte dependency resolution for '{}': {}", normalized_candidate, e);
1346                                    }
1347                                }
1348                            }
1349
1350                            if resolved_id.is_none() {
1351                                log::trace!("Vue/Svelte dependency: no matching file found in database for any candidate: {}",
1352                                           candidates_str);
1353                            }
1354
1355                            resolved_id
1356                        } else {
1357                            log::trace!("Could not resolve Vue/Svelte import (non-relative or external): {}", import_info.imported_path);
1358                            None
1359                        }
1360                    } else {
1361                        None
1362                    };
1363
1364                    // resolved_file_id will be populated using deterministic language-specific resolution
1365                    // All language resolvers have been implemented!
1366                    resolved_deps.push(Dependency {
1367                        file_id,
1368                        imported_path: import_info.imported_path.clone(),
1369                        resolved_file_id,
1370                        import_type: import_info.import_type,
1371                        line_number: import_info.line_number,
1372                        imported_symbols: import_info.imported_symbols.clone(),
1373                    });
1374                }
1375
1376                // Clear existing dependencies for this file (incremental reindex)
1377                dep_index.clear_dependencies(file_id)?;
1378
1379                // Batch insert dependencies
1380                if !resolved_deps.is_empty() {
1381                    dep_index.batch_insert_dependencies(&resolved_deps)?;
1382                    total_deps_inserted += resolved_deps.len();
1383                }
1384            }
1385
1386            log::info!("Extracted {} dependencies", total_deps_inserted);
1387        }
1388
1389        // Step 2.6: Insert exports (after files are inserted and have IDs)
1390        if !all_exports.is_empty() {
1391            *progress_status.lock().unwrap() = "Extracting exports...".to_string();
1392            if show_progress {
1393                pb.set_message("Extracting exports...".to_string());
1394            }
1395
1396            // Reuse the tsconfigs parsed earlier for TypeScript/Vue path alias resolution
1397            let tsconfigs = crate::parsers::tsconfig::parse_all_tsconfigs(root)
1398                .unwrap_or_else(|e| {
1399                    log::warn!("Failed to parse tsconfig.json files: {}", e);
1400                    HashMap::new()
1401                });
1402
1403            // Create dependency index to resolve paths and insert exports
1404            let cache_for_exports = CacheManager::new(root);
1405            let dep_index = DependencyIndex::new(cache_for_exports);
1406
1407            let mut total_exports_inserted = 0;
1408
1409            // Process each file's exports
1410            for (file_path, export_infos) in all_exports {
1411                // Get file ID from database
1412                let file_id = match dep_index.get_file_id_by_path(&file_path)? {
1413                    Some(id) => id,
1414                    None => {
1415                        log::warn!("File not found in database (skipping exports): {}", file_path);
1416                        continue;
1417                    }
1418                };
1419
1420                // Resolve export source paths and insert
1421                for export_info in export_infos {
1422                    // Resolve export source path (same logic as imports)
1423                    let resolved_source_id = if file_path.ends_with(".ts") || file_path.ends_with(".tsx")
1424                            || file_path.ends_with(".js") || file_path.ends_with(".jsx")
1425                            || file_path.ends_with(".mts") || file_path.ends_with(".cts")
1426                            || file_path.ends_with(".mjs") || file_path.ends_with(".cjs")
1427                            || file_path.ends_with(".vue") {
1428                        // Resolve TypeScript/JavaScript/Vue export paths (relative imports and path aliases)
1429                        let alias_map = find_nearest_tsconfig(&file_path, root, &tsconfigs);
1430                        if let Some(candidates_str) = crate::parsers::typescript::resolve_ts_import_to_path(
1431                            &export_info.source_path,
1432                            Some(&file_path),
1433                            alias_map,
1434                        ) {
1435                            // Parse pipe-delimited candidates (e.g., "path.tsx|path.ts|path.jsx|path.js|path.vue")
1436                            let candidates: Vec<&str> = candidates_str.split('|').collect();
1437
1438                            // Try each candidate in order until we find one in the database
1439                            let mut resolved_id = None;
1440                            for candidate_path in candidates {
1441                                // Normalize path to be relative to project root
1442                                let normalized_candidate = if let Ok(rel_path) = std::path::Path::new(candidate_path).strip_prefix(root) {
1443                                    rel_path.to_string_lossy().to_string()
1444                                } else {
1445                                    candidate_path.to_string()
1446                                };
1447
1448                                match dep_index.get_file_id_by_path(&normalized_candidate) {
1449                                    Ok(Some(id)) => {
1450                                        log::trace!("Resolved export source: {} -> {} (file_id={})",
1451                                                   export_info.source_path, normalized_candidate, id);
1452                                        resolved_id = Some(id);
1453                                        break; // Found a match, stop trying
1454                                    }
1455                                    Ok(None) => {
1456                                        log::trace!("Export source candidate not in index: {}", candidate_path);
1457                                    }
1458                                    Err(e) => {
1459                                        log::debug!("Skipping export source resolution for '{}': {}", normalized_candidate, e);
1460                                    }
1461                                }
1462                            }
1463
1464                            if resolved_id.is_none() {
1465                                log::trace!("Export source: no matching file found in database for any candidate: {}",
1466                                           candidates_str);
1467                            }
1468
1469                            resolved_id
1470                        } else {
1471                            log::trace!("Could not resolve export source (non-relative or external): {}", export_info.source_path);
1472                            None
1473                        }
1474                    } else {
1475                        None
1476                    };
1477
1478                    // Insert export into database
1479                    dep_index.insert_export(
1480                        file_id,
1481                        export_info.exported_symbol,
1482                        export_info.source_path,
1483                        resolved_source_id,
1484                        export_info.line_number,
1485                    )?;
1486
1487                    total_exports_inserted += 1;
1488                }
1489            }
1490
1491            log::info!("Extracted {} exports", total_exports_inserted);
1492        }
1493
1494        log::info!("Indexed {} files", files_indexed);
1495
1496        // Step 3: Write trigram index
1497        *progress_status.lock().unwrap() = "Writing trigram index...".to_string();
1498        if show_progress {
1499            pb.set_message("Writing trigram index...".to_string());
1500        }
1501        let trigrams_path = self.cache.path().join("trigrams.bin");
1502        log::info!("Writing trigram index with {} trigrams to trigrams.bin",
1503                   trigram_index.trigram_count());
1504
1505        trigram_index.write(&trigrams_path)
1506            .context("Failed to write trigram index")?;
1507        log::info!("Wrote {} files to trigrams.bin", trigram_index.file_count());
1508
1509        // Step 4: Finalize content store (already been writing incrementally)
1510        *progress_status.lock().unwrap() = "Finalizing content store...".to_string();
1511        if show_progress {
1512            pb.set_message("Finalizing content store...".to_string());
1513        }
1514        content_writer.finalize_if_needed()
1515            .context("Failed to finalize content store")?;
1516        log::info!("Wrote {} files ({} bytes) to content.bin",
1517                   content_writer.file_count(), content_writer.content_size());
1518
1519        // Step 5: Update SQLite statistics from database totals (branch-aware)
1520        *progress_status.lock().unwrap() = "Updating statistics...".to_string();
1521        if show_progress {
1522            pb.set_message("Updating statistics...".to_string());
1523        }
1524        // Update stats for current branch only
1525        self.cache.update_stats(&branch)?;
1526
1527        // Update schema hash to mark cache as compatible with current binary
1528        self.cache.update_schema_hash()?;
1529
1530        pb.finish_with_message("Indexing complete");
1531
1532        // Return stats
1533        let stats = self.cache.stats()?;
1534        log::info!("Indexing complete: {} files",
1535                   stats.total_files);
1536
1537        Ok(stats)
1538    }
1539
1540    /// Discover all indexable files in the directory tree
1541    fn discover_files(&self, root: &Path) -> Result<Vec<PathBuf>> {
1542        let mut files = Vec::new();
1543
1544        // WalkBuilder from ignore crate automatically respects:
1545        // - .gitignore (when in a git repo)
1546        // - .ignore files
1547        // - Hidden files (can be configured)
1548        let walker = WalkBuilder::new(root)
1549            .follow_links(self.config.follow_symlinks)
1550            .git_ignore(true)  // Explicitly enable gitignore support (enabled by default, but be explicit)
1551            .git_global(false) // Don't use global gitignore
1552            .git_exclude(false) // Don't use .git/info/exclude
1553            .build();
1554
1555        for entry in walker {
1556            let entry = entry?;
1557            let path = entry.path();
1558
1559            // Only process files (not directories)
1560            if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
1561                continue;
1562            }
1563
1564            // Check if should be indexed
1565            if self.should_index(path) {
1566                files.push(path.to_path_buf());
1567            }
1568        }
1569
1570        Ok(files)
1571    }
1572
1573    /// Check if a file should be indexed based on config
1574    fn should_index(&self, path: &Path) -> bool {
1575        // Check file extension for supported languages
1576        let ext = match path.extension() {
1577            Some(ext) => ext.to_string_lossy(),
1578            None => return false,
1579        };
1580
1581        let lang = Language::from_extension(&ext);
1582
1583        // Only index files for languages with parser implementations
1584        if !lang.is_supported() {
1585            if !matches!(lang, Language::Unknown) {
1586                log::debug!("Skipping {} ({:?} parser not yet implemented)",
1587                           path.display(), lang);
1588            }
1589            return false;
1590        }
1591
1592        // If specific languages are configured, only index those
1593        if !self.config.languages.is_empty() && !self.config.languages.contains(&lang) {
1594            log::debug!("Skipping {} ({:?} not in configured languages)", path.display(), lang);
1595            return false;
1596        }
1597
1598        // Check file size limits
1599        if let Ok(metadata) = std::fs::metadata(path) {
1600            if metadata.len() > self.config.max_file_size as u64 {
1601                log::debug!("Skipping {} (too large: {} bytes)",
1602                           path.display(), metadata.len());
1603                return false;
1604            }
1605        }
1606
1607        // TODO: Check include/exclude patterns when glob support is added
1608        // For now, accept all files with supported language extensions
1609
1610        true
1611    }
1612
1613    /// Compute blake3 hash from file contents for change detection
1614    fn hash_content(&self, content: &[u8]) -> String {
1615        let hash = blake3::hash(content);
1616        hash.to_hex().to_string()
1617    }
1618
1619    /// Check available disk space before indexing
1620    ///
1621    /// Ensures there's enough free space to create the index. Warns if disk space is low.
1622    /// This prevents partial index writes and confusing error messages.
1623    fn check_disk_space(&self, root: &Path) -> Result<()> {
1624        // Get available space on the filesystem containing the cache directory
1625        let cache_path = self.cache.path();
1626
1627        // Use statvfs on Unix systems
1628        #[cfg(unix)]
1629        {
1630            // On Linux, we can use statvfs to get available space
1631            // For now, we'll use a simple heuristic: warn if we can't write a test file
1632            let test_file = cache_path.join(".space_check");
1633            match std::fs::write(&test_file, b"test") {
1634                Ok(_) => {
1635                    let _ = std::fs::remove_file(&test_file);
1636
1637                    // Try to estimate available space using df command
1638                    if let Ok(output) = std::process::Command::new("df")
1639                        .arg("-k")
1640                        .arg(cache_path.parent().unwrap_or(root))
1641                        .output()
1642                    {
1643                        if let Ok(df_output) = String::from_utf8(output.stdout) {
1644                            // Parse df output to get available KB
1645                            if let Some(line) = df_output.lines().nth(1) {
1646                                let parts: Vec<&str> = line.split_whitespace().collect();
1647                                if parts.len() >= 4 {
1648                                    if let Ok(available_kb) = parts[3].parse::<u64>() {
1649                                        let available_mb = available_kb / 1024;
1650
1651                                        // Warn if less than 100MB available
1652                                        if available_mb < 100 {
1653                                            log::warn!("Low disk space: only {}MB available. Indexing may fail.", available_mb);
1654                                            output::warn(&format!("Low disk space ({}MB available). Consider freeing up space.", available_mb));
1655                                        } else {
1656                                            log::debug!("Available disk space: {}MB", available_mb);
1657                                        }
1658                                    }
1659                                }
1660                            }
1661                        }
1662                    }
1663
1664                    Ok(())
1665                }
1666                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1667                    anyhow::bail!(
1668                        "Permission denied writing to cache directory: {}. Check file permissions.",
1669                        cache_path.display()
1670                    )
1671                }
1672                Err(e) => {
1673                    // If we can't write, it might be a disk space issue
1674                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1675                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1676                }
1677            }
1678        }
1679
1680        #[cfg(not(unix))]
1681        {
1682            // On Windows, try to write a test file
1683            let test_file = cache_path.join(".space_check");
1684            match std::fs::write(&test_file, b"test") {
1685                Ok(_) => {
1686                    let _ = std::fs::remove_file(&test_file);
1687                    Ok(())
1688                }
1689                Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
1690                    anyhow::bail!(
1691                        "Permission denied writing to cache directory: {}. Check file permissions.",
1692                        cache_path.display()
1693                    )
1694                }
1695                Err(e) => {
1696                    log::warn!("Failed to write test file (possible disk space issue): {}", e);
1697                    Err(e).context("Failed to verify disk space - indexing may fail due to insufficient space")
1698                }
1699            }
1700        }
1701    }
1702}
1703
1704#[cfg(test)]
1705mod tests {
1706    use super::*;
1707    use tempfile::TempDir;
1708    use std::fs;
1709
#[test]
fn test_indexer_creation() {
    // A freshly constructed indexer keeps its cache under `.reflex`.
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    let cache_dir = indexer.cache.path();
    assert!(cache_dir.ends_with(".reflex"));
}
1719
#[test]
fn test_hash_content() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    let digest_a = indexer.hash_content(b"hello world");
    let digest_a_repeat = indexer.hash_content(b"hello world");
    let digest_b = indexer.hash_content(b"different content");

    // Hashing is deterministic: identical bytes give identical digests.
    assert_eq!(digest_a, digest_a_repeat);

    // Distinct inputs must not collide.
    assert_ne!(digest_a, digest_b);

    // blake3 yields 32 bytes, i.e. 64 hex characters.
    assert_eq!(digest_a.len(), 64);
}
1744
#[test]
fn test_should_index_rust_file() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    // A tiny Rust source file is well within the default limits.
    let source_path = workspace.path().join("test.rs");
    fs::write(&source_path, "fn main() {}").unwrap();

    let accepted = indexer.should_index(&source_path);
    assert!(accepted);
}
1758
#[test]
fn test_should_index_unsupported_extension() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    // `.txt` does not map to a supported language, so it must be rejected.
    let text_path = workspace.path().join("test.txt");
    fs::write(&text_path, "plain text").unwrap();

    let accepted = indexer.should_index(&text_path);
    assert!(!accepted);
}
1771
#[test]
fn test_should_index_no_extension() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    // Files without any extension are never indexed.
    let makefile_path = workspace.path().join("Makefile");
    fs::write(&makefile_path, "all:\n\techo hello").unwrap();

    let accepted = indexer.should_index(&makefile_path);
    assert!(!accepted);
}
1784
#[test]
fn test_should_index_size_limit() {
    let workspace = TempDir::new().unwrap();
    let cache = CacheManager::new(workspace.path());

    // Cap indexable files at 100 bytes.
    let mut limited = IndexConfig::default();
    limited.max_file_size = 100;

    let indexer = Indexer::new(cache, limited);

    // Under the cap: accepted.
    let under_limit = workspace.path().join("small.rs");
    fs::write(&under_limit, "fn main() {}").unwrap();
    assert!(indexer.should_index(&under_limit));

    // Over the cap (150 bytes): rejected.
    let over_limit = workspace.path().join("large.rs");
    fs::write(&over_limit, "a".repeat(150)).unwrap();
    assert!(!indexer.should_index(&over_limit));
}
1807
#[test]
fn test_discover_files_empty_dir() {
    // An empty directory yields no indexable files.
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    let discovered = indexer.discover_files(workspace.path()).unwrap();
    assert_eq!(discovered.len(), 0);
}
1818
#[test]
fn test_discover_files_single_file() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    // One Rust file in the root should be the only discovery.
    let source_path = workspace.path().join("main.rs");
    fs::write(&source_path, "fn main() {}").unwrap();

    let discovered = indexer.discover_files(workspace.path()).unwrap();
    assert_eq!(discovered.len(), 1);
    assert!(discovered[0].ends_with("main.rs"));
}
1834
#[test]
fn test_discover_files_multiple_languages() {
    let workspace = TempDir::new().unwrap();
    let indexer = Indexer::new(CacheManager::new(workspace.path()), IndexConfig::default());

    // Three supported source files plus a README that should be skipped.
    let fixtures = [
        ("main.rs", "fn main() {}"),
        ("script.py", "print('hello')"),
        ("app.js", "console.log('hi')"),
        ("README.md", "# Project"),
    ];
    for (file_name, body) in fixtures {
        fs::write(workspace.path().join(file_name), body).unwrap();
    }

    let discovered = indexer.discover_files(workspace.path()).unwrap();
    assert_eq!(discovered.len(), 3); // Only supported languages
}
1851
1852    #[test]
1853    fn test_discover_files_subdirectories() {
1854        let temp = TempDir::new().unwrap();
1855        let cache = CacheManager::new(temp.path());
1856        let config = IndexConfig::default();
1857        let indexer = Indexer::new(cache, config);
1858
1859        // Create nested directory structure
1860        let src_dir = temp.path().join("src");
1861        fs::create_dir(&src_dir).unwrap();
1862        fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();
1863        fs::write(src_dir.join("lib.rs"), "pub mod test {}").unwrap();
1864
1865        let tests_dir = temp.path().join("tests");
1866        fs::create_dir(&tests_dir).unwrap();
1867        fs::write(tests_dir.join("test.rs"), "#[test] fn test() {}").unwrap();
1868
1869        let files = indexer.discover_files(temp.path()).unwrap();
1870        assert_eq!(files.len(), 3);
1871    }
1872
1873    #[test]
1874    fn test_discover_files_respects_gitignore() {
1875        let temp = TempDir::new().unwrap();
1876
1877        // Initialize git repo (required for .gitignore to work with WalkBuilder)
1878        std::process::Command::new("git")
1879            .arg("init")
1880            .current_dir(temp.path())
1881            .output()
1882            .expect("Failed to initialize git repo");
1883
1884        let cache = CacheManager::new(temp.path());
1885        let config = IndexConfig::default();
1886        let indexer = Indexer::new(cache, config);
1887
1888        // Create .gitignore - use "ignored/" pattern to ignore the directory
1889        // Note: WalkBuilder respects .gitignore ONLY in git repositories
1890        fs::write(temp.path().join(".gitignore"), "ignored/\n").unwrap();
1891
1892        // Create files
1893        fs::write(temp.path().join("included.rs"), "fn main() {}").unwrap();
1894        fs::write(temp.path().join("also_included.py"), "print('hi')").unwrap();
1895
1896        let ignored_dir = temp.path().join("ignored");
1897        fs::create_dir(&ignored_dir).unwrap();
1898        fs::write(ignored_dir.join("excluded.rs"), "fn test() {}").unwrap();
1899
1900        let files = indexer.discover_files(temp.path()).unwrap();
1901
1902        // Verify the expected files are found
1903        assert!(files.iter().any(|f| f.ends_with("included.rs")), "Should find included.rs");
1904        assert!(files.iter().any(|f| f.ends_with("also_included.py")), "Should find also_included.py");
1905
1906        // Verify excluded.rs in ignored/ directory is NOT found
1907        // This is the key test - gitignore should filter it out
1908        assert!(!files.iter().any(|f| {
1909            let path_str = f.to_string_lossy();
1910            path_str.contains("ignored") && f.ends_with("excluded.rs")
1911        }), "Should NOT find excluded.rs in ignored/ directory (gitignore pattern)");
1912
1913        // Should find exactly 2 files (included.rs and also_included.py)
1914        // .gitignore file itself has no supported language extension, so it won't be indexed
1915        assert_eq!(files.len(), 2, "Should find exactly 2 files (not including .gitignore or ignored/excluded.rs)");
1916    }
1917
1918    #[test]
1919    fn test_index_empty_directory() {
1920        let temp = TempDir::new().unwrap();
1921        let cache = CacheManager::new(temp.path());
1922        let config = IndexConfig::default();
1923        let indexer = Indexer::new(cache, config);
1924
1925        let stats = indexer.index(temp.path(), false).unwrap();
1926
1927        assert_eq!(stats.total_files, 0);
1928    }
1929
1930    #[test]
1931    fn test_index_single_rust_file() {
1932        let temp = TempDir::new().unwrap();
1933        let project_root = temp.path().join("project");
1934        fs::create_dir(&project_root).unwrap();
1935
1936        let cache = CacheManager::new(&project_root);
1937        let config = IndexConfig::default();
1938        let indexer = Indexer::new(cache, config);
1939
1940        // Create a Rust file
1941        fs::write(
1942            project_root.join("main.rs"),
1943            "fn main() { println!(\"Hello\"); }"
1944        ).unwrap();
1945
1946        let stats = indexer.index(&project_root, false).unwrap();
1947
1948        assert_eq!(stats.total_files, 1);
1949        assert!(stats.files_by_language.get("Rust").is_some());
1950    }
1951
1952    #[test]
1953    fn test_index_multiple_files() {
1954        let temp = TempDir::new().unwrap();
1955        let project_root = temp.path().join("project");
1956        fs::create_dir(&project_root).unwrap();
1957
1958        let cache = CacheManager::new(&project_root);
1959        let config = IndexConfig::default();
1960        let indexer = Indexer::new(cache, config);
1961
1962        // Create multiple files
1963        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1964        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
1965        fs::write(project_root.join("script.py"), "def main(): pass").unwrap();
1966
1967        let stats = indexer.index(&project_root, false).unwrap();
1968
1969        assert_eq!(stats.total_files, 3);
1970        assert_eq!(stats.files_by_language.get("Rust"), Some(&2));
1971        assert_eq!(stats.files_by_language.get("Python"), Some(&1));
1972    }
1973
1974    #[test]
1975    fn test_index_creates_trigram_index() {
1976        let temp = TempDir::new().unwrap();
1977        let project_root = temp.path().join("project");
1978        fs::create_dir(&project_root).unwrap();
1979
1980        let cache = CacheManager::new(&project_root);
1981        let config = IndexConfig::default();
1982        let indexer = Indexer::new(cache, config);
1983
1984        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
1985
1986        indexer.index(&project_root, false).unwrap();
1987
1988        // Verify trigrams.bin was created
1989        let trigrams_path = project_root.join(".reflex/trigrams.bin");
1990        assert!(trigrams_path.exists());
1991    }
1992
1993    #[test]
1994    fn test_index_creates_content_store() {
1995        let temp = TempDir::new().unwrap();
1996        let project_root = temp.path().join("project");
1997        fs::create_dir(&project_root).unwrap();
1998
1999        let cache = CacheManager::new(&project_root);
2000        let config = IndexConfig::default();
2001        let indexer = Indexer::new(cache, config);
2002
2003        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2004
2005        indexer.index(&project_root, false).unwrap();
2006
2007        // Verify content.bin was created
2008        let content_path = project_root.join(".reflex/content.bin");
2009        assert!(content_path.exists());
2010    }
2011
2012    #[test]
2013    fn test_index_incremental_no_changes() {
2014        let temp = TempDir::new().unwrap();
2015        let project_root = temp.path().join("project");
2016        fs::create_dir(&project_root).unwrap();
2017
2018        let cache = CacheManager::new(&project_root);
2019        let config = IndexConfig::default();
2020        let indexer = Indexer::new(cache, config);
2021
2022        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2023
2024        // First index
2025        let stats1 = indexer.index(&project_root, false).unwrap();
2026        assert_eq!(stats1.total_files, 1);
2027
2028        // Second index without changes
2029        let stats2 = indexer.index(&project_root, false).unwrap();
2030        assert_eq!(stats2.total_files, 1);
2031    }
2032
2033    #[test]
2034    fn test_index_incremental_with_changes() {
2035        let temp = TempDir::new().unwrap();
2036        let project_root = temp.path().join("project");
2037        fs::create_dir(&project_root).unwrap();
2038
2039        let cache = CacheManager::new(&project_root);
2040        let config = IndexConfig::default();
2041        let indexer = Indexer::new(cache, config);
2042
2043        let main_path = project_root.join("main.rs");
2044        fs::write(&main_path, "fn main() {}").unwrap();
2045
2046        // First index
2047        indexer.index(&project_root, false).unwrap();
2048
2049        // Modify file
2050        fs::write(&main_path, "fn main() { println!(\"changed\"); }").unwrap();
2051
2052        // Second index should detect change
2053        let stats = indexer.index(&project_root, false).unwrap();
2054        assert_eq!(stats.total_files, 1);
2055    }
2056
2057    #[test]
2058    fn test_index_incremental_new_file() {
2059        let temp = TempDir::new().unwrap();
2060        let project_root = temp.path().join("project");
2061        fs::create_dir(&project_root).unwrap();
2062
2063        let cache = CacheManager::new(&project_root);
2064        let config = IndexConfig::default();
2065        let indexer = Indexer::new(cache, config);
2066
2067        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2068
2069        // First index
2070        let stats1 = indexer.index(&project_root, false).unwrap();
2071        assert_eq!(stats1.total_files, 1);
2072
2073        // Add new file
2074        fs::write(project_root.join("lib.rs"), "pub fn test() {}").unwrap();
2075
2076        // Second index should include new file
2077        let stats2 = indexer.index(&project_root, false).unwrap();
2078        assert_eq!(stats2.total_files, 2);
2079    }
2080
2081    #[test]
2082    fn test_index_parallel_threads_config() {
2083        let temp = TempDir::new().unwrap();
2084        let project_root = temp.path().join("project");
2085        fs::create_dir(&project_root).unwrap();
2086
2087        let cache = CacheManager::new(&project_root);
2088
2089        // Test with explicit thread count
2090        let mut config = IndexConfig::default();
2091        config.parallel_threads = 2;
2092
2093        let indexer = Indexer::new(cache, config);
2094
2095        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2096
2097        let stats = indexer.index(&project_root, false).unwrap();
2098        assert_eq!(stats.total_files, 1);
2099    }
2100
2101    #[test]
2102    fn test_index_parallel_threads_auto() {
2103        let temp = TempDir::new().unwrap();
2104        let project_root = temp.path().join("project");
2105        fs::create_dir(&project_root).unwrap();
2106
2107        let cache = CacheManager::new(&project_root);
2108
2109        // Test with auto thread count (0 = auto)
2110        let mut config = IndexConfig::default();
2111        config.parallel_threads = 0;
2112
2113        let indexer = Indexer::new(cache, config);
2114
2115        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2116
2117        let stats = indexer.index(&project_root, false).unwrap();
2118        assert_eq!(stats.total_files, 1);
2119    }
2120
2121    #[test]
2122    fn test_index_respects_size_limit() {
2123        let temp = TempDir::new().unwrap();
2124        let project_root = temp.path().join("project");
2125        fs::create_dir(&project_root).unwrap();
2126
2127        let cache = CacheManager::new(&project_root);
2128
2129        // Very small size limit
2130        let mut config = IndexConfig::default();
2131        config.max_file_size = 50;
2132
2133        let indexer = Indexer::new(cache, config);
2134
2135        // Small file (should be indexed)
2136        fs::write(project_root.join("small.rs"), "fn a() {}").unwrap();
2137
2138        // Large file (should be skipped)
2139        let large_content = "fn main() {}\n".repeat(10);
2140        fs::write(project_root.join("large.rs"), large_content).unwrap();
2141
2142        let stats = indexer.index(&project_root, false).unwrap();
2143
2144        // Only small file should be indexed
2145        assert_eq!(stats.total_files, 1);
2146    }
2147
2148    #[test]
2149    fn test_index_mixed_languages() {
2150        let temp = TempDir::new().unwrap();
2151        let project_root = temp.path().join("project");
2152        fs::create_dir(&project_root).unwrap();
2153
2154        let cache = CacheManager::new(&project_root);
2155        let config = IndexConfig::default();
2156        let indexer = Indexer::new(cache, config);
2157
2158        // Create files in multiple languages
2159        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2160        fs::write(project_root.join("test.py"), "def test(): pass").unwrap();
2161        fs::write(project_root.join("app.js"), "function main() {}").unwrap();
2162        fs::write(project_root.join("lib.go"), "func main() {}").unwrap();
2163
2164        let stats = indexer.index(&project_root, false).unwrap();
2165
2166        assert_eq!(stats.total_files, 4);
2167        assert!(stats.files_by_language.contains_key("Rust"));
2168        assert!(stats.files_by_language.contains_key("Python"));
2169        assert!(stats.files_by_language.contains_key("JavaScript"));
2170        assert!(stats.files_by_language.contains_key("Go"));
2171    }
2172
2173    #[test]
2174    fn test_index_updates_cache_stats() {
2175        let temp = TempDir::new().unwrap();
2176        let project_root = temp.path().join("project");
2177        fs::create_dir(&project_root).unwrap();
2178
2179        let cache = CacheManager::new(&project_root);
2180        let config = IndexConfig::default();
2181        let indexer = Indexer::new(cache, config);
2182
2183        fs::write(project_root.join("main.rs"), "fn main() {}").unwrap();
2184
2185        indexer.index(&project_root, false).unwrap();
2186
2187        // Verify cache stats were updated
2188        let cache = CacheManager::new(&project_root);
2189        let stats = cache.stats().unwrap();
2190
2191        assert_eq!(stats.total_files, 1);
2192        assert!(stats.index_size_bytes > 0);
2193    }
2194}