reflex/
query.rs

1//! Query engine for searching indexed code
2//!
3//! The query engine loads the memory-mapped cache and executes
4//! deterministic searches based on lexical, structural, or symbol patterns.
5
6use anyhow::{Context, Result};
7use regex::Regex;
8
9use crate::cache::CacheManager;
10use crate::content_store::ContentReader;
11use crate::models::{
12    IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
13    SymbolKind,
14};
15use crate::parsers::ParserFactory;
16use crate::regex_trigrams::extract_trigrams_from_regex;
17use crate::trigram::TrigramIndex;
18
19/// Query filter options
20#[derive(Debug, Clone)]
21pub struct QueryFilter {
22    /// Language filter (None = all languages)
23    pub language: Option<Language>,
24    /// Symbol kind filter (None = all kinds)
25    pub kind: Option<SymbolKind>,
26    /// Use AST pattern matching (vs lexical search)
27    pub use_ast: bool,
28    /// Use regex pattern matching
29    pub use_regex: bool,
30    /// Maximum number of results
31    pub limit: Option<usize>,
32    /// Search symbol definitions only (vs full-text)
33    pub symbols_mode: bool,
34    /// Show full symbol body (from span.start_line to span.end_line)
35    pub expand: bool,
36    /// File path filter (substring match)
37    pub file_pattern: Option<String>,
38    /// Exact symbol name match (no substring matching)
39    pub exact: bool,
40    /// Use substring matching instead of word-boundary matching (opt-in, expansive)
41    pub use_contains: bool,
42    /// Query timeout in seconds (0 = no timeout)
43    pub timeout_secs: u64,
44    /// Glob patterns to include (empty = all files)
45    pub glob_patterns: Vec<String>,
46    /// Glob patterns to exclude (applied after includes)
47    pub exclude_patterns: Vec<String>,
48    /// Return only unique file paths (deduplicated)
49    pub paths_only: bool,
50}
51
52impl Default for QueryFilter {
53    fn default() -> Self {
54        Self {
55            language: None,
56            kind: None,
57            use_ast: false,
58            use_regex: false,
59            limit: None,
60            symbols_mode: false,
61            expand: false,
62            file_pattern: None,
63            exact: false,
64            use_contains: false,  // Default: word-boundary matching
65            timeout_secs: 30, // 30 seconds default timeout
66            glob_patterns: Vec::new(),
67            exclude_patterns: Vec::new(),
68            paths_only: false,
69        }
70    }
71}
72
73/// Manages query execution against the index
74pub struct QueryEngine {
75    cache: CacheManager,
76}
77
78impl QueryEngine {
79    /// Create a new query engine with the given cache manager
80    pub fn new(cache: CacheManager) -> Self {
81        Self { cache }
82    }
83
84    /// Execute a query and return matching results with index metadata
85    ///
86    /// This is the preferred method for programmatic/JSON output as it includes
87    /// index freshness information that AI agents can use to decide whether to re-index.
88    pub fn search_with_metadata(&self, pattern: &str, filter: QueryFilter) -> Result<QueryResponse> {
89        log::info!("Executing query with metadata: pattern='{}', filter={:?}", pattern, filter);
90
91        // Ensure cache exists
92        if !self.cache.exists() {
93            anyhow::bail!(
94                "Index not found. Run 'rfx index' to build the cache first."
95            );
96        }
97
98        // Validate cache integrity
99        if let Err(e) = self.cache.validate() {
100            anyhow::bail!(
101                "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
102                e
103            );
104        }
105
106        // Get index status and warning (without printing warnings to stderr)
107        let (status, can_trust_results, warning) = self.get_index_status()?;
108
109        // Execute the search
110        let results = self.search_internal(pattern, filter)?;
111
112        Ok(QueryResponse {
113            status,
114            can_trust_results,
115            warning,
116            results,
117        })
118    }
119
120    /// Execute a query and return matching results (legacy method)
121    ///
122    /// This method prints warnings to stderr and returns just the results.
123    /// For programmatic use, prefer `search_with_metadata()`.
124    pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
125        log::info!("Executing query: pattern='{}', filter={:?}", pattern, filter);
126
127        // Ensure cache exists
128        if !self.cache.exists() {
129            anyhow::bail!(
130                "Index not found. Run 'rfx index' to build the cache first."
131            );
132        }
133
134        // Validate cache integrity
135        if let Err(e) = self.cache.validate() {
136            anyhow::bail!(
137                "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
138                e
139            );
140        }
141
142        // Show non-blocking warnings about branch state and staleness
143        self.check_index_freshness()?;
144
145        // Execute the search
146        self.search_internal(pattern, filter)
147    }
148
149    /// Internal search implementation (used by both search methods)
150    fn search_internal(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
151        use std::time::{Duration, Instant};
152
153        // Start timeout timer if configured
154        let start_time = Instant::now();
155        let timeout = if filter.timeout_secs > 0 {
156            Some(Duration::from_secs(filter.timeout_secs))
157        } else {
158            None
159        };
160
161        // PHASE 1: Get initial candidates (choose search strategy)
162        let mut results = if filter.use_regex {
163            // Regex pattern search with trigram optimization
164            self.get_regex_candidates(pattern, timeout.as_ref(), &start_time)?
165        } else {
166            // Standard trigram-based full-text search
167            self.get_trigram_candidates(pattern, &filter)?
168        };
169
170        // Check timeout after Phase 1
171        if let Some(timeout_duration) = timeout {
172            if start_time.elapsed() > timeout_duration {
173                anyhow::bail!(
174                    "Query timeout exceeded ({} seconds).\n\
175                     \n\
176                     The query took too long to complete. Try one of these approaches:\n\
177                     • Use a more specific search pattern (longer patterns = faster search)\n\
178                     • Add a language filter with --lang to narrow the search space\n\
179                     • Add a file filter with --file to search specific directories\n\
180                     • Increase the timeout with --timeout <seconds>\n\
181                     \n\
182                     Example: rfx query \"{}\" --lang rust --timeout 60",
183                    filter.timeout_secs,
184                    pattern
185                );
186            }
187        }
188
189        // PHASE 2: Enrich with symbol information or AST pattern matching (if needed)
190        if filter.use_ast {
191            // AST pattern matching: Execute Tree-sitter query on candidate files
192            results = self.enrich_with_ast(results, pattern, filter.language)?;
193        } else if filter.symbols_mode || filter.kind.is_some() {
194            // Symbol enrichment: Parse candidate files and extract symbol definitions
195            results = self.enrich_with_symbols(results, pattern, &filter)?;
196        }
197
198        // PHASE 3: Apply filters
199        if let Some(lang) = filter.language {
200            results.retain(|r| r.lang == lang);
201        }
202
203        // Apply kind filter (only relevant for symbol searches)
204        // Special case: --kind function also includes methods (methods are functions in classes)
205        if let Some(ref kind) = filter.kind {
206            results.retain(|r| {
207                if matches!(kind, SymbolKind::Function) {
208                    // When searching for functions, also include methods
209                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
210                } else {
211                    r.kind == *kind
212                }
213            });
214        }
215
216        // Apply file path filter (substring match)
217        if let Some(ref file_pattern) = filter.file_pattern {
218            results.retain(|r| r.path.contains(file_pattern));
219        }
220
221        // Apply glob pattern filters
222        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
223            use globset::{Glob, GlobSetBuilder};
224
225            // Build include matcher (if patterns specified)
226            let include_matcher = if !filter.glob_patterns.is_empty() {
227                let mut builder = GlobSetBuilder::new();
228                for pattern in &filter.glob_patterns {
229                    match Glob::new(pattern) {
230                        Ok(glob) => {
231                            builder.add(glob);
232                        }
233                        Err(e) => {
234                            log::warn!("Invalid glob pattern '{}': {}", pattern, e);
235                        }
236                    }
237                }
238                match builder.build() {
239                    Ok(matcher) => Some(matcher),
240                    Err(e) => {
241                        log::warn!("Failed to build glob matcher: {}", e);
242                        None
243                    }
244                }
245            } else {
246                None
247            };
248
249            // Build exclude matcher (if patterns specified)
250            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
251                let mut builder = GlobSetBuilder::new();
252                for pattern in &filter.exclude_patterns {
253                    match Glob::new(pattern) {
254                        Ok(glob) => {
255                            builder.add(glob);
256                        }
257                        Err(e) => {
258                            log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
259                        }
260                    }
261                }
262                match builder.build() {
263                    Ok(matcher) => Some(matcher),
264                    Err(e) => {
265                        log::warn!("Failed to build exclude matcher: {}", e);
266                        None
267                    }
268                }
269            } else {
270                None
271            };
272
273            // Apply filters
274            results.retain(|r| {
275                // If include patterns specified, path must match at least one
276                let included = if let Some(ref matcher) = include_matcher {
277                    matcher.is_match(&r.path)
278                } else {
279                    true // No include patterns = include all
280                };
281
282                // If exclude patterns specified, path must NOT match any
283                let excluded = if let Some(ref matcher) = exclude_matcher {
284                    matcher.is_match(&r.path)
285                } else {
286                    false // No exclude patterns = exclude none
287                };
288
289                included && !excluded
290            });
291        }
292
293        // Apply exact name filter (only for symbol searches)
294        if filter.exact && filter.symbols_mode {
295            results.retain(|r| r.symbol.as_deref() == Some(pattern));
296        }
297
298        // Expand symbol bodies if requested
299        // Works for both symbol-mode and regex searches (if regex matched a symbol definition)
300        if filter.expand {
301            // Load content store to fetch full symbol bodies
302            let content_path = self.cache.path().join("content.bin");
303            if let Ok(content_reader) = ContentReader::open(&content_path) {
304                for result in &mut results {
305                    // Only expand if the result has a meaningful span (not just a single line)
306                    if result.span.start_line < result.span.end_line {
307                        // Find the file_id for this result's path
308                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
309                            // Fetch the full span content
310                            if let Ok(content) = content_reader.get_file_content(file_id) {
311                                let lines: Vec<&str> = content.lines().collect();
312                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
313                                let end_idx = (result.span.end_line as usize).min(lines.len());
314
315                                if start_idx < end_idx {
316                                    let full_body = lines[start_idx..end_idx].join("\n");
317                                    result.preview = full_body;
318                                }
319                            }
320                        }
321                    }
322                }
323            }
324        }
325
326        // Step 4: Deduplicate by path if paths-only mode
327        if filter.paths_only {
328            use std::collections::HashSet;
329            let mut seen_paths = HashSet::new();
330            results.retain(|r| seen_paths.insert(r.path.clone()));
331        }
332
333        // Step 5: Sort results deterministically (by path, then line number)
334        results.sort_by(|a, b| {
335            a.path.cmp(&b.path)
336                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
337        });
338
339        // Step 6: Apply limit
340        if let Some(limit) = filter.limit {
341            results.truncate(limit);
342        }
343
344        log::info!("Query returned {} results", results.len());
345
346        Ok(results)
347    }
348
349    /// Search for symbols by exact name match
350    pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
351        let filter = QueryFilter {
352            symbols_mode: true,
353            ..Default::default()
354        };
355        self.search(name, filter)
356    }
357
358    /// Search using a Tree-sitter AST pattern
359    pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
360        let filter = QueryFilter {
361            language: lang,
362            use_ast: true,
363            ..Default::default()
364        };
365
366        self.search(pattern, filter)
367    }
368
369    /// Execute AST query on all indexed files (no trigram filtering)
370    ///
371    /// WARNING: This method scans the entire codebase (500ms-2s+).
372    /// In 95% of cases, use --symbols instead which is 10-100x faster.
373    ///
374    /// # Algorithm
375    /// 1. Get all indexed files for the specified language
376    /// 2. Apply glob/exclude filters to reduce file set
377    /// 3. Load file contents for all matching files
378    /// 4. Execute AST query pattern using Tree-sitter
379    /// 5. Apply remaining filters and return results
380    ///
381    /// # Performance
382    /// - Parses entire codebase (not just trigram candidates)
383    /// - Expected: 500ms-2s for medium codebases, 2-10s for large codebases
384    /// - Use --glob to limit scope for better performance
385    ///
386    /// # Requirements
387    /// - Language must be specified (AST queries are language-specific)
388    /// - AST pattern must be valid S-expression syntax
389    pub fn search_ast_all_files(&self, ast_pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
390        log::info!("Executing AST query on all files: pattern='{}', filter={:?}", ast_pattern, filter);
391
392        // Require language for AST queries
393        let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
394            "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
395             \n\
396             Example: rfx query \"(function_definition) @fn\" --ast --lang python"
397        ))?;
398
399        // Ensure cache exists
400        if !self.cache.exists() {
401            anyhow::bail!(
402                "Index not found. Run 'rfx index' to build the cache first."
403            );
404        }
405
406        // Show non-blocking warnings about branch state and staleness
407        self.check_index_freshness()?;
408
409        // Load content store
410        let content_path = self.cache.path().join("content.bin");
411        let content_reader = ContentReader::open(&content_path)
412            .context("Failed to open content store")?;
413
414        // Build glob matchers ONCE before file iteration (performance optimization)
415        use globset::{Glob, GlobSetBuilder};
416
417        let include_matcher = if !filter.glob_patterns.is_empty() {
418            let mut builder = GlobSetBuilder::new();
419            for pattern in &filter.glob_patterns {
420                if let Ok(glob) = Glob::new(pattern) {
421                    builder.add(glob);
422                }
423            }
424            builder.build().ok()
425        } else {
426            None
427        };
428
429        let exclude_matcher = if !filter.exclude_patterns.is_empty() {
430            let mut builder = GlobSetBuilder::new();
431            for pattern in &filter.exclude_patterns {
432                if let Ok(glob) = Glob::new(pattern) {
433                    builder.add(glob);
434                }
435            }
436            builder.build().ok()
437        } else {
438            None
439        };
440
441        // Get all files matching the language and glob filters
442        let mut candidates: Vec<SearchResult> = Vec::new();
443
444        for file_id in 0..content_reader.file_count() {
445            let file_path = match content_reader.get_file_path(file_id as u32) {
446                Some(p) => p,
447                None => continue,
448            };
449
450            // Detect language from file extension
451            let ext = file_path.extension()
452                .and_then(|e| e.to_str())
453                .unwrap_or("");
454            let detected_lang = Language::from_extension(ext);
455
456            // Filter by language
457            if detected_lang != lang {
458                continue;
459            }
460
461            let file_path_str = file_path.to_string_lossy().to_string();
462
463            // Apply glob/exclude filters BEFORE loading content (performance optimization)
464            let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
465            let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
466
467            if !included || excluded {
468                continue;
469            }
470
471            // Create a dummy candidate for this file (AST query will replace it)
472            candidates.push(SearchResult {
473                path: file_path_str,
474                lang: detected_lang,
475                span: Span { start_line: 1, start_col: 1, end_line: 1, end_col: 1 },
476                symbol: None,
477                kind: SymbolKind::Unknown("ast_query".to_string()),
478                scope: None,
479                preview: String::new(),
480            });
481        }
482
483        log::info!("AST query scanning {} files for language {:?}", candidates.len(), lang);
484
485        if candidates.is_empty() {
486            log::warn!("No files found for language {:?}. Check your language filter or glob patterns.", lang);
487            return Ok(Vec::new());
488        }
489
490        // Execute the AST query on all candidate files
491        // This will load file contents and parse them with tree-sitter
492        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
493
494        log::debug!("AST query found {} matches before filtering", results.len());
495
496        // Apply remaining filters (same as search_internal Phase 3)
497
498        // Apply kind filter
499        if let Some(ref kind) = filter.kind {
500            results.retain(|r| {
501                if matches!(kind, SymbolKind::Function) {
502                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
503                } else {
504                    r.kind == *kind
505                }
506            });
507        }
508
509        // Note: exact filter doesn't make sense for AST queries (pattern is S-expression, not symbol name)
510
511        // Expand symbol bodies if requested
512        if filter.expand {
513            let content_path = self.cache.path().join("content.bin");
514            if let Ok(content_reader) = ContentReader::open(&content_path) {
515                for result in &mut results {
516                    if result.span.start_line < result.span.end_line {
517                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
518                            if let Ok(content) = content_reader.get_file_content(file_id) {
519                                let lines: Vec<&str> = content.lines().collect();
520                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
521                                let end_idx = (result.span.end_line as usize).min(lines.len());
522
523                                if start_idx < end_idx {
524                                    let full_body = lines[start_idx..end_idx].join("\n");
525                                    result.preview = full_body;
526                                }
527                            }
528                        }
529                    }
530                }
531            }
532        }
533
534        // Deduplicate by path if paths-only mode
535        if filter.paths_only {
536            use std::collections::HashSet;
537            let mut seen_paths = HashSet::new();
538            results.retain(|r| seen_paths.insert(r.path.clone()));
539        }
540
541        // Sort results deterministically
542        results.sort_by(|a, b| {
543            a.path.cmp(&b.path)
544                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
545        });
546
547        // Apply limit
548        if let Some(limit) = filter.limit {
549            results.truncate(limit);
550        }
551
552        log::info!("AST query returned {} results", results.len());
553
554        Ok(results)
555    }
556
557    /// Search using AST pattern with separate text pattern for trigram filtering
558    ///
559    /// This allows efficient AST queries by:
560    /// 1. Using text_pattern for Phase 1 trigram filtering (narrows to candidate files)
561    /// 2. Using ast_pattern for Phase 2 AST matching (structure-aware filtering)
562    ///
563    /// # Example
564    /// ```ignore
565    /// // Find async functions: trigram search for "fn ", AST match for function_item
566    /// engine.search_ast_with_text_filter("fn ", "(function_item (async))", filter)?;
567    /// ```
568    pub fn search_ast_with_text_filter(
569        &self,
570        text_pattern: &str,
571        ast_pattern: &str,
572        filter: QueryFilter,
573    ) -> Result<Vec<SearchResult>> {
574        log::info!("Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
575                   text_pattern, ast_pattern, filter);
576
577        // Ensure cache exists
578        if !self.cache.exists() {
579            anyhow::bail!(
580                "Index not found. Run 'rfx index' to build the cache first."
581            );
582        }
583
584        // Show non-blocking warnings about branch state and staleness
585        self.check_index_freshness()?;
586
587        // Start timeout timer if configured
588        use std::time::{Duration, Instant};
589        let start_time = Instant::now();
590        let timeout = if filter.timeout_secs > 0 {
591            Some(Duration::from_secs(filter.timeout_secs))
592        } else {
593            None
594        };
595
596        // PHASE 1: Get initial candidates using text pattern (trigram search)
597        let candidates = if filter.use_regex {
598            self.get_regex_candidates(text_pattern, timeout.as_ref(), &start_time)?
599        } else {
600            self.get_trigram_candidates(text_pattern, &filter)?
601        };
602
603        log::debug!("Phase 1 found {} candidate locations", candidates.len());
604
605        // PHASE 2: Execute AST query on candidates
606        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
607
608        log::debug!("Phase 2 AST matching found {} results", results.len());
609
610        // PHASE 3: Apply filters
611        if let Some(lang) = filter.language {
612            results.retain(|r| r.lang == lang);
613        }
614
615        if let Some(ref kind) = filter.kind {
616            results.retain(|r| {
617                if matches!(kind, SymbolKind::Function) {
618                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
619                } else {
620                    r.kind == *kind
621                }
622            });
623        }
624
625        if let Some(ref file_pattern) = filter.file_pattern {
626            results.retain(|r| r.path.contains(file_pattern));
627        }
628
629        // Apply glob pattern filters (same logic as in search_internal)
630        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
631            use globset::{Glob, GlobSetBuilder};
632
633            let include_matcher = if !filter.glob_patterns.is_empty() {
634                let mut builder = GlobSetBuilder::new();
635                for pattern in &filter.glob_patterns {
636                    if let Ok(glob) = Glob::new(pattern) {
637                        builder.add(glob);
638                    }
639                }
640                builder.build().ok()
641            } else {
642                None
643            };
644
645            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
646                let mut builder = GlobSetBuilder::new();
647                for pattern in &filter.exclude_patterns {
648                    if let Ok(glob) = Glob::new(pattern) {
649                        builder.add(glob);
650                    }
651                }
652                builder.build().ok()
653            } else {
654                None
655            };
656
657            results.retain(|r| {
658                let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&r.path));
659                let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&r.path));
660                included && !excluded
661            });
662        }
663
664        if filter.exact && filter.symbols_mode {
665            results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
666        }
667
668        // Expand symbol bodies if requested
669        if filter.expand {
670            let content_path = self.cache.path().join("content.bin");
671            if let Ok(content_reader) = ContentReader::open(&content_path) {
672                for result in &mut results {
673                    if result.span.start_line < result.span.end_line {
674                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
675                            if let Ok(content) = content_reader.get_file_content(file_id) {
676                                let lines: Vec<&str> = content.lines().collect();
677                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
678                                let end_idx = (result.span.end_line as usize).min(lines.len());
679
680                                if start_idx < end_idx {
681                                    let full_body = lines[start_idx..end_idx].join("\n");
682                                    result.preview = full_body;
683                                }
684                            }
685                        }
686                    }
687                }
688            }
689        }
690
691        // Sort results deterministically
692        results.sort_by(|a, b| {
693            a.path.cmp(&b.path)
694                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
695        });
696
697        // Apply limit
698        if let Some(limit) = filter.limit {
699            results.truncate(limit);
700        }
701
702        log::info!("AST query returned {} results", results.len());
703
704        Ok(results)
705    }
706
707    /// List all symbols of a specific kind
708    pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
709        let filter = QueryFilter {
710            kind: Some(kind),
711            symbols_mode: true,
712            ..Default::default()
713        };
714
715        self.search("*", filter)
716    }
717
718    /// Enrich text match candidates with symbol information by parsing files
719    ///
720    /// Takes a list of text match candidates and replaces them with actual symbol
721    /// definitions where the symbol name matches the pattern.
722    ///
723    /// # Algorithm
724    /// 1. Group candidates by file_id for efficient processing
725    /// 2. Parse each file with tree-sitter to extract symbols
726    /// 3. For each symbol, check if its name matches the pattern
727    ///    - If use_regex=true: match symbol name against regex pattern
728    ///    - If use_contains=true: substring match (contains)
729    ///    - Default: exact match
730    /// 4. Return symbol results (not the original text matches)
731    ///
732    /// # Performance
733    /// Only parses files that have text matches, so typically 10-100 files
734    /// instead of the entire codebase (62K+ files).
735    ///
736    /// # Optimizations
737    /// 1. Language filtering: Skips files with unsupported languages (no parsers)
738    /// 2. Parallel processing: Uses Rayon to parse files concurrently across CPU cores
739    fn enrich_with_symbols(&self, candidates: Vec<SearchResult>, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
740        // Load content store for file reading
741        let content_path = self.cache.path().join("content.bin");
742        let content_reader = ContentReader::open(&content_path)
743            .context("Failed to open content store")?;
744
745        // Load trigram index for file path lookups
746        let trigrams_path = self.cache.path().join("trigrams.bin");
747        let trigram_index = if trigrams_path.exists() {
748            TrigramIndex::load(&trigrams_path)?
749        } else {
750            Self::rebuild_trigram_index(&content_reader)?
751        };
752
753        // Group candidates by file, filtering out unsupported languages
754        use std::collections::HashMap;
755        let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
756        let mut skipped_unsupported = 0;
757
758        for candidate in candidates {
759            // Skip files with unsupported languages (no parser available)
760            if !candidate.lang.is_supported() {
761                skipped_unsupported += 1;
762                continue;
763            }
764
765            files_by_path
766                .entry(candidate.path.clone())
767                .or_insert_with(Vec::new)
768                .push(candidate);
769        }
770
771        let total_files = files_by_path.len();
772        log::debug!("Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
773                   total_files, skipped_unsupported);
774
775        // Warn if pattern is very broad (may take time to parse all files)
776        if total_files > 1000 {
777            log::warn!(
778                "Pattern '{}' matched {} files. This may take some time to parse.",
779                pattern,
780                total_files
781            );
782            log::warn!("Consider using a more specific pattern or adding --lang/--file filters to narrow the search.");
783        }
784
785        // Convert to vec for parallel processing
786        let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
787
788        // PHASE 2a: Line-based pre-filtering (skip files where ALL matches are in comments/strings)
789        // This reduces tree-sitter parsing workload by 2-5x for most queries
790        let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
791
792        for file_path in &files_to_process {
793            // Get the language for this file
794            let ext = std::path::Path::new(file_path)
795                .extension()
796                .and_then(|e| e.to_str())
797                .unwrap_or("");
798            let lang = Language::from_extension(ext);
799
800            // Get line filter for this language (if available)
801            if let Some(line_filter) = crate::line_filter::get_filter(lang) {
802                // Find file_id for this path
803                let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
804                    Some(id) => id,
805                    None => continue,
806                };
807
808                // Load file content
809                let content = match content_reader.get_file_content(file_id) {
810                    Ok(c) => c,
811                    Err(_) => continue,
812                };
813
814                // Check if ALL pattern occurrences are in comments/strings
815                let mut all_in_non_code = true;
816                for line in content.lines() {
817                    // Find all occurrences of the pattern in this line
818                    let mut search_start = 0;
819                    while let Some(pos) = line[search_start..].find(pattern) {
820                        let absolute_pos = search_start + pos;
821
822                        // Check if this occurrence is in code (not comment/string)
823                        let in_comment = line_filter.is_in_comment(line, absolute_pos);
824                        let in_string = line_filter.is_in_string(line, absolute_pos);
825
826                        if !in_comment && !in_string {
827                            // Found at least one occurrence in actual code
828                            all_in_non_code = false;
829                            break;
830                        }
831
832                        search_start = absolute_pos + pattern.len();
833                    }
834
835                    if !all_in_non_code {
836                        break;
837                    }
838                }
839
840                // If ALL occurrences are in comments/strings, skip this file
841                if all_in_non_code {
842                    // Double-check: make sure there was at least one occurrence
843                    if content.contains(pattern) {
844                        files_to_skip.insert(file_path.clone());
845                        log::debug!("Pre-filter: Skipping {} (all matches in comments/strings)", file_path);
846                    }
847                }
848            }
849        }
850
851        // Filter out files we're skipping
852        files_to_process.retain(|path| !files_to_skip.contains(path));
853
854        log::debug!("Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
855                   files_to_skip.len(), files_to_process.len());
856
857        // Configure thread pool for parallel processing (use 80% of available cores, capped at 8)
858        let num_threads = {
859            let available_cores = std::thread::available_parallelism()
860                .map(|n| n.get())
861                .unwrap_or(4);
862            // Use 80% of available cores (minimum 1, maximum 8) to avoid locking the system
863            // Cap at 8 to prevent diminishing returns from cache contention on high-core systems
864            ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
865        };
866
867        log::debug!("Using {} threads for parallel symbol extraction (out of {} available cores)",
868                   num_threads,
869                   std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
870
871        // Build a custom thread pool with limited threads
872        let pool = rayon::ThreadPoolBuilder::new()
873            .num_threads(num_threads)
874            .build()
875            .context("Failed to create thread pool for symbol extraction")?;
876
877        // Parse files in parallel using custom thread pool
878        use rayon::prelude::*;
879
880        let all_symbols: Vec<SearchResult> = pool.install(|| {
881            files_to_process
882                .par_iter()
883                .flat_map(|file_path| {
884                // Find file_id for this path
885                let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
886                    Some(id) => id,
887                    None => {
888                        log::warn!("Could not find file_id for path: {}", file_path);
889                        return Vec::new();
890                    }
891                };
892
893                let content = match content_reader.get_file_content(file_id) {
894                    Ok(c) => c,
895                    Err(e) => {
896                        log::warn!("Failed to read file {}: {}", file_path, e);
897                        return Vec::new();
898                    }
899                };
900
901                // Detect language
902                let ext = std::path::Path::new(file_path)
903                    .extension()
904                    .and_then(|e| e.to_str())
905                    .unwrap_or("");
906                let lang = Language::from_extension(ext);
907
908                // Parse file to extract symbols
909                match ParserFactory::parse(file_path, content, lang) {
910                    Ok(symbols) => {
911                        log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
912                        symbols
913                    }
914                    Err(e) => {
915                        log::debug!("Failed to parse {}: {}", file_path, e);
916                        Vec::new()
917                    }
918                }
919            })
920            .collect()
921        });
922
923        // Filter symbols by pattern
924        let filtered: Vec<SearchResult> = if filter.use_regex {
925            // Compile regex for symbol name matching
926            let regex = Regex::new(pattern)
927                .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
928
929            all_symbols
930                .into_iter()
931                .filter(|sym| {
932                    sym.symbol.as_deref().map_or(false, |s| regex.is_match(s))
933                })
934                .collect()
935        } else if filter.use_contains {
936            // Substring match (opt-in with --contains)
937            all_symbols
938                .into_iter()
939                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
940                .collect()
941        } else {
942            // Exact match (default)
943            all_symbols
944                .into_iter()
945                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
946                .collect()
947        };
948
949        log::info!("Symbol enrichment found {} matches for pattern '{}'", filtered.len(), pattern);
950
951        Ok(filtered)
952    }
953
954    /// Enrich text match candidates with AST pattern matching
955    ///
956    /// Takes a list of text match candidates and executes a Tree-sitter AST query
957    /// on the candidate files, returning only matches that satisfy the AST pattern.
958    ///
959    /// # Algorithm
960    /// 1. Extract unique file paths from candidates
961    /// 2. Load file contents for each candidate file
962    /// 3. Execute AST query pattern using Tree-sitter
963    /// 4. Return AST matches
964    ///
965    /// # Performance
966    /// Only parses files that have text matches, so typically 10-100 files
967    /// instead of the entire codebase (62K+ files).
968    ///
969    /// # Requirements
970    /// - Language must be specified (AST queries are language-specific)
971    /// - AST pattern must be valid S-expression syntax
972    fn enrich_with_ast(&self, candidates: Vec<SearchResult>, ast_pattern: &str, language: Option<Language>) -> Result<Vec<SearchResult>> {
973        // Require language for AST queries
974        let lang = language.ok_or_else(|| anyhow::anyhow!(
975            "Language must be specified for AST pattern matching. Use --lang to specify the language."
976        ))?;
977
978        // Load content store for file reading
979        let content_path = self.cache.path().join("content.bin");
980        let content_reader = ContentReader::open(&content_path)
981            .context("Failed to open content store")?;
982
983        // Load trigram index for file path lookups
984        let trigrams_path = self.cache.path().join("trigrams.bin");
985        let trigram_index = if trigrams_path.exists() {
986            TrigramIndex::load(&trigrams_path)?
987        } else {
988            Self::rebuild_trigram_index(&content_reader)?
989        };
990
991        // Collect unique file paths from candidates and load their contents
992        use std::collections::HashMap;
993        let mut file_contents: HashMap<String, String> = HashMap::new();
994
995        for candidate in &candidates {
996            if file_contents.contains_key(&candidate.path) {
997                continue;
998            }
999
1000            // Find file_id for this path
1001            let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, &candidate.path) {
1002                Some(id) => id,
1003                None => {
1004                    log::warn!("Could not find file_id for path: {}", candidate.path);
1005                    continue;
1006                }
1007            };
1008
1009            // Load file content
1010            let content = match content_reader.get_file_content(file_id) {
1011                Ok(c) => c,
1012                Err(e) => {
1013                    log::warn!("Failed to read file {}: {}", candidate.path, e);
1014                    continue;
1015                }
1016            };
1017
1018            file_contents.insert(candidate.path.clone(), content.to_string());
1019        }
1020
1021        log::debug!("Executing AST query on {} candidate files with language {:?}", file_contents.len(), lang);
1022
1023        // Execute AST query using the ast_query module
1024        let results = crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1025
1026        log::info!("AST query found {} matches for pattern '{}'", results.len(), ast_pattern);
1027
1028        Ok(results)
1029    }
1030
1031    /// Helper to find file_id by path string
1032    fn find_file_id_by_path(
1033        content_reader: &ContentReader,
1034        trigram_index: &TrigramIndex,
1035        target_path: &str,
1036    ) -> Option<u32> {
1037        // Try trigram index first (faster)
1038        for file_id in 0..trigram_index.file_count() {
1039            if let Some(path) = trigram_index.get_file(file_id as u32) {
1040                if path.to_string_lossy() == target_path {
1041                    return Some(file_id as u32);
1042                }
1043            }
1044        }
1045
1046        // Fallback to content reader
1047        for file_id in 0..content_reader.file_count() {
1048            if let Some(path) = content_reader.get_file_path(file_id as u32) {
1049                if path.to_string_lossy() == target_path {
1050                    return Some(file_id as u32);
1051                }
1052            }
1053        }
1054
1055        None
1056    }
1057
1058    /// Get candidate results using trigram-based full-text search
1059    fn get_trigram_candidates(&self, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1060        // Load content store
1061        let content_path = self.cache.path().join("content.bin");
1062        let content_reader = ContentReader::open(&content_path)
1063            .context("Failed to open content store")?;
1064
1065        // Load trigram index from disk (or rebuild if missing)
1066        let trigrams_path = self.cache.path().join("trigrams.bin");
1067        let trigram_index = if trigrams_path.exists() {
1068            match TrigramIndex::load(&trigrams_path) {
1069                Ok(index) => {
1070                    log::debug!("Loaded trigram index from disk: {} trigrams, {} files",
1071                               index.trigram_count(), index.file_count());
1072                    index
1073                }
1074                Err(e) => {
1075                    log::warn!("Failed to load trigram index from disk: {}", e);
1076                    log::warn!("Rebuilding trigram index from content store...");
1077                    Self::rebuild_trigram_index(&content_reader)?
1078                }
1079            }
1080        } else {
1081            log::debug!("trigrams.bin not found, rebuilding from content store");
1082            Self::rebuild_trigram_index(&content_reader)?
1083        };
1084
1085        // Search using trigrams
1086        let candidates = trigram_index.search(pattern);
1087        log::debug!("Found {} candidate locations from trigram search", candidates.len());
1088
1089        // Clone pattern to owned String for thread safety
1090        let pattern_owned = pattern.to_string();
1091
1092        // Group candidates by file for efficient processing
1093        use std::collections::HashMap;
1094        let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> = HashMap::new();
1095        for loc in candidates {
1096            candidates_by_file
1097                .entry(loc.file_id)
1098                .or_insert_with(Vec::new)
1099                .push(loc);
1100        }
1101
1102        log::debug!("Scanning {} files with trigram matches", candidates_by_file.len());
1103
1104        // Process files in parallel using rayon
1105        use rayon::prelude::*;
1106
1107        let results: Vec<SearchResult> = candidates_by_file
1108            .par_iter()
1109            .flat_map(|(file_id, locations)| {
1110                // Get file metadata
1111                let file_path = match trigram_index.get_file(*file_id) {
1112                    Some(p) => p,
1113                    None => return Vec::new(),
1114                };
1115
1116                let content = match content_reader.get_file_content(*file_id) {
1117                    Ok(c) => c,
1118                    Err(_) => return Vec::new(),
1119                };
1120
1121                let file_path_str = file_path.to_string_lossy().to_string();
1122
1123                // Detect language once per file
1124                let ext = file_path.extension()
1125                    .and_then(|e| e.to_str())
1126                    .unwrap_or("");
1127                let lang = Language::from_extension(ext);
1128
1129                // Split content into lines once
1130                let lines: Vec<&str> = content.lines().collect();
1131
1132                // Use a HashSet to deduplicate results by line number
1133                let mut seen_lines: std::collections::HashSet<usize> = std::collections::HashSet::new();
1134                let mut file_results = Vec::new();
1135
1136                // Only check the specific lines indicated by trigram posting lists
1137                for loc in locations {
1138                    let line_no = loc.line_no as usize;
1139
1140                    // Skip if we've already processed this line
1141                    if seen_lines.contains(&line_no) {
1142                        continue;
1143                    }
1144
1145                    // Bounds check
1146                    if line_no == 0 || line_no > lines.len() {
1147                        log::debug!("Line {} out of bounds (file has {} lines)", line_no, lines.len());
1148                        continue;
1149                    }
1150
1151                    let line = lines[line_no - 1];
1152
1153                    // Apply word-boundary or substring matching based on filter
1154                    // - Default (not contains, not regex): Word-boundary matching (restrictive)
1155                    // - --contains or --regex: Substring matching (expansive)
1156                    let line_matches = if filter.use_contains || filter.use_regex {
1157                        // Substring matching (expansive)
1158                        line.contains(&pattern_owned)
1159                    } else {
1160                        // Word-boundary matching (restrictive, default)
1161                        Self::has_word_boundary_match(line, &pattern_owned)
1162                    };
1163
1164                    if !line_matches {
1165                        continue;
1166                    }
1167
1168                    seen_lines.insert(line_no);
1169
1170                    // Create a text match result (no symbol lookup for performance)
1171                    file_results.push(SearchResult {
1172                        path: file_path_str.clone(),
1173                        lang: lang.clone(),
1174                        kind: SymbolKind::Unknown("text_match".to_string()),
1175                        symbol: Some(pattern_owned.clone()),
1176                        span: Span {
1177                            start_line: line_no,
1178                            end_line: line_no,
1179                            start_col: 0,
1180                            end_col: 0,
1181                        },
1182                        scope: None,
1183                        preview: line.to_string(),
1184                    });
1185                }
1186
1187                file_results
1188            })
1189            .collect();
1190
1191        Ok(results)
1192    }
1193
1194    /// Get candidate results using regex patterns with trigram optimization
1195    ///
1196    /// # Algorithm
1197    ///
1198    /// 1. Extract literal sequences from the regex pattern (≥3 chars)
1199    /// 2. If literals found: search for files containing ANY of the literals (UNION)
1200    /// 3. If no literals: fall back to full content scan
1201    /// 4. Compile regex and verify matches in candidate files
1202    /// 5. Return matching results with context
1203    ///
1204    /// # File Selection Strategy
1205    ///
1206    /// Uses UNION of files containing any literal (conservative approach):
1207    /// - For alternation patterns `(a|b)`: Correctly searches files with a OR b
1208    /// - For sequential patterns `a.*b`: Searches files with a OR b (may include extra files)
1209    /// - Trade-off: Ensures correctness at the cost of scanning 2-3x more files for sequential patterns
1210    /// - Performance impact is minimal due to memory-mapped I/O (<5ms overhead typically)
1211    ///
1212    /// # Performance
1213    ///
1214    /// - Best case (pattern with literals): <20ms (trigram optimization)
1215    /// - Typical case (alternation/sequential): 5-15ms on small codebases (<100 files)
1216    /// - Worst case (no literals like `.*`): ~100ms (full scan)
1217    fn get_regex_candidates(&self, pattern: &str, timeout: Option<&std::time::Duration>, start_time: &std::time::Instant) -> Result<Vec<SearchResult>> {
1218        // Step 1: Compile the regex
1219        let regex = Regex::new(pattern)
1220            .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
1221
1222        // Check timeout before expensive operations
1223        if let Some(timeout_duration) = timeout {
1224            if start_time.elapsed() > *timeout_duration {
1225                anyhow::bail!(
1226                    "Query timeout exceeded ({} seconds) during regex compilation",
1227                    timeout_duration.as_secs()
1228                );
1229            }
1230        }
1231
1232        // Step 2: Extract trigrams from regex
1233        let trigrams = extract_trigrams_from_regex(pattern);
1234
1235        // Load content store
1236        let content_path = self.cache.path().join("content.bin");
1237        let content_reader = ContentReader::open(&content_path)
1238            .context("Failed to open content store")?;
1239
1240        let mut results = Vec::new();
1241
1242        if trigrams.is_empty() {
1243            // No trigrams - fall back to full scan
1244            log::warn!("Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan", pattern);
1245            log::warn!("This may be slow on large codebases. Consider using patterns with literal text.");
1246
1247            // Scan all files
1248            for file_id in 0..content_reader.file_count() {
1249                let file_path = content_reader.get_file_path(file_id as u32)
1250                    .context("Invalid file_id")?;
1251                let content = content_reader.get_file_content(file_id as u32)?;
1252
1253                self.find_regex_matches_in_file(
1254                    &regex,
1255                    file_path,
1256                    content,
1257                    &mut results,
1258                )?;
1259            }
1260        } else {
1261            // Use trigrams to narrow down candidates
1262            log::debug!("Using {} trigrams to narrow regex search candidates", trigrams.len());
1263
1264            // Load trigram index
1265            let trigrams_path = self.cache.path().join("trigrams.bin");
1266            let trigram_index = if trigrams_path.exists() {
1267                TrigramIndex::load(&trigrams_path)?
1268            } else {
1269                Self::rebuild_trigram_index(&content_reader)?
1270            };
1271
1272            // Extract the literal sequences from the regex pattern
1273            use crate::regex_trigrams::extract_literal_sequences;
1274            let literals = extract_literal_sequences(pattern);
1275
1276            if literals.is_empty() {
1277                log::warn!("Regex extraction found trigrams but no literal sequences - this shouldn't happen");
1278                // Fall back to full scan
1279                for file_id in 0..content_reader.file_count() {
1280                    let file_path = content_reader.get_file_path(file_id as u32)
1281                        .context("Invalid file_id")?;
1282                    let content = content_reader.get_file_content(file_id as u32)?;
1283                    self.find_regex_matches_in_file(&regex, file_path, content, &mut results)?;
1284                }
1285            } else {
1286                // Search for each literal sequence and union the results
1287                // This ensures we find matches for ANY literal (important for alternation patterns like (a|b))
1288                // Trade-off: May scan more files than necessary for sequential patterns (a.*b),
1289                // but ensures correctness for all regex patterns
1290                use std::collections::HashSet;
1291                let mut candidate_files: HashSet<u32> = HashSet::new();
1292
1293                for literal in &literals {
1294                    // Search for this literal in the trigram index
1295                    let candidates = trigram_index.search(literal);
1296                    let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
1297
1298                    log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
1299
1300                    // Union with existing candidate files (not intersection)
1301                    // This ensures we search files containing ANY of the literals
1302                    candidate_files.extend(file_ids);
1303                }
1304
1305                let final_candidates = candidate_files;
1306                log::debug!("After union: searching {} files that contain any literal", final_candidates.len());
1307
1308                // Verify regex matches in candidate files only
1309                for &file_id in &final_candidates {
1310                    let file_path = trigram_index.get_file(file_id)
1311                        .context("Invalid file_id from trigram search")?;
1312                    let content = content_reader.get_file_content(file_id)?;
1313
1314                    self.find_regex_matches_in_file(
1315                        &regex,
1316                        file_path,
1317                        content,
1318                        &mut results,
1319                    )?;
1320                }
1321            }
1322        }
1323
1324        log::info!("Regex search found {} matches for pattern '{}'", results.len(), pattern);
1325        Ok(results)
1326    }
1327
1328    /// Find all regex matches in a single file
1329    fn find_regex_matches_in_file(
1330        &self,
1331        regex: &Regex,
1332        file_path: &std::path::Path,
1333        content: &str,
1334        results: &mut Vec<SearchResult>,
1335    ) -> Result<()> {
1336        let file_path_str = file_path.to_string_lossy().to_string();
1337
1338        // Detect language from file extension
1339        let ext = file_path.extension()
1340            .and_then(|e| e.to_str())
1341            .unwrap_or("");
1342        let lang = Language::from_extension(ext);
1343
1344        // Find all regex matches line by line
1345        for (line_idx, line) in content.lines().enumerate() {
1346            if regex.is_match(line) {
1347                let line_no = line_idx + 1;
1348
1349                // Create text match result
1350                // Note: We don't extract symbol names from regex matches because:
1351                // 1. Regex might match partial identifiers (e.g., "UserController" in "ListUserController")
1352                // 2. Regex might match across language-specific delimiters (namespaces, scopes, etc.)
1353                // 3. Accurate symbol extraction requires tree-sitter parsing (expensive)
1354                // The user can see the full context in the 'preview' field
1355                results.push(SearchResult {
1356                    path: file_path_str.clone(),
1357                    lang: lang.clone(),
1358                    kind: SymbolKind::Unknown("regex_match".to_string()),
1359                    symbol: None,  // No symbol name for regex matches
1360                    span: Span {
1361                        start_line: line_no,
1362                        end_line: line_no,
1363                        start_col: 0,
1364                        end_col: 0,
1365                    },
1366                    scope: None,
1367                    preview: line.to_string(),
1368                });
1369            }
1370        }
1371
1372        Ok(())
1373    }
1374
1375    /// Helper function to find file_id in ContentReader by matching path
1376    fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
1377        for file_id in 0..content_reader.file_count() {
1378            if let Some(path) = content_reader.get_file_path(file_id as u32) {
1379                if path.to_string_lossy() == target_path {
1380                    return Some(file_id as u32);
1381                }
1382            }
1383        }
1384        None
1385    }
1386
1387    /// Rebuild trigram index from content store (fallback when trigrams.bin is missing)
1388    fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
1389        log::debug!("Rebuilding trigram index from {} files", content_reader.file_count());
1390        let mut trigram_index = TrigramIndex::new();
1391
1392        for file_id in 0..content_reader.file_count() {
1393            let file_path = content_reader.get_file_path(file_id as u32)
1394                .context("Invalid file_id")?
1395                .to_path_buf();
1396            let content = content_reader.get_file_content(file_id as u32)?;
1397
1398            let idx = trigram_index.add_file(file_path);
1399            trigram_index.index_file(idx, content);
1400        }
1401
1402        trigram_index.finalize();
1403        log::debug!("Trigram index rebuilt with {} trigrams", trigram_index.trigram_count());
1404
1405        Ok(trigram_index)
1406    }
1407
1408    /// Check if pattern appears at word boundaries in a line
1409    ///
1410    /// Word boundary is defined as:
1411    /// - Start/end of string
1412    /// - Transition between word characters (\w) and non-word characters (\W)
1413    ///
1414    /// This is used for default (restrictive) matching to find complete identifiers
1415    /// rather than substrings. For example:
1416    /// - "Error" matches "Error" but not "NetworkError"
1417    /// - "parse" matches "parse()" but not "parseUser()"
1418    fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
1419        // Build regex: \bpattern\b
1420        let escaped_pattern = regex::escape(pattern);
1421        let pattern_with_boundaries = format!(r"\b{}\b", escaped_pattern);
1422
1423        if let Ok(re) = Regex::new(&pattern_with_boundaries) {
1424            re.is_match(line)
1425        } else {
1426            // If regex fails (shouldn't happen with escaped pattern), fall back to substring
1427            log::debug!("Word boundary regex failed for pattern '{}', falling back to substring", pattern);
1428            line.contains(pattern)
1429        }
1430    }
1431
1432    /// Get index status for programmatic use (doesn't print warnings)
1433    ///
1434    /// Returns (status, can_trust_results, warning) tuple for JSON output.
1435    /// This is optimized for AI agents to detect staleness and auto-reindex.
1436    fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
1437        let root = std::env::current_dir()?;
1438
1439        // Check git state if in a git repo
1440        if crate::git::is_git_repo(&root) {
1441            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
1442                // Check if we're on a different branch than what was indexed
1443                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
1444                    let warning = IndexWarning {
1445                        reason: format!("Branch '{}' has not been indexed", current_branch),
1446                        action_required: "rfx index".to_string(),
1447                        details: Some(IndexWarningDetails {
1448                            current_branch: Some(current_branch),
1449                            indexed_branch: None,
1450                            current_commit: None,
1451                            indexed_commit: None,
1452                        }),
1453                    };
1454                    return Ok((IndexStatus::Stale, false, Some(warning)));
1455                }
1456
1457                // Branch exists - check if commit changed
1458                if let (Ok(current_commit), Ok(branch_info)) =
1459                    (crate::git::get_current_commit(&root), self.cache.get_branch_info(&current_branch)) {
1460
1461                    if branch_info.commit_sha != current_commit {
1462                        let warning = IndexWarning {
1463                            reason: format!(
1464                                "Commit changed from {} to {}",
1465                                &branch_info.commit_sha[..7],
1466                                &current_commit[..7]
1467                            ),
1468                            action_required: "rfx index".to_string(),
1469                            details: Some(IndexWarningDetails {
1470                                current_branch: Some(current_branch.clone()),
1471                                indexed_branch: Some(current_branch.clone()),
1472                                current_commit: Some(current_commit.clone()),
1473                                indexed_commit: Some(branch_info.commit_sha.clone()),
1474                            }),
1475                        };
1476                        return Ok((IndexStatus::Stale, false, Some(warning)));
1477                    }
1478
1479                    // If commits match, do a quick file freshness check
1480                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
1481                        let mut checked = 0;
1482                        let mut changed = 0;
1483                        const SAMPLE_SIZE: usize = 10;
1484
1485                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
1486                            checked += 1;
1487                            let file_path = std::path::Path::new(path);
1488
1489                            if let Ok(metadata) = std::fs::metadata(file_path) {
1490                                if let Ok(modified) = metadata.modified() {
1491                                    let indexed_time = branch_info.last_indexed;
1492                                    let file_time = modified.duration_since(std::time::UNIX_EPOCH)
1493                                        .unwrap_or_default()
1494                                        .as_secs() as i64;
1495
1496                                    if file_time > indexed_time {
1497                                        // File modified after indexing - likely stale
1498                                        // Note: We skip hash verification for performance (mtime check is sufficient)
1499                                        changed += 1;
1500                                    }
1501                                }
1502                            }
1503                        }
1504
1505                        if changed > 0 {
1506                            let warning = IndexWarning {
1507                                reason: format!("{} of {} sampled files modified", changed, checked),
1508                                action_required: "rfx index".to_string(),
1509                                details: Some(IndexWarningDetails {
1510                                    current_branch: Some(current_branch.clone()),
1511                                    indexed_branch: Some(branch_info.branch.clone()),
1512                                    current_commit: Some(current_commit.clone()),
1513                                    indexed_commit: Some(branch_info.commit_sha.clone()),
1514                                }),
1515                            };
1516                            return Ok((IndexStatus::Stale, false, Some(warning)));
1517                        }
1518                    }
1519
1520                    // All checks passed - index is fresh
1521                    return Ok((IndexStatus::Fresh, true, None));
1522                }
1523            }
1524        }
1525
1526        // Not in a git repo or couldn't get git info - assume fresh
1527        Ok((IndexStatus::Fresh, true, None))
1528    }
1529
1530    /// Check index freshness and show non-blocking warnings
1531    ///
1532    /// This performs lightweight checks to warn users if their index might be stale:
1533    /// 1. Branch mismatch: indexed different branch
1534    /// 2. Commit changed: HEAD moved since indexing
1535    /// 3. File changes: quick mtime check on sample of files (if available)
1536    fn check_index_freshness(&self) -> Result<()> {
1537        let root = std::env::current_dir()?;
1538
1539        // Check git state if in a git repo
1540        if crate::git::is_git_repo(&root) {
1541            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
1542                // Check if we're on a different branch than what was indexed
1543                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
1544                    eprintln!("⚠️  WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.", current_branch);
1545                    return Ok(());
1546                }
1547
1548                // Branch exists - check if commit changed
1549                if let (Ok(current_commit), Ok(branch_info)) =
1550                    (crate::git::get_current_commit(&root), self.cache.get_branch_info(&current_branch)) {
1551
1552                    if branch_info.commit_sha != current_commit {
1553                        eprintln!("⚠️  WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
1554                                 &branch_info.commit_sha[..7], &current_commit[..7]);
1555                        return Ok(());
1556                    }
1557
1558                    // If commits match, do a quick file freshness check
1559                    // Sample up to 10 files to check for modifications (cheap mtime check)
1560                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
1561                        let mut checked = 0;
1562                        let mut changed = 0;
1563                        const SAMPLE_SIZE: usize = 10;
1564
1565                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
1566                            checked += 1;
1567                            let file_path = std::path::Path::new(path);
1568
1569                            // Check if file exists and has been modified (mtime/size heuristic)
1570                            if let Ok(metadata) = std::fs::metadata(file_path) {
1571                                if let Ok(modified) = metadata.modified() {
1572                                    let indexed_time = branch_info.last_indexed;
1573                                    let file_time = modified.duration_since(std::time::UNIX_EPOCH)
1574                                        .unwrap_or_default()
1575                                        .as_secs() as i64;
1576
1577                                    // If file modified after indexing, it might be stale
1578                                    if file_time > indexed_time {
1579                                        // File modified after indexing - likely stale
1580                                        // Note: We skip hash verification for performance (mtime check is sufficient)
1581                                        // This may cause false positives if files were touched without changes,
1582                                        // but the warning is non-blocking and vastly better than slow queries
1583                                        changed += 1;
1584                                    }
1585                                }
1586                            }
1587                        }
1588
1589                        if changed > 0 {
1590                            eprintln!("⚠️  WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.", changed, checked);
1591                        }
1592                    }
1593                }
1594            }
1595        }
1596
1597        Ok(())
1598    }
1599}
1600
1601#[cfg(test)]
1602mod tests {
1603    use super::*;
1604    use crate::indexer::Indexer;
1605    use crate::models::IndexConfig;
1606    use std::fs;
1607    use tempfile::TempDir;
1608
1609    // ==================== Basic Tests ====================
1610
1611    #[test]
1612    fn test_query_engine_creation() {
1613        let temp = TempDir::new().unwrap();
1614        let cache = CacheManager::new(temp.path());
1615        let engine = QueryEngine::new(cache);
1616
1617        assert!(engine.cache.path().ends_with(".reflex"));
1618    }
1619
1620    #[test]
1621    fn test_filter_modes() {
1622        // Test that symbols_mode works as expected
1623        let filter_fulltext = QueryFilter::default();
1624        assert!(!filter_fulltext.symbols_mode);
1625
1626        let filter_symbols = QueryFilter {
1627            symbols_mode: true,
1628            ..Default::default()
1629        };
1630        assert!(filter_symbols.symbols_mode);
1631
1632        // Test that kind implies symbols_mode (handled in CLI layer)
1633        let filter_with_kind = QueryFilter {
1634            kind: Some(SymbolKind::Function),
1635            symbols_mode: true,
1636            ..Default::default()
1637        };
1638        assert!(filter_with_kind.symbols_mode);
1639    }
1640
1641    // ==================== Search Mode Tests ====================
1642
1643    #[test]
1644    fn test_fulltext_search() {
1645        let temp = TempDir::new().unwrap();
1646        let project = temp.path().join("project");
1647        fs::create_dir(&project).unwrap();
1648
1649        // Create test files
1650        fs::write(project.join("main.rs"), "fn main() {\n    println!(\"hello\");\n}").unwrap();
1651        fs::write(project.join("lib.rs"), "pub fn hello() {}").unwrap();
1652
1653        // Index the project
1654        let cache = CacheManager::new(&project);
1655        let indexer = Indexer::new(cache, IndexConfig::default());
1656        indexer.index(&project, false).unwrap();
1657
1658        // Search for "hello"
1659        let cache = CacheManager::new(&project);
1660        let engine = QueryEngine::new(cache);
1661        let filter = QueryFilter::default(); // full-text mode
1662        let results = engine.search("hello", filter).unwrap();
1663
1664        // Should find both occurrences (println and function name)
1665        assert!(results.len() >= 2);
1666        assert!(results.iter().any(|r| r.path.contains("main.rs")));
1667        assert!(results.iter().any(|r| r.path.contains("lib.rs")));
1668    }
1669
1670    #[test]
1671    fn test_symbol_search() {
1672        let temp = TempDir::new().unwrap();
1673        let project = temp.path().join("project");
1674        fs::create_dir(&project).unwrap();
1675
1676        // Create test file with function definition and call
1677        fs::write(
1678            project.join("main.rs"),
1679            "fn greet() {}\nfn main() {\n    greet();\n}"
1680        ).unwrap();
1681
1682        // Index
1683        let cache = CacheManager::new(&project);
1684        let indexer = Indexer::new(cache, IndexConfig::default());
1685        indexer.index(&project, false).unwrap();
1686
1687        let cache = CacheManager::new(&project);
1688
1689        // Symbol search (definitions only)
1690        let engine = QueryEngine::new(cache);
1691        let filter = QueryFilter {
1692            symbols_mode: true,
1693            ..Default::default()
1694        };
1695        let results = engine.search("greet", filter).unwrap();
1696
1697        // Should find only the definition, not the call
1698        assert!(results.len() >= 1);
1699        assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
1700    }
1701
1702    #[test]
1703    fn test_regex_search() {
1704        let temp = TempDir::new().unwrap();
1705        let project = temp.path().join("project");
1706        fs::create_dir(&project).unwrap();
1707
1708        fs::write(
1709            project.join("main.rs"),
1710            "fn test1() {}\nfn test2() {}\nfn other() {}"
1711        ).unwrap();
1712
1713        let cache = CacheManager::new(&project);
1714        let indexer = Indexer::new(cache, IndexConfig::default());
1715        indexer.index(&project, false).unwrap();
1716
1717        let cache = CacheManager::new(&project);
1718
1719        let engine = QueryEngine::new(cache);
1720        let filter = QueryFilter {
1721            use_regex: true,
1722            ..Default::default()
1723        };
1724        let results = engine.search(r"fn test\d", filter).unwrap();
1725
1726        // Should match test1 and test2 but not other
1727        assert_eq!(results.len(), 2);
1728        assert!(results.iter().all(|r| r.preview.contains("test")));
1729    }
1730
1731    // ==================== Filter Tests ====================
1732
1733    #[test]
1734    fn test_language_filter() {
1735        let temp = TempDir::new().unwrap();
1736        let project = temp.path().join("project");
1737        fs::create_dir(&project).unwrap();
1738
1739        fs::write(project.join("main.rs"), "fn main() {}").unwrap();
1740        fs::write(project.join("main.js"), "function main() {}").unwrap();
1741
1742        let cache = CacheManager::new(&project);
1743        let indexer = Indexer::new(cache, IndexConfig::default());
1744        indexer.index(&project, false).unwrap();
1745
1746        let cache = CacheManager::new(&project);
1747
1748        let engine = QueryEngine::new(cache);
1749
1750        // Filter to Rust only
1751        let filter = QueryFilter {
1752            language: Some(Language::Rust),
1753            ..Default::default()
1754        };
1755        let results = engine.search("main", filter).unwrap();
1756
1757        assert!(results.iter().all(|r| r.lang == Language::Rust));
1758        assert!(results.iter().all(|r| r.path.ends_with(".rs")));
1759    }
1760
1761    #[test]
1762    fn test_kind_filter() {
1763        let temp = TempDir::new().unwrap();
1764        let project = temp.path().join("project");
1765        fs::create_dir(&project).unwrap();
1766
1767        fs::write(
1768            project.join("main.rs"),
1769            "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }"
1770        ).unwrap();
1771
1772        let cache = CacheManager::new(&project);
1773        let indexer = Indexer::new(cache, IndexConfig::default());
1774        indexer.index(&project, false).unwrap();
1775
1776        let cache = CacheManager::new(&project);
1777
1778        let engine = QueryEngine::new(cache);
1779
1780        // Filter to functions only (includes methods)
1781        let filter = QueryFilter {
1782            symbols_mode: true,
1783            kind: Some(SymbolKind::Function),
1784            use_contains: true,  // "mai" is substring of "main"
1785            ..Default::default()
1786        };
1787        // Search for "mai" which should match "main" (tri gram pattern will def be in index)
1788        let results = engine.search("mai", filter).unwrap();
1789
1790        // Should find main function
1791        assert!(results.len() > 0, "Should find at least one result");
1792        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("main")), "Should find 'main' function");
1793    }
1794
1795    #[test]
1796    fn test_file_pattern_filter() {
1797        let temp = TempDir::new().unwrap();
1798        let project = temp.path().join("project");
1799        fs::create_dir_all(project.join("src")).unwrap();
1800        fs::create_dir_all(project.join("tests")).unwrap();
1801
1802        fs::write(project.join("src/lib.rs"), "fn foo() {}").unwrap();
1803        fs::write(project.join("tests/test.rs"), "fn foo() {}").unwrap();
1804
1805        let cache = CacheManager::new(&project);
1806        let indexer = Indexer::new(cache, IndexConfig::default());
1807        indexer.index(&project, false).unwrap();
1808
1809        let cache = CacheManager::new(&project);
1810
1811        let engine = QueryEngine::new(cache);
1812
1813        // Filter to src/ only
1814        let filter = QueryFilter {
1815            file_pattern: Some("src/".to_string()),
1816            ..Default::default()
1817        };
1818        let results = engine.search("foo", filter).unwrap();
1819
1820        assert!(results.iter().all(|r| r.path.contains("src/")));
1821        assert!(!results.iter().any(|r| r.path.contains("tests/")));
1822    }
1823
1824    #[test]
1825    fn test_limit_filter() {
1826        let temp = TempDir::new().unwrap();
1827        let project = temp.path().join("project");
1828        fs::create_dir(&project).unwrap();
1829
1830        // Create file with many matches
1831        let content = (0..20).map(|i| format!("fn test{}() {{}}", i)).collect::<Vec<_>>().join("\n");
1832        fs::write(project.join("main.rs"), content).unwrap();
1833
1834        let cache = CacheManager::new(&project);
1835        let indexer = Indexer::new(cache, IndexConfig::default());
1836        indexer.index(&project, false).unwrap();
1837
1838        let cache = CacheManager::new(&project);
1839
1840        let engine = QueryEngine::new(cache);
1841
1842        // Limit to 5 results
1843        let filter = QueryFilter {
1844            limit: Some(5),
1845            use_contains: true,  // "test" is substring of "test0", "test1", etc.
1846            ..Default::default()
1847        };
1848        let results = engine.search("test", filter).unwrap();
1849
1850        assert_eq!(results.len(), 5);
1851    }
1852
1853    #[test]
1854    fn test_exact_match_filter() {
1855        let temp = TempDir::new().unwrap();
1856        let project = temp.path().join("project");
1857        fs::create_dir(&project).unwrap();
1858
1859        fs::write(
1860            project.join("main.rs"),
1861            "fn test() {}\nfn test_helper() {}\nfn other_test() {}"
1862        ).unwrap();
1863
1864        let cache = CacheManager::new(&project);
1865        let indexer = Indexer::new(cache, IndexConfig::default());
1866        indexer.index(&project, false).unwrap();
1867
1868        let cache = CacheManager::new(&project);
1869
1870        let engine = QueryEngine::new(cache);
1871
1872        // Exact match for "test"
1873        let filter = QueryFilter {
1874            symbols_mode: true,
1875            exact: true,
1876            ..Default::default()
1877        };
1878        let results = engine.search("test", filter).unwrap();
1879
1880        // Should only match exactly "test", not "test_helper" or "other_test"
1881        assert_eq!(results.len(), 1);
1882        assert_eq!(results[0].symbol.as_deref(), Some("test"));
1883    }
1884
1885    // ==================== Expand Mode Tests ====================
1886
1887    #[test]
1888    fn test_expand_mode() {
1889        let temp = TempDir::new().unwrap();
1890        let project = temp.path().join("project");
1891        fs::create_dir(&project).unwrap();
1892
1893        fs::write(
1894            project.join("main.rs"),
1895            "fn greet() {\n    println!(\"Hello\");\n    println!(\"World\");\n}"
1896        ).unwrap();
1897
1898        let cache = CacheManager::new(&project);
1899        let indexer = Indexer::new(cache, IndexConfig::default());
1900        indexer.index(&project, false).unwrap();
1901
1902        let cache = CacheManager::new(&project);
1903
1904        let engine = QueryEngine::new(cache);
1905
1906        // Search with expand mode
1907        let filter = QueryFilter {
1908            symbols_mode: true,
1909            expand: true,
1910            ..Default::default()
1911        };
1912        let results = engine.search("greet", filter).unwrap();
1913
1914        // Should have full function body in preview
1915        assert!(results.len() >= 1);
1916        let result = &results[0];
1917        assert!(result.preview.contains("println"));
1918    }
1919
1920    // ==================== Edge Cases ====================
1921
1922    #[test]
1923    fn test_search_empty_index() {
1924        let temp = TempDir::new().unwrap();
1925        let project = temp.path().join("project");
1926        fs::create_dir(&project).unwrap();
1927
1928        let cache = CacheManager::new(&project);
1929        let indexer = Indexer::new(cache, IndexConfig::default());
1930        indexer.index(&project, false).unwrap();
1931
1932        let cache = CacheManager::new(&project);
1933
1934        let engine = QueryEngine::new(cache);
1935        let filter = QueryFilter::default();
1936        let results = engine.search("nonexistent", filter).unwrap();
1937
1938        assert_eq!(results.len(), 0);
1939    }
1940
1941    #[test]
1942    fn test_search_no_index() {
1943        let temp = TempDir::new().unwrap();
1944        let project = temp.path().join("project");
1945        fs::create_dir(&project).unwrap();
1946
1947        let cache = CacheManager::new(&project);
1948        let engine = QueryEngine::new(cache);
1949        let filter = QueryFilter::default();
1950
1951        // Should fail when index doesn't exist
1952        assert!(engine.search("test", filter).is_err());
1953    }
1954
1955    #[test]
1956    fn test_search_special_characters() {
1957        let temp = TempDir::new().unwrap();
1958        let project = temp.path().join("project");
1959        fs::create_dir(&project).unwrap();
1960
1961        fs::write(project.join("main.rs"), "let x = 42;\nlet y = x + 1;").unwrap();
1962
1963        let cache = CacheManager::new(&project);
1964        let indexer = Indexer::new(cache, IndexConfig::default());
1965        indexer.index(&project, false).unwrap();
1966
1967        let cache = CacheManager::new(&project);
1968
1969        let engine = QueryEngine::new(cache);
1970        let filter = QueryFilter::default();
1971
1972        // Search for special characters
1973        let results = engine.search("x + ", filter).unwrap();
1974        assert!(results.len() >= 1);
1975    }
1976
1977    #[test]
1978    fn test_search_unicode() {
1979        let temp = TempDir::new().unwrap();
1980        let project = temp.path().join("project");
1981        fs::create_dir(&project).unwrap();
1982
1983        fs::write(project.join("main.rs"), "// 你好世界\nfn main() {}").unwrap();
1984
1985        let cache = CacheManager::new(&project);
1986        let indexer = Indexer::new(cache, IndexConfig::default());
1987        indexer.index(&project, false).unwrap();
1988
1989        let cache = CacheManager::new(&project);
1990
1991        let engine = QueryEngine::new(cache);
1992        let filter = QueryFilter {
1993            use_contains: true,  // Unicode word boundaries may not work as expected
1994            ..Default::default()
1995        };
1996
1997        // Search for unicode characters
1998        let results = engine.search("你好", filter).unwrap();
1999        assert!(results.len() >= 1);
2000    }
2001
2002    #[test]
2003    fn test_case_sensitive_search() {
2004        let temp = TempDir::new().unwrap();
2005        let project = temp.path().join("project");
2006        fs::create_dir(&project).unwrap();
2007
2008        fs::write(project.join("main.rs"), "fn Test() {}\nfn test() {}").unwrap();
2009
2010        let cache = CacheManager::new(&project);
2011        let indexer = Indexer::new(cache, IndexConfig::default());
2012        indexer.index(&project, false).unwrap();
2013
2014        let cache = CacheManager::new(&project);
2015
2016        let engine = QueryEngine::new(cache);
2017        let filter = QueryFilter::default();
2018
2019        // Search is case-sensitive
2020        let results = engine.search("Test", filter).unwrap();
2021        assert!(results.iter().any(|r| r.preview.contains("Test()")));
2022    }
2023
2024    // ==================== Determinism Tests ====================
2025
2026    #[test]
2027    fn test_results_sorted_deterministically() {
2028        let temp = TempDir::new().unwrap();
2029        let project = temp.path().join("project");
2030        fs::create_dir(&project).unwrap();
2031
2032        fs::write(project.join("a.rs"), "fn test() {}").unwrap();
2033        fs::write(project.join("z.rs"), "fn test() {}").unwrap();
2034        fs::write(project.join("m.rs"), "fn test() {}\nfn test2() {}").unwrap();
2035
2036        let cache = CacheManager::new(&project);
2037        let indexer = Indexer::new(cache, IndexConfig::default());
2038        indexer.index(&project, false).unwrap();
2039
2040        let cache = CacheManager::new(&project);
2041
2042        let engine = QueryEngine::new(cache);
2043        let filter = QueryFilter::default();
2044
2045        // Run search multiple times
2046        let results1 = engine.search("test", filter.clone()).unwrap();
2047        let results2 = engine.search("test", filter.clone()).unwrap();
2048        let results3 = engine.search("test", filter).unwrap();
2049
2050        // Results should be identical and sorted by path then line
2051        assert_eq!(results1.len(), results2.len());
2052        assert_eq!(results1.len(), results3.len());
2053
2054        for i in 0..results1.len() {
2055            assert_eq!(results1[i].path, results2[i].path);
2056            assert_eq!(results1[i].path, results3[i].path);
2057            assert_eq!(results1[i].span.start_line, results2[i].span.start_line);
2058            assert_eq!(results1[i].span.start_line, results3[i].span.start_line);
2059        }
2060
2061        // Verify sorting (path ascending, then line ascending)
2062        for i in 0..results1.len().saturating_sub(1) {
2063            let curr = &results1[i];
2064            let next = &results1[i + 1];
2065            assert!(
2066                curr.path < next.path ||
2067                (curr.path == next.path && curr.span.start_line <= next.span.start_line)
2068            );
2069        }
2070    }
2071
2072    // ==================== Combined Filter Tests ====================
2073
2074    #[test]
2075    fn test_multiple_filters_combined() {
2076        let temp = TempDir::new().unwrap();
2077        let project = temp.path().join("project");
2078        fs::create_dir_all(project.join("src")).unwrap();
2079
2080        fs::write(project.join("src/main.rs"), "fn test() {}\nstruct Test {}").unwrap();
2081        fs::write(project.join("src/lib.rs"), "fn test() {}").unwrap();
2082        fs::write(project.join("test.js"), "function test() {}").unwrap();
2083
2084        let cache = CacheManager::new(&project);
2085        let indexer = Indexer::new(cache, IndexConfig::default());
2086        indexer.index(&project, false).unwrap();
2087
2088        let cache = CacheManager::new(&project);
2089
2090        let engine = QueryEngine::new(cache);
2091
2092        // Combine language, kind, and file pattern filters
2093        let filter = QueryFilter {
2094            language: Some(Language::Rust),
2095            kind: Some(SymbolKind::Function),
2096            file_pattern: Some("src/main".to_string()),
2097            symbols_mode: true,
2098            ..Default::default()
2099        };
2100        let results = engine.search("test", filter).unwrap();
2101
2102        // Should only find the function in src/main.rs
2103        assert_eq!(results.len(), 1);
2104        assert!(results[0].path.contains("src/main.rs"));
2105        assert_eq!(results[0].kind, SymbolKind::Function);
2106    }
2107
2108    // ==================== Helper Method Tests ====================
2109
2110    #[test]
2111    fn test_find_symbol_helper() {
2112        let temp = TempDir::new().unwrap();
2113        let project = temp.path().join("project");
2114        fs::create_dir(&project).unwrap();
2115
2116        fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
2117
2118        let cache = CacheManager::new(&project);
2119        let indexer = Indexer::new(cache, IndexConfig::default());
2120        indexer.index(&project, false).unwrap();
2121
2122        let cache = CacheManager::new(&project);
2123
2124        let engine = QueryEngine::new(cache);
2125        let results = engine.find_symbol("greet").unwrap();
2126
2127        assert!(results.len() >= 1);
2128        assert_eq!(results[0].kind, SymbolKind::Function);
2129    }
2130
2131    #[test]
2132    fn test_list_by_kind_helper() {
2133        let temp = TempDir::new().unwrap();
2134        let project = temp.path().join("project");
2135        fs::create_dir(&project).unwrap();
2136
2137        fs::write(
2138            project.join("main.rs"),
2139            "struct Point {}\nfn test() {}\nstruct Line {}"
2140        ).unwrap();
2141
2142        let cache = CacheManager::new(&project);
2143        let indexer = Indexer::new(cache, IndexConfig::default());
2144        indexer.index(&project, false).unwrap();
2145
2146        let cache = CacheManager::new(&project);
2147
2148        let engine = QueryEngine::new(cache);
2149
2150        // Search for structs that contain "oin" (Point contains it, Line doesn't)
2151        let filter = QueryFilter {
2152            kind: Some(SymbolKind::Struct),
2153            symbols_mode: true,
2154            use_contains: true,  // "oin" is substring of "Point"
2155            ..Default::default()
2156        };
2157        let results = engine.search("oin", filter).unwrap();
2158
2159        // Should find Point struct
2160        assert!(results.len() >= 1, "Should find at least Point struct");
2161        assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
2162        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
2163    }
2164
2165    // ==================== Metadata Tests ====================
2166
2167    #[test]
2168    fn test_search_with_metadata() {
2169        let temp = TempDir::new().unwrap();
2170        let project = temp.path().join("project");
2171        fs::create_dir(&project).unwrap();
2172
2173        fs::write(project.join("main.rs"), "fn test() {}").unwrap();
2174
2175        let cache = CacheManager::new(&project);
2176        let indexer = Indexer::new(cache, IndexConfig::default());
2177        indexer.index(&project, false).unwrap();
2178
2179        let cache = CacheManager::new(&project);
2180
2181        let engine = QueryEngine::new(cache);
2182        let filter = QueryFilter::default();
2183        let response = engine.search_with_metadata("test", filter).unwrap();
2184
2185        // Check metadata is present (status might be stale if run inside git repo)
2186        assert!(response.results.len() >= 1);
2187        // Note: can_trust_results may be false if running in a git repo without branch index
2188    }
2189
2190    // ==================== Multi-language Tests ====================
2191
2192    #[test]
2193    fn test_search_across_languages() {
2194        let temp = TempDir::new().unwrap();
2195        let project = temp.path().join("project");
2196        fs::create_dir(&project).unwrap();
2197
2198        fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
2199        fs::write(project.join("main.ts"), "function greet() {}").unwrap();
2200        fs::write(project.join("main.py"), "def greet(): pass").unwrap();
2201
2202        let cache = CacheManager::new(&project);
2203        let indexer = Indexer::new(cache, IndexConfig::default());
2204        indexer.index(&project, false).unwrap();
2205
2206        let cache = CacheManager::new(&project);
2207
2208        let engine = QueryEngine::new(cache);
2209        let filter = QueryFilter::default();
2210        let results = engine.search("greet", filter).unwrap();
2211
2212        // Should find greet in all three languages
2213        assert!(results.len() >= 3);
2214        assert!(results.iter().any(|r| r.lang == Language::Rust));
2215        assert!(results.iter().any(|r| r.lang == Language::TypeScript));
2216        assert!(results.iter().any(|r| r.lang == Language::Python));
2217    }
2218}