Skip to main content

reflex/query/
mod.rs

1//! Query engine for searching indexed code
2//!
3//! The query engine loads the memory-mapped cache and executes
4//! deterministic searches based on lexical, structural, or symbol patterns.
5
6pub mod filter;
7pub mod result;
8
9pub use filter::QueryFilter;
10
11use anyhow::{Context, Result};
12use regex::Regex;
13
14use crate::cache::CacheManager;
15use crate::content_store::ContentReader;
16use crate::models::{
17    IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
18    SymbolKind,
19};
20use crate::output;
21use crate::parsers::ParserFactory;
22use crate::regex_trigrams::extract_trigrams_from_regex;
23use crate::trigram::TrigramIndex;
24
25/// Manages query execution against the index
26pub struct QueryEngine {
27    cache: CacheManager,
28}
29
30impl QueryEngine {
31    /// Create a new query engine with the given cache manager
32    pub fn new(cache: CacheManager) -> Self {
33        Self { cache }
34    }
35
36    /// Load dependencies for search results if requested (legacy - per result)
37    /// Deprecated: Use group_and_load_dependencies for file-level grouping
38    fn load_dependencies(&self, results: &mut [SearchResult], include_deps: bool) -> Result<()> {
39        if !include_deps || results.is_empty() {
40            return Ok(());
41        }
42
43        log::debug!("Loading dependencies for {} results", results.len());
44
45        // Create dependency index
46        // Note: We need to pass the workspace root, not the cache directory
47        // The cache path is .reflex/, so its parent is the workspace root (.)
48        let workspace_root = self
49            .cache
50            .path()
51            .parent()
52            .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
53        let cache_for_deps = CacheManager::new(workspace_root);
54        let dep_index = crate::dependency::DependencyIndex::new(cache_for_deps);
55
56        // Load dependencies for each result
57        for result in results {
58            // Normalize path: strip leading "./" if present
59            let normalized_path = result.path.strip_prefix("./").unwrap_or(&result.path);
60
61            // Get file_id from database by path
62            match self.cache.get_file_id(normalized_path) {
63                Ok(Some(file_id)) => {
64                    log::debug!("Found file_id={} for path={}", file_id, result.path);
65                    // Get dependencies for this file
66                    match dep_index.get_dependencies_info(file_id) {
67                        Ok(dep_infos) => {
68                            log::debug!(
69                                "Loaded {} dependencies for file_id={}",
70                                dep_infos.len(),
71                                file_id
72                            );
73                            if !dep_infos.is_empty() {
74                                result.dependencies = Some(dep_infos);
75                            }
76                        }
77                        Err(e) => {
78                            log::warn!("Failed to get dependencies for file_id={}: {}", file_id, e);
79                        }
80                    }
81                }
82                Ok(None) => {
83                    log::warn!("No file_id found for path: {}", result.path);
84                }
85                Err(e) => {
86                    log::warn!("Failed to get file_id for path {}: {}", result.path, e);
87                }
88            }
89        }
90
91        Ok(())
92    }
93
94    /// Group search results by file and load dependencies at file level
95    /// Returns file-grouped results with dependencies populated once per file
96    fn group_and_load_dependencies(
97        &self,
98        results: Vec<SearchResult>,
99        include_deps: bool,
100        context_lines: usize,
101    ) -> Result<Vec<crate::models::FileGroupedResult>> {
102        use crate::models::{FileGroupedResult, MatchResult};
103        use std::collections::HashMap;
104
105        if results.is_empty() {
106            return Ok(Vec::new());
107        }
108
109        // Group results by file path (preserving language from first match)
110        let mut grouped: HashMap<String, Vec<SearchResult>> = HashMap::new();
111        for result in results {
112            grouped.entry(result.path.clone()).or_default().push(result);
113        }
114
115        // Create dependency index if needed
116        let dep_index = if include_deps {
117            let workspace_root = self
118                .cache
119                .path()
120                .parent()
121                .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
122            let cache_for_deps = CacheManager::new(workspace_root);
123            Some(crate::dependency::DependencyIndex::new(cache_for_deps))
124        } else {
125            None
126        };
127
128        // Load ContentReader for extracting context lines
129        let content_path = self.cache.path().join("content.bin");
130        let content_reader_opt = ContentReader::open(&content_path).ok();
131
132        // Convert to FileGroupedResult and load dependencies
133        let mut file_results: Vec<FileGroupedResult> = grouped
134            .into_iter()
135            .map(|(path, file_matches)| {
136                // Capture language from first match (all matches in a file share the same language)
137                let language = file_matches.first().map(|r| r.lang).unwrap_or_default();
138
139                // Load dependencies for this file (once per file, not per result)
140                let dependencies = if let Some(dep_idx) = &dep_index {
141                    let normalized_path = path.strip_prefix("./").unwrap_or(&path);
142                    match self.cache.get_file_id(normalized_path) {
143                        Ok(Some(file_id)) => match dep_idx.get_dependencies_info(file_id) {
144                            Ok(dep_infos) if !dep_infos.is_empty() => {
145                                log::debug!(
146                                    "Loaded {} dependencies for file: {}",
147                                    dep_infos.len(),
148                                    path
149                                );
150                                Some(dep_infos)
151                            }
152                            Ok(_) => None,
153                            Err(e) => {
154                                log::warn!("Failed to get dependencies for {}: {}", path, e);
155                                None
156                            }
157                        },
158                        Ok(None) => {
159                            log::warn!("No file_id found for path: {}", path);
160                            None
161                        }
162                        Err(e) => {
163                            log::warn!("Failed to get file_id for path {}: {}", path, e);
164                            None
165                        }
166                    }
167                } else {
168                    None
169                };
170
171                // Get file_id for context extraction
172                // Note: We use ContentReader's get_file_id_by_path() which returns array indices,
173                // not database file_ids (which are AUTO INCREMENT values)
174                let normalized_path = path.strip_prefix("./").unwrap_or(&path);
175                let file_id_for_context = if let Some(reader) = &content_reader_opt {
176                    reader.get_file_id_by_path(normalized_path)
177                } else {
178                    None
179                };
180                log::debug!(
181                    "Context extraction: file={}, file_id={:?}, content_reader={}",
182                    path,
183                    file_id_for_context,
184                    content_reader_opt.is_some()
185                );
186
187                // Convert SearchResults to MatchResults (strip path and dependencies) and extract context
188                let matches: Vec<MatchResult> = file_matches
189                    .into_iter()
190                    .map(|r| {
191                        // Extract context lines if requested (0 = disabled)
192                        let (context_before, context_after) = if context_lines > 0 {
193                            if let (Some(reader), Some(fid)) =
194                                (&content_reader_opt, file_id_for_context)
195                            {
196                                let result = reader
197                                    .get_context_by_line(
198                                        fid as u32,
199                                        r.span.start_line,
200                                        context_lines,
201                                    )
202                                    .unwrap_or_else(|e| {
203                                        log::warn!(
204                                            "Failed to extract context for {}:{}: {}",
205                                            path,
206                                            r.span.start_line,
207                                            e
208                                        );
209                                        (vec![], vec![])
210                                    });
211                                log::debug!(
212                                    "Extracted context for {}:{} - before: {}, after: {}",
213                                    path,
214                                    r.span.start_line,
215                                    result.0.len(),
216                                    result.1.len()
217                                );
218                                result
219                            } else {
220                                if content_reader_opt.is_none() {
221                                    log::debug!(
222                                        "No ContentReader available for context extraction"
223                                    );
224                                }
225                                if file_id_for_context.is_none() {
226                                    log::debug!("No file_id found for {}", path);
227                                }
228                                (vec![], vec![])
229                            }
230                        } else {
231                            (vec![], vec![])
232                        };
233
234                        MatchResult {
235                            kind: r.kind,
236                            symbol: r.symbol,
237                            span: r.span,
238                            preview: r.preview,
239                            context_before,
240                            context_after,
241                        }
242                    })
243                    .collect();
244
245                FileGroupedResult {
246                    path,
247                    language,
248                    dependencies,
249                    matches,
250                }
251            })
252            .collect();
253
254        // Sort by path for deterministic output
255        file_results.sort_by(|a, b| a.path.cmp(&b.path));
256
257        Ok(file_results)
258    }
259
260    /// Execute a query and return matching results with index metadata
261    ///
262    /// This is the preferred method for programmatic/JSON output as it includes
263    /// index freshness information that AI agents can use to decide whether to re-index.
264    pub fn search_with_metadata(
265        &self,
266        pattern: &str,
267        filter: QueryFilter,
268    ) -> Result<QueryResponse> {
269        log::info!(
270            "Executing query with metadata: pattern='{}', filter={:?}",
271            pattern,
272            filter
273        );
274
275        // Ensure cache exists
276        if !self.cache.exists() {
277            anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
278        }
279
280        // Validate cache integrity
281        if let Err(e) = self.cache.validate() {
282            anyhow::bail!(
283                "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
284                e
285            );
286        }
287
288        // Get index status and warning (without printing warnings to stderr)
289        let (status, can_trust_results, warning) = self.get_index_status()?;
290
291        // Execute the search
292        let (results, total) = self.search_internal(pattern, filter.clone())?;
293
294        // Build pagination metadata
295        use crate::models::PaginationInfo;
296        let pagination = PaginationInfo {
297            total,
298            count: results.len(),
299            offset: filter.offset.unwrap_or(0),
300            limit: filter.limit,
301            has_more: total > filter.offset.unwrap_or(0) + results.len(),
302        };
303
304        // Always use grouped format (group results by file)
305        // Dependencies are loaded only when include_dependencies is true
306        let grouped_results = self.group_and_load_dependencies(
307            results,
308            filter.include_dependencies,
309            filter.context_lines,
310        )?;
311
312        Ok(QueryResponse {
313            ai_instruction: None, // AI instruction is generated by CLI/MCP layer, not here
314            status,
315            can_trust_results,
316            warning,
317            pagination,
318            results: grouped_results,
319        })
320    }
321
322    /// Execute a query and return matching results (legacy method)
323    ///
324    /// This method prints warnings to stderr and returns just the results.
325    /// For programmatic use, prefer `search_with_metadata()`.
326    pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
327        log::info!(
328            "Executing query: pattern='{}', filter={:?}",
329            pattern,
330            filter
331        );
332
333        // Ensure cache exists
334        if !self.cache.exists() {
335            anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
336        }
337
338        // Validate cache integrity
339        if let Err(e) = self.cache.validate() {
340            anyhow::bail!(
341                "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
342                e
343            );
344        }
345
346        // Show non-blocking warnings about branch state and staleness
347        self.check_index_freshness(&filter)?;
348
349        // Execute the search (discard total count - legacy method doesn't use it)
350        let (mut results, _total_count) = self.search_internal(pattern, filter.clone())?;
351
352        // Load dependencies if requested
353        self.load_dependencies(&mut results, filter.include_dependencies)?;
354
355        Ok(results)
356    }
357
358    /// Internal search implementation (used by both search methods)
359    /// Returns (results, total_count) where total_count is the count before offset/limit
360    fn search_internal(
361        &self,
362        pattern: &str,
363        filter: QueryFilter,
364    ) -> Result<(Vec<SearchResult>, usize)> {
365        use std::time::{Duration, Instant};
366
367        // Start timeout timer if configured
368        let start_time = Instant::now();
369        let timeout = if filter.timeout_secs > 0 {
370            Some(Duration::from_secs(filter.timeout_secs))
371        } else {
372            None
373        };
374
375        // KEYWORD DETECTION (early): Check if this is a keyword query that should scan ALL files
376        // When a user searches for a language keyword (like "class", "function") with --symbols or --kind,
377        // we interpret it as "list all symbols of that type" and should scan ALL files,
378        // not just the first 100 candidates from trigram search.
379        //
380        // Requirements for keyword query mode:
381        // 1. Symbol mode active (--symbols or --kind)
382        // 2. Pattern matches a keyword in ANY supported language
383        //
384        // Note: --lang is optional. If specified, language filtering happens naturally in Phase 2/3.
385        // Empty pattern in symbol mode means "list all symbols of the requested kind" —
386        // treat it like a keyword query so we scan all files instead of failing the
387        // broad-query guard or returning zero trigram matches.
388        let is_keyword_query = if filter.symbols_mode || filter.kind.is_some() {
389            pattern.is_empty() || ParserFactory::get_all_keywords().contains(&pattern)
390        } else {
391            false
392        };
393
394        // KEYWORD-TO-KIND MAPPING: If user searches for a keyword without --kind, infer the kind
395        // Example: "class" → SymbolKind::Class, "function" → SymbolKind::Function
396        // This ensures keyword queries return only the relevant symbol type
397        let mut filter = filter.clone(); // Clone so we can modify it
398        if is_keyword_query && filter.kind.is_none() {
399            if let Some(inferred_kind) = Self::keyword_to_kind(pattern) {
400                log::info!(
401                    "Keyword '{}' mapped to kind {:?} (auto-inferred)",
402                    pattern,
403                    inferred_kind
404                );
405                filter.kind = Some(inferred_kind);
406            }
407        }
408
409        // EARLY BROAD QUERY DETECTION (Index Size Check)
410        // This check happens BEFORE the expensive trigram search to prevent hangs on large indexes
411        // For very large codebases (like Linux kernel with 62K files), even valid 3-char trigrams
412        // like "get" can take 10-30+ seconds to search. This early check prevents that hang.
413        //
414        // Criteria for early blocking:
415        // 1. Large index (> 20,000 files) AND
416        // 2. Short pattern (< 4 chars) AND
417        // 3. Not using regex (regex has its own trigram extraction) AND
418        // 4. Not a keyword query (keywords are intentionally broad) AND
419        // 5. Not forced by --force flag
420        if !filter.force && !filter.use_regex && !is_keyword_query {
421            let stats = self.cache.stats()?;
422            let total_files = stats.total_files;
423            let pattern_len = pattern.chars().count();
424
425            // Thresholds for early blocking:
426            // - Large index: 20,000+ files (approximately where performance degrades significantly)
427            // - Short pattern: < 4 chars (3-char trigrams are borderline, < 4 catches edge cases)
428            // Test overrides allow reducing thresholds for integration tests without creating 20K+ files
429            let large_index_threshold = filter.test_large_index_threshold.unwrap_or(20_000);
430            let short_pattern_threshold = filter.test_short_pattern_threshold.unwrap_or(4);
431
432            if total_files > large_index_threshold && pattern_len < short_pattern_threshold {
433                anyhow::bail!(
434                    "Query too broad - would be expensive to execute on this large index\n\
435                     \n\
436                     This index contains {} files, and pattern '{}' ({} characters) is too short for efficient searching.\n\
437                     On large codebases, short patterns can take 10-30+ seconds to complete.\n\
438                     \n\
439                     This query could:\n\
440                     • Hang for an extended period before returning results\n\
441                     • Return thousands of results\n\
442                     • Flood LLM context windows with excessive data\n\
443                     • Fail entirely\n\
444                     \n\
445                     Suggestions to narrow the query:\n\
446                     • Use a longer, more specific pattern (4+ characters recommended for large indexes)\n\
447                     • Add a language filter: --lang <language>\n\
448                     • Add a file filter: --glob <pattern> or --file <path>\n\
449                     • Use --force to bypass this check if you really need all results\n\
450                     \n\
451                     To force execution anyway:\n\
452                     rfx query \"{}\" --force",
453                    total_files,
454                    pattern,
455                    pattern_len,
456                    pattern
457                );
458            }
459        }
460
461        // PHASE 1: Get initial candidates (choose search strategy)
462        let mut results = if is_keyword_query {
463            // KEYWORD QUERY MODE: Scan all files (or files of target language if --lang specified)
464            // This ensures we find ALL classes/functions/etc, not just those in the first 100 trigram matches
465            if let Some(lang) = filter.language {
466                log::info!(
467                    "Keyword query detected for '{}' - scanning all {:?} files (bypassing trigram search)",
468                    pattern,
469                    lang
470                );
471            } else {
472                log::info!(
473                    "Keyword query detected for '{}' - scanning all files (bypassing trigram search)",
474                    pattern
475                );
476            }
477            self.get_all_language_files(&filter)?
478        } else if filter.use_regex {
479            // Regex pattern search with trigram optimization
480            self.get_regex_candidates(
481                pattern,
482                timeout.as_ref(),
483                &start_time,
484                filter.suppress_output,
485            )?
486        } else {
487            // Standard trigram-based full-text search
488            self.get_trigram_candidates(pattern, &filter)?
489        };
490
491        // EARLY LANGUAGE FILTER: Apply language filtering BEFORE broad query check
492        // This ensures we only parse files matching the language filter in Phase 2
493        // Critical for non-keyword queries to work correctly with accurate candidate counts
494        //
495        // Skip for keyword queries - those candidates are already pre-filtered by language
496        if !is_keyword_query {
497            if let Some(lang) = filter.language {
498                let before_count = results.len();
499                results.retain(|r| r.lang == lang);
500                log::debug!(
501                    "Language filter ({:?}): reduced {} candidates to {} candidates",
502                    lang,
503                    before_count,
504                    results.len()
505                );
506            }
507        }
508
509        // EARLY GLOB PATTERN FILTER: Apply glob/exclude filtering BEFORE broad query check
510        // This ensures candidate count reflects actual files that will be parsed
511        // Critical for queries like: rfx query "index" --symbols --glob "src/**/*.rs"
512        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
513            use globset::{Glob, GlobSetBuilder};
514
515            // Build include matcher (if patterns specified)
516            let include_matcher = if !filter.glob_patterns.is_empty() {
517                let mut builder = GlobSetBuilder::new();
518                for pattern in &filter.glob_patterns {
519                    // Normalize pattern to ensure LLM-generated patterns work correctly
520                    let normalized = Self::normalize_glob_pattern(pattern);
521                    match Glob::new(&normalized) {
522                        Ok(glob) => {
523                            builder.add(glob);
524                        }
525                        Err(e) => {
526                            log::warn!("Invalid glob pattern '{}': {}", pattern, e);
527                        }
528                    }
529                }
530                match builder.build() {
531                    Ok(matcher) => Some(matcher),
532                    Err(e) => {
533                        log::warn!("Failed to build glob matcher: {}", e);
534                        None
535                    }
536                }
537            } else {
538                None
539            };
540
541            // Build exclude matcher (if patterns specified)
542            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
543                let mut builder = GlobSetBuilder::new();
544                for pattern in &filter.exclude_patterns {
545                    // Normalize pattern to ensure LLM-generated patterns work correctly
546                    let normalized = Self::normalize_glob_pattern(pattern);
547                    match Glob::new(&normalized) {
548                        Ok(glob) => {
549                            builder.add(glob);
550                        }
551                        Err(e) => {
552                            log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
553                        }
554                    }
555                }
556                match builder.build() {
557                    Ok(matcher) => Some(matcher),
558                    Err(e) => {
559                        log::warn!("Failed to build exclude matcher: {}", e);
560                        None
561                    }
562                }
563            } else {
564                None
565            };
566
567            // Apply filters
568            let before_count = results.len();
569            results.retain(|r| {
570                // If include patterns specified, path must match at least one
571                let included = if let Some(ref matcher) = include_matcher {
572                    matcher.is_match(&r.path)
573                } else {
574                    true // No include patterns = include all
575                };
576
577                // If exclude patterns specified, path must NOT match any
578                let excluded = if let Some(ref matcher) = exclude_matcher {
579                    matcher.is_match(&r.path)
580                } else {
581                    false // No exclude patterns = exclude none
582                };
583
584                included && !excluded
585            });
586            log::debug!(
587                "Glob filter: reduced {} candidates to {} candidates",
588                before_count,
589                results.len()
590            );
591        }
592
593        // Check timeout after Phase 1
594        if let Some(timeout_duration) = timeout {
595            if start_time.elapsed() > timeout_duration {
596                anyhow::bail!(
597                    "Query timeout exceeded ({} seconds).\n\
598                     \n\
599                     The query took too long to complete. Try one of these approaches:\n\
600                     • Use a more specific search pattern (longer patterns = faster search)\n\
601                     • Add a language filter with --lang to narrow the search space\n\
602                     • Add a file filter with --file to search specific directories\n\
603                     • Increase the timeout with --timeout <seconds>\n\
604                     \n\
605                     Example: rfx query \"{}\" --lang rust --timeout 60",
606                    filter.timeout_secs,
607                    pattern
608                );
609            }
610        }
611
612        // BROAD QUERY DETECTION: Check if query is too expensive BEFORE parsing
613        // This protects LLM users from accidentally running expensive queries that flood context windows
614        if !filter.force {
615            let candidate_count = results.len();
616            let pattern_len = pattern.chars().count();
617
618            // Condition 1: Pattern too short (< 3 chars can't use trigram optimization efficiently)
619            // Exception: Allow short keyword queries (e.g., "fn", "if") since they scan all language files
620            let is_short_pattern = pattern_len < 3 && !filter.use_regex && !is_keyword_query;
621
622            // Condition 2: AST query without glob restriction on large codebases
623            // Allow on small codebases (< 100 files) but require glob for larger ones
624            let is_broad_ast =
625                filter.use_ast && filter.glob_patterns.is_empty() && candidate_count >= 100;
626
627            // Condition 3: Query-type-aware threshold for symbol/AST parsing
628            // Different thresholds based on actual performance characteristics:
629            // - AST without glob: 100 files (allow small codebases, block large ones)
630            // - AST with glob: 10,000 files (~5 seconds max)
631            // - Keyword queries: 20,000 files (~3 seconds max) - scan all files of language
632            // - Trigram-filtered symbols: 50,000 files (~5 seconds max) - very fast due to trigram filtering
633            let threshold = if filter.use_ast && filter.glob_patterns.is_empty() {
634                100 // AST without glob - allow small codebases
635            } else if filter.use_ast {
636                10_000 // AST with glob restriction
637            } else if is_keyword_query {
638                20_000 // Keyword queries (e.g., "class", "function")
639            } else {
640                50_000 // Trigram-filtered symbol queries
641            };
642
643            let has_many_candidates = candidate_count > threshold
644                && (filter.symbols_mode || filter.kind.is_some() || filter.use_ast);
645
646            if is_short_pattern || has_many_candidates || is_broad_ast {
647                let reason = if is_short_pattern {
648                    format!(
649                        "Pattern '{}' is too short ({} characters). Short patterns bypass trigram optimization and require scanning many files.",
650                        pattern, pattern_len
651                    )
652                } else if is_broad_ast {
653                    format!(
654                        "AST query without --glob restriction will scan the entire codebase ({} files). AST queries are SLOW (500ms-10s+).",
655                        candidate_count
656                    )
657                } else if is_keyword_query {
658                    format!(
659                        "Keyword query '{}' matched {} files. This query scans all files of the target language, which will take significant time and produce excessive results.",
660                        pattern, candidate_count
661                    )
662                } else {
663                    format!(
664                        "Query matched {} files. Parsing this many files with --symbols or --kind will take significant time and produce excessive results.",
665                        candidate_count
666                    )
667                };
668
669                let suggestions = if is_short_pattern {
670                    vec![
671                        "• Use a longer, more specific pattern (3+ characters recommended)",
672                        "• Add a language filter: --lang <language>",
673                        "• Add a file path filter: --file <path> or --glob <pattern>",
674                        "• Use --force to bypass this check if you really need all results",
675                    ]
676                } else if is_broad_ast {
677                    vec![
678                        "• Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'",
679                        "• Use --symbols instead (10-100x faster in 95% of cases)",
680                        "• Use --force to bypass this check if you need a full codebase scan",
681                    ]
682                } else if is_keyword_query {
683                    vec![
684                        "• Add a language filter to reduce files scanned: --lang <language>",
685                        "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
686                        "• Add --kind to filter to specific symbol types: --kind function",
687                        "• Use a more specific pattern instead of a keyword",
688                        "• Use --force to bypass this check if you need all results",
689                    ]
690                } else {
691                    vec![
692                        "• Add a language filter to reduce candidate set: --lang <language>",
693                        "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
694                        "• Use a more specific search pattern",
695                        "• Use --force to bypass this check if you need all results",
696                    ]
697                };
698
699                // Build the command snippet showing current flags
700                let mut cmd_flags = String::new();
701                if filter.symbols_mode {
702                    cmd_flags.push_str("--symbols ");
703                }
704                if let Some(ref lang) = filter.language {
705                    cmd_flags.push_str(&format!("--lang {:?} ", lang));
706                }
707                if let Some(ref kind) = filter.kind {
708                    cmd_flags.push_str(&format!("--kind {:?} ", kind));
709                }
710                if filter.use_ast {
711                    cmd_flags.push_str("--ast ");
712                }
713
714                anyhow::bail!(
715                    "Query too broad - would be expensive to execute\n\
716                     \n\
717                     {}\n\
718                     \n\
719                     This query could:\n\
720                     • Hang for an extended period before returning results\n\
721                     • Return thousands of results\n\
722                     • Flood LLM context windows with excessive data\n\
723                     • Fail entirely\n\
724                     \n\
725                     Suggestions to narrow the query:\n\
726                     {}\n\
727                     \n\
728                     To force execution anyway:\n\
729                     rfx query \"{}\" --force {}",
730                    reason,
731                    suggestions.join("\n             "),
732                    pattern,
733                    cmd_flags
734                );
735            }
736        }
737
738        // DETERMINISTIC SORTING: Sort candidates early for deterministic results
739        // This ensures results are always returned in the same order
740        if filter.symbols_mode || filter.kind.is_some() || filter.use_ast {
741            results.sort_by(|a, b| {
742                a.path
743                    .cmp(&b.path)
744                    .then_with(|| a.span.start_line.cmp(&b.span.start_line))
745            });
746
747            // Warn if many candidates need parsing (helps users refine queries)
748            let candidate_count = results.len();
749            if candidate_count > 1000 && !filter.suppress_output {
750                output::warn(&format!(
751                    "Pattern '{}' matched {} files - parsing may take some time. Consider using --file, --glob, or a more specific pattern to narrow the search.",
752                    pattern, candidate_count
753                ));
754            } else if candidate_count > 100 {
755                log::info!(
756                    "Parsing {} candidate files for symbol extraction",
757                    candidate_count
758                );
759            }
760        }
761
762        // PHASE 2: Enrich with symbol information or AST pattern matching (if needed)
763        if filter.use_ast {
764            // AST pattern matching: Execute Tree-sitter query on candidate files
765            results = self.enrich_with_ast(results, pattern, filter.language)?;
766        } else if filter.symbols_mode || filter.kind.is_some() {
767            // Symbol enrichment: Parse candidate files and extract symbol definitions
768            results = self.enrich_with_symbols(results, pattern, &filter)?;
769        }
770
771        // PHASE 3: Apply post-enrichment filters
772        // Note: Language and glob filters are applied in Phase 1 (before broad query check)
773        // Only kind, file_pattern, and exact filters are applied here
774
775        // Deduplicate symbols: the same source location can be emitted as both
776        // Function and Method by some parsers.  Keep the first hit for each
777        // (path, start_line, symbol_name) triple so --kind function doesn't
778        // return the same definition twice.
779        if filter.symbols_mode || filter.kind.is_some() {
780            let mut seen = std::collections::HashSet::<(String, usize, Option<String>)>::new();
781            results.retain(|r| seen.insert((r.path.clone(), r.span.start_line, r.symbol.clone())));
782        }
783
784        // Apply kind filter (only relevant for symbol searches)
785        // Special case: --kind function also includes methods (methods are functions in classes)
786        if let Some(ref kind) = filter.kind {
787            results.retain(|r| {
788                if matches!(kind, SymbolKind::Function) {
789                    // When searching for functions, also include methods
790                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
791                } else {
792                    r.kind == *kind
793                }
794            });
795        }
796
797        // Apply file path filter (substring match)
798        if let Some(ref file_pattern) = filter.file_pattern {
799            results.retain(|r| r.path.contains(file_pattern));
800        }
801
802        // Apply exact name filter (only for symbol searches)
803        if filter.exact && filter.symbols_mode {
804            results.retain(|r| r.symbol.as_deref() == Some(pattern));
805        }
806
807        // Expand symbol bodies if requested
808        // Works for both symbol-mode and regex searches (if regex matched a symbol definition)
809        if filter.expand {
810            // Load content store to fetch full symbol bodies
811            let content_path = self.cache.path().join("content.bin");
812            if let Ok(content_reader) = ContentReader::open(&content_path) {
813                for result in &mut results {
814                    // Only expand if the result has a meaningful span (not just a single line)
815                    if result.span.start_line < result.span.end_line {
816                        // Find the file_id for this result's path
817                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
818                            // Fetch the full span content
819                            if let Ok(content) = content_reader.get_file_content(file_id) {
820                                let lines: Vec<&str> = content.lines().collect();
821                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
822                                let end_idx = (result.span.end_line as usize).min(lines.len());
823
824                                if start_idx < end_idx {
825                                    let full_body = lines[start_idx..end_idx].join("\n");
826                                    result.preview = full_body;
827                                }
828                            }
829                        }
830                    }
831                }
832            }
833        }
834
835        // Step 4: Deduplicate by path if paths-only mode
836        if filter.paths_only {
837            use std::collections::HashSet;
838            let mut seen_paths = HashSet::new();
839            results.retain(|r| seen_paths.insert(r.path.clone()));
840        }
841
842        // Step 5: Sort results deterministically (by path, then line number)
843        results.sort_by(|a, b| {
844            a.path
845                .cmp(&b.path)
846                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
847        });
848
849        // Capture total count AFTER all filtering but BEFORE pagination (offset/limit)
850        // This is the total number of results the user can paginate through
851        let total_count = results.len();
852
853        // Step 5.5: Apply offset (pagination)
854        if let Some(offset) = filter.offset {
855            if offset < results.len() {
856                results = results.into_iter().skip(offset).collect();
857            } else {
858                // Offset beyond results - return empty
859                results.clear();
860            }
861        }
862
863        // Step 6: Apply limit
864        if let Some(limit) = filter.limit {
865            results.truncate(limit);
866        }
867
868        log::info!(
869            "Query returned {} results (total before pagination: {})",
870            results.len(),
871            total_count
872        );
873
874        Ok((results, total_count))
875    }
876
877    /// Search for symbols by exact name match
878    pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
879        let filter = QueryFilter {
880            symbols_mode: true,
881            ..Default::default()
882        };
883        self.search(name, filter)
884    }
885
886    /// Search using a Tree-sitter AST pattern
887    pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
888        let filter = QueryFilter {
889            language: lang,
890            use_ast: true,
891            ..Default::default()
892        };
893
894        self.search(pattern, filter)
895    }
896
897    /// Execute AST query on all indexed files (no trigram filtering)
898    ///
899    /// WARNING: This method scans the entire codebase (500ms-2s+).
900    /// In 95% of cases, use --symbols instead which is 10-100x faster.
901    ///
902    /// # Algorithm
903    /// 1. Get all indexed files for the specified language
904    /// 2. Apply glob/exclude filters to reduce file set
905    /// 3. Load file contents for all matching files
906    /// 4. Execute AST query pattern using Tree-sitter
907    /// 5. Apply remaining filters and return results
908    ///
909    /// # Performance
910    /// - Parses entire codebase (not just trigram candidates)
911    /// - Expected: 500ms-2s for medium codebases, 2-10s for large codebases
912    /// - Use --glob to limit scope for better performance
913    ///
914    /// # Requirements
915    /// - Language must be specified (AST queries are language-specific)
916    /// - AST pattern must be valid S-expression syntax
917    pub fn search_ast_all_files(
918        &self,
919        ast_pattern: &str,
920        filter: QueryFilter,
921    ) -> Result<Vec<SearchResult>> {
922        log::info!(
923            "Executing AST query on all files: pattern='{}', filter={:?}",
924            ast_pattern,
925            filter
926        );
927
928        // Require language for AST queries
929        let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
930            "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
931             \n\
932             Example: rfx query \"(function_definition) @fn\" --ast --lang python"
933        ))?;
934
935        // Ensure cache exists
936        if !self.cache.exists() {
937            anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
938        }
939
940        // Show non-blocking warnings about branch state and staleness
941        self.check_index_freshness(&filter)?;
942
943        // Load content store
944        let content_path = self.cache.path().join("content.bin");
945        let content_reader =
946            ContentReader::open(&content_path).context("Failed to open content store")?;
947
948        // Build glob matchers ONCE before file iteration (performance optimization)
949        use globset::{Glob, GlobSetBuilder};
950
951        let include_matcher = if !filter.glob_patterns.is_empty() {
952            let mut builder = GlobSetBuilder::new();
953            for pattern in &filter.glob_patterns {
954                // Normalize pattern to ensure LLM-generated patterns work correctly
955                let normalized = Self::normalize_glob_pattern(pattern);
956                if let Ok(glob) = Glob::new(&normalized) {
957                    builder.add(glob);
958                }
959            }
960            builder.build().ok()
961        } else {
962            None
963        };
964
965        let exclude_matcher = if !filter.exclude_patterns.is_empty() {
966            let mut builder = GlobSetBuilder::new();
967            for pattern in &filter.exclude_patterns {
968                // Normalize pattern to ensure LLM-generated patterns work correctly
969                let normalized = Self::normalize_glob_pattern(pattern);
970                if let Ok(glob) = Glob::new(&normalized) {
971                    builder.add(glob);
972                }
973            }
974            builder.build().ok()
975        } else {
976            None
977        };
978
979        // Get all files matching the language and glob filters
980        let mut candidates: Vec<SearchResult> = Vec::new();
981
982        for file_id in 0..content_reader.file_count() {
983            let file_path = match content_reader.get_file_path(file_id as u32) {
984                Some(p) => p,
985                None => continue,
986            };
987
988            // Detect language from file extension
989            let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
990            let detected_lang = Language::from_extension(ext);
991
992            // Filter by language
993            if detected_lang != lang {
994                continue;
995            }
996
997            let file_path_str = file_path.to_string_lossy().to_string();
998
999            // Apply glob/exclude filters BEFORE loading content (performance optimization)
1000            let included = include_matcher
1001                .as_ref()
1002                .map_or(true, |m| m.is_match(&file_path_str));
1003            let excluded = exclude_matcher
1004                .as_ref()
1005                .map_or(false, |m| m.is_match(&file_path_str));
1006
1007            if !included || excluded {
1008                continue;
1009            }
1010
1011            // Create a dummy candidate for this file (AST query will replace it)
1012            candidates.push(SearchResult {
1013                path: file_path_str,
1014                lang: detected_lang,
1015                span: Span {
1016                    start_line: 1,
1017                    end_line: 1,
1018                },
1019                symbol: None,
1020                kind: SymbolKind::Unknown("ast_query".to_string()),
1021                preview: String::new(),
1022                dependencies: None,
1023            });
1024        }
1025
1026        log::info!(
1027            "AST query scanning {} files for language {:?}",
1028            candidates.len(),
1029            lang
1030        );
1031
1032        // BROAD QUERY DETECTION: Block large AST queries without glob restriction
1033        // Allow small codebases (<100 files) but require --glob for larger ones
1034        if !filter.force && filter.glob_patterns.is_empty() && candidates.len() >= 100 {
1035            anyhow::bail!(
1036                "Query too broad - would be expensive to execute\n\
1037                 \n\
1038                 AST query without --glob restriction will scan the ENTIRE codebase ({} files). AST queries are SLOW (500ms-10s+).\n\
1039                 \n\
1040                 This query could:\n\
1041                 • Hang for an extended period before returning results\n\
1042                 • Return thousands of results\n\
1043                 • Flood LLM context windows with excessive data\n\
1044                 • Fail entirely\n\
1045                 \n\
1046                 Suggestions to narrow the query:\n\
1047                 • Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'\n\
1048                 • Use --symbols instead (10-100x faster in 95% of cases)\n\
1049                 • Use --force to bypass this check if you need a full codebase scan\n\
1050                 \n\
1051                 To force execution anyway:\n\
1052                 rfx query \"{}\" --force --ast --lang {:?}",
1053                candidates.len(),
1054                ast_pattern,
1055                lang
1056            );
1057        }
1058
1059        if candidates.is_empty() {
1060            if !filter.suppress_output {
1061                output::warn(&format!(
1062                    "No files found for language {:?}. Check your language filter or glob patterns.",
1063                    lang
1064                ));
1065            }
1066            return Ok(Vec::new());
1067        }
1068
1069        // Execute the AST query on all candidate files
1070        // This will load file contents and parse them with tree-sitter
1071        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1072
1073        log::debug!("AST query found {} matches before filtering", results.len());
1074
1075        // Apply remaining filters (same as search_internal Phase 3)
1076
1077        // Apply kind filter
1078        if let Some(ref kind) = filter.kind {
1079            results.retain(|r| {
1080                if matches!(kind, SymbolKind::Function) {
1081                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1082                } else {
1083                    r.kind == *kind
1084                }
1085            });
1086        }
1087
1088        // Note: exact filter doesn't make sense for AST queries (pattern is S-expression, not symbol name)
1089
1090        // Expand symbol bodies if requested
1091        if filter.expand {
1092            let content_path = self.cache.path().join("content.bin");
1093            if let Ok(content_reader) = ContentReader::open(&content_path) {
1094                for result in &mut results {
1095                    if result.span.start_line < result.span.end_line {
1096                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1097                            if let Ok(content) = content_reader.get_file_content(file_id) {
1098                                let lines: Vec<&str> = content.lines().collect();
1099                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
1100                                let end_idx = (result.span.end_line as usize).min(lines.len());
1101
1102                                if start_idx < end_idx {
1103                                    let full_body = lines[start_idx..end_idx].join("\n");
1104                                    result.preview = full_body;
1105                                }
1106                            }
1107                        }
1108                    }
1109                }
1110            }
1111        }
1112
1113        // Deduplicate by path if paths-only mode
1114        if filter.paths_only {
1115            use std::collections::HashSet;
1116            let mut seen_paths = HashSet::new();
1117            results.retain(|r| seen_paths.insert(r.path.clone()));
1118        }
1119
1120        // Sort results deterministically
1121        results.sort_by(|a, b| {
1122            a.path
1123                .cmp(&b.path)
1124                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1125        });
1126
1127        // Apply offset (pagination)
1128        if let Some(offset) = filter.offset {
1129            if offset < results.len() {
1130                results = results.into_iter().skip(offset).collect();
1131            } else {
1132                results.clear();
1133            }
1134        }
1135
1136        // Apply limit
1137        if let Some(limit) = filter.limit {
1138            results.truncate(limit);
1139        }
1140
1141        log::info!("AST query returned {} results", results.len());
1142
1143        // Load dependencies if requested
1144        self.load_dependencies(&mut results, filter.include_dependencies)?;
1145
1146        Ok(results)
1147    }
1148
1149    /// Search using AST pattern with separate text pattern for trigram filtering
1150    ///
1151    /// This allows efficient AST queries by:
1152    /// 1. Using text_pattern for Phase 1 trigram filtering (narrows to candidate files)
1153    /// 2. Using ast_pattern for Phase 2 AST matching (structure-aware filtering)
1154    ///
1155    /// # Example
1156    /// ```ignore
1157    /// // Find async functions: trigram search for "fn ", AST match for function_item
1158    /// engine.search_ast_with_text_filter("fn ", "(function_item (async))", filter)?;
1159    /// ```
1160    pub fn search_ast_with_text_filter(
1161        &self,
1162        text_pattern: &str,
1163        ast_pattern: &str,
1164        filter: QueryFilter,
1165    ) -> Result<Vec<SearchResult>> {
1166        log::info!(
1167            "Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
1168            text_pattern,
1169            ast_pattern,
1170            filter
1171        );
1172
1173        // Ensure cache exists
1174        if !self.cache.exists() {
1175            anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
1176        }
1177
1178        // Show non-blocking warnings about branch state and staleness
1179        self.check_index_freshness(&filter)?;
1180
1181        // Start timeout timer if configured
1182        use std::time::{Duration, Instant};
1183        let start_time = Instant::now();
1184        let timeout = if filter.timeout_secs > 0 {
1185            Some(Duration::from_secs(filter.timeout_secs))
1186        } else {
1187            None
1188        };
1189
1190        // PHASE 1: Get initial candidates using text pattern (trigram search)
1191        let candidates = if filter.use_regex {
1192            self.get_regex_candidates(
1193                text_pattern,
1194                timeout.as_ref(),
1195                &start_time,
1196                filter.suppress_output,
1197            )?
1198        } else {
1199            self.get_trigram_candidates(text_pattern, &filter)?
1200        };
1201
1202        log::debug!("Phase 1 found {} candidate locations", candidates.len());
1203
1204        // PHASE 2: Execute AST query on candidates
1205        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1206
1207        log::debug!("Phase 2 AST matching found {} results", results.len());
1208
1209        // PHASE 3: Apply filters
1210        if let Some(lang) = filter.language {
1211            results.retain(|r| r.lang == lang);
1212        }
1213
1214        if let Some(ref kind) = filter.kind {
1215            results.retain(|r| {
1216                if matches!(kind, SymbolKind::Function) {
1217                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1218                } else {
1219                    r.kind == *kind
1220                }
1221            });
1222        }
1223
1224        if let Some(ref file_pattern) = filter.file_pattern {
1225            results.retain(|r| r.path.contains(file_pattern));
1226        }
1227
1228        // Apply glob pattern filters (same logic as in search_internal)
1229        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
1230            use globset::{Glob, GlobSetBuilder};
1231
1232            let include_matcher = if !filter.glob_patterns.is_empty() {
1233                let mut builder = GlobSetBuilder::new();
1234                for pattern in &filter.glob_patterns {
1235                    // Normalize pattern to ensure LLM-generated patterns work correctly
1236                    let normalized = Self::normalize_glob_pattern(pattern);
1237                    if let Ok(glob) = Glob::new(&normalized) {
1238                        builder.add(glob);
1239                    }
1240                }
1241                builder.build().ok()
1242            } else {
1243                None
1244            };
1245
1246            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1247                let mut builder = GlobSetBuilder::new();
1248                for pattern in &filter.exclude_patterns {
1249                    // Normalize pattern to ensure LLM-generated patterns work correctly
1250                    let normalized = Self::normalize_glob_pattern(pattern);
1251                    if let Ok(glob) = Glob::new(&normalized) {
1252                        builder.add(glob);
1253                    }
1254                }
1255                builder.build().ok()
1256            } else {
1257                None
1258            };
1259
1260            results.retain(|r| {
1261                let included = include_matcher
1262                    .as_ref()
1263                    .map_or(true, |m| m.is_match(&r.path));
1264                let excluded = exclude_matcher
1265                    .as_ref()
1266                    .map_or(false, |m| m.is_match(&r.path));
1267                included && !excluded
1268            });
1269        }
1270
1271        if filter.exact && filter.symbols_mode {
1272            results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
1273        }
1274
1275        // Expand symbol bodies if requested
1276        if filter.expand {
1277            let content_path = self.cache.path().join("content.bin");
1278            if let Ok(content_reader) = ContentReader::open(&content_path) {
1279                for result in &mut results {
1280                    if result.span.start_line < result.span.end_line {
1281                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1282                            if let Ok(content) = content_reader.get_file_content(file_id) {
1283                                let lines: Vec<&str> = content.lines().collect();
1284                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
1285                                let end_idx = (result.span.end_line as usize).min(lines.len());
1286
1287                                if start_idx < end_idx {
1288                                    let full_body = lines[start_idx..end_idx].join("\n");
1289                                    result.preview = full_body;
1290                                }
1291                            }
1292                        }
1293                    }
1294                }
1295            }
1296        }
1297
1298        // Sort results deterministically
1299        results.sort_by(|a, b| {
1300            a.path
1301                .cmp(&b.path)
1302                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1303        });
1304
1305        // Apply offset (pagination)
1306        if let Some(offset) = filter.offset {
1307            if offset < results.len() {
1308                results = results.into_iter().skip(offset).collect();
1309            } else {
1310                results.clear();
1311            }
1312        }
1313
1314        // Apply limit
1315        if let Some(limit) = filter.limit {
1316            results.truncate(limit);
1317        }
1318
1319        log::info!("AST query returned {} results", results.len());
1320
1321        Ok(results)
1322    }
1323
1324    /// List all symbols of a specific kind
1325    pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
1326        let filter = QueryFilter {
1327            kind: Some(kind),
1328            symbols_mode: true,
1329            ..Default::default()
1330        };
1331
1332        self.search("*", filter)
1333    }
1334
1335    /// Enrich text match candidates with symbol information by parsing files
1336    ///
1337    /// Takes a list of text match candidates and extracts symbol information at those locations.
1338    ///
1339    /// # Algorithm
1340    /// 1. Group candidates by file_id for efficient processing
1341    /// 2. Parse each file with tree-sitter to extract ALL symbols
1342    /// 3. Filter symbols based on matching strategy:
1343    ///    - If use_regex=true: Extract symbols whose line spans overlap with candidate locations
1344    ///    - If use_contains=true: Filter symbols by substring match on symbol name
1345    ///    - Default: Filter symbols by exact name match
1346    /// 4. Return filtered symbol results
1347    ///
1348    /// # Performance
1349    /// Only parses files that have text matches, so typically 10-100 files
1350    /// instead of the entire codebase (62K+ files).
1351    ///
1352    /// # Optimizations
1353    /// 1. Language filtering: Skips files with unsupported languages (no parsers)
1354    /// 2. Parallel processing: Uses Rayon to parse files concurrently across CPU cores
1355    fn enrich_with_symbols(
1356        &self,
1357        candidates: Vec<SearchResult>,
1358        pattern: &str,
1359        filter: &QueryFilter,
1360    ) -> Result<Vec<SearchResult>> {
1361        // Load content store for file reading
1362        let content_path = self.cache.path().join("content.bin");
1363        let content_reader =
1364            ContentReader::open(&content_path).context("Failed to open content store")?;
1365
1366        // Load trigram index for file path lookups
1367        let trigrams_path = self.cache.path().join("trigrams.bin");
1368        let trigram_index = if trigrams_path.exists() {
1369            TrigramIndex::load(&trigrams_path)?
1370        } else {
1371            Self::rebuild_trigram_index(&content_reader)?
1372        };
1373
1374        // Open symbol cache for reading cached symbols
1375        let symbol_cache = crate::symbol_cache::SymbolCache::open(self.cache.path())
1376            .context("Failed to open symbol cache")?;
1377
1378        // Load file hashes for current branch for cache lookups
1379        let root = self.cache.workspace_root();
1380        let branch =
1381            crate::git::get_current_branch(&root).unwrap_or_else(|_| "_default".to_string());
1382        let file_hashes = self
1383            .cache
1384            .load_hashes_for_branch(&branch)
1385            .context("Failed to load file hashes")?;
1386        log::debug!(
1387            "Loaded {} file hashes for branch '{}' for symbol cache lookups",
1388            file_hashes.len(),
1389            branch
1390        );
1391
1392        // Group candidates by file, filtering out unsupported languages
1393        use std::collections::HashMap;
1394        let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
1395        let mut skipped_unsupported = 0;
1396
1397        for candidate in candidates {
1398            // Skip files with unsupported languages (no parser available)
1399            if !candidate.lang.is_supported() {
1400                skipped_unsupported += 1;
1401                continue;
1402            }
1403
1404            files_by_path
1405                .entry(candidate.path.clone())
1406                .or_insert_with(Vec::new)
1407                .push(candidate);
1408        }
1409
1410        let total_files = files_by_path.len();
1411        log::debug!(
1412            "Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
1413            total_files,
1414            skipped_unsupported
1415        );
1416
1417        // Warn if pattern is very broad (may take time to parse all files)
1418        if total_files > 1000 && !filter.suppress_output {
1419            output::warn(&format!(
1420                "Pattern '{}' matched {} files. This may take some time to parse. Consider using a more specific pattern or adding --lang/--file filters to narrow the search.",
1421                pattern, total_files
1422            ));
1423        }
1424
1425        // Convert to vec for parallel processing
1426        let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
1427
1428        // PHASE 2a: Line-based pre-filtering (skip files where ALL matches are in comments/strings)
1429        // This reduces tree-sitter parsing workload by 2-5x for most queries
1430        let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
1431
1432        for file_path in &files_to_process {
1433            // Get the language for this file
1434            let ext = std::path::Path::new(file_path)
1435                .extension()
1436                .and_then(|e| e.to_str())
1437                .unwrap_or("");
1438            let lang = Language::from_extension(ext);
1439
1440            // Get line filter for this language (if available)
1441            if let Some(line_filter) = crate::line_filter::get_filter(lang) {
1442                // Find file_id for this path
1443                let file_id =
1444                    match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1445                        Some(id) => id,
1446                        None => continue,
1447                    };
1448
1449                // Load file content
1450                let content = match content_reader.get_file_content(file_id) {
1451                    Ok(c) => c,
1452                    Err(_) => continue,
1453                };
1454
1455                // Check if ALL pattern occurrences are in comments/strings
1456                let mut all_in_non_code = true;
1457                for line in content.lines() {
1458                    // Find all occurrences of the pattern in this line
1459                    let mut search_start = 0;
1460                    while let Some(pos) = line[search_start..].find(pattern) {
1461                        let absolute_pos = search_start + pos;
1462
1463                        // Check if this occurrence is in code (not comment/string)
1464                        let in_comment = line_filter.is_in_comment(line, absolute_pos);
1465                        let in_string = line_filter.is_in_string(line, absolute_pos);
1466
1467                        if !in_comment && !in_string {
1468                            // Found at least one occurrence in actual code
1469                            all_in_non_code = false;
1470                            break;
1471                        }
1472
1473                        search_start = absolute_pos + pattern.len();
1474                    }
1475
1476                    if !all_in_non_code {
1477                        break;
1478                    }
1479                }
1480
1481                // If ALL occurrences are in comments/strings, skip this file
1482                if all_in_non_code {
1483                    // Double-check: make sure there was at least one occurrence
1484                    if content.contains(pattern) {
1485                        files_to_skip.insert(file_path.clone());
1486                        log::debug!(
1487                            "Pre-filter: Skipping {} (all matches in comments/strings)",
1488                            file_path
1489                        );
1490                    }
1491                }
1492            }
1493        }
1494
1495        // Filter out files we're skipping
1496        files_to_process.retain(|path| !files_to_skip.contains(path));
1497
1498        log::debug!(
1499            "Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
1500            files_to_skip.len(),
1501            files_to_process.len()
1502        );
1503
1504        // Configure thread pool for parallel processing (use 80% of available cores, capped at 8)
1505        let num_threads = {
1506            let available_cores = std::thread::available_parallelism()
1507                .map(|n| n.get())
1508                .unwrap_or(4);
1509            // Use 80% of available cores (minimum 1, maximum 8) to avoid locking the system
1510            // Cap at 8 to prevent diminishing returns from cache contention on high-core systems
1511            ((available_cores as f64 * 0.8).ceil() as usize)
1512                .max(1)
1513                .min(8)
1514        };
1515
1516        log::debug!(
1517            "Using {} threads for parallel symbol extraction (out of {} available cores)",
1518            num_threads,
1519            std::thread::available_parallelism()
1520                .map(|n| n.get())
1521                .unwrap_or(4)
1522        );
1523
1524        // Build a custom thread pool with limited threads
1525        let pool = rayon::ThreadPoolBuilder::new()
1526            .num_threads(num_threads)
1527            .build()
1528            .context("Failed to create thread pool for symbol extraction")?;
1529
1530        // OPTIMIZATION: Batch read all cached symbols in ONE database transaction
1531        // This is 10-30x faster than calling get() individually for each file
1532
1533        // Step 1: Collect file paths that have hashes
1534        let files_with_hashes: Vec<String> = files_to_process
1535            .iter()
1536            .filter(|path| file_hashes.contains_key(path.as_str()))
1537            .cloned()
1538            .collect();
1539
1540        // Step 2: Batch lookup file_ids for all paths
1541        let file_id_map = self
1542            .cache
1543            .batch_get_file_ids(&files_with_hashes)
1544            .context("Failed to batch lookup file IDs")?;
1545
1546        // Step 3: Build (file_id, hash, path) tuples for batch_get_with_kind
1547        let file_lookup_tuples: Vec<(i64, String, String)> = files_with_hashes
1548            .iter()
1549            .filter_map(|path| {
1550                let file_id = file_id_map.get(path)?;
1551                let hash = file_hashes.get(path.as_str())?;
1552                Some((*file_id, hash.clone(), path.clone()))
1553            })
1554            .collect();
1555
1556        // Step 4: Batch read symbols with kind filtering (uses junction table + integer joins)
1557        let batch_results = symbol_cache
1558            .batch_get_with_kind(&file_lookup_tuples, filter.kind.clone())
1559            .context("Failed to batch read symbol cache")?;
1560
1561        // Step 5: Separate files into cached vs need-to-parse
1562        let mut cached_symbols: HashMap<String, Vec<SearchResult>> = HashMap::new();
1563        let mut files_needing_parse: Vec<String> = Vec::new();
1564
1565        // Build path lookup from file_id
1566        let id_to_path: HashMap<i64, String> = file_id_map
1567            .iter()
1568            .map(|(path, id)| (*id, path.clone()))
1569            .collect();
1570
1571        // Process cached results
1572        for (file_id, symbols) in batch_results {
1573            if let Some(file_path) = id_to_path.get(&file_id) {
1574                cached_symbols.insert(file_path.clone(), symbols);
1575            }
1576        }
1577
1578        // Files with hashes but not in cache results need parsing
1579        for path in &files_with_hashes {
1580            if file_id_map.contains_key(path) && !cached_symbols.contains_key(path) {
1581                files_needing_parse.push(path.clone());
1582            }
1583        }
1584
1585        // Add files without hashes to parse list
1586        for file_path in &files_to_process {
1587            if !file_hashes.contains_key(file_path.as_str()) {
1588                files_needing_parse.push(file_path.clone());
1589            }
1590        }
1591
1592        log::debug!(
1593            "Symbol cache: {} hits, {} need parsing",
1594            cached_symbols.len(),
1595            files_needing_parse.len()
1596        );
1597
1598        // Parse files in parallel using custom thread pool (only cache misses)
1599        use rayon::prelude::*;
1600
1601        let parsed_symbols: Vec<SearchResult> = pool.install(|| {
1602            files_needing_parse
1603                .par_iter()
1604                .flat_map(|file_path| {
1605                    // Find file_id for this path
1606                    let file_id = match Self::find_file_id_by_path(
1607                        &content_reader,
1608                        &trigram_index,
1609                        file_path,
1610                    ) {
1611                        Some(id) => id,
1612                        None => {
1613                            log::warn!("Could not find file_id for path: {}", file_path);
1614                            return Vec::new();
1615                        }
1616                    };
1617
1618                    let content = match content_reader.get_file_content(file_id) {
1619                        Ok(c) => c,
1620                        Err(e) => {
1621                            log::warn!("Failed to read file {}: {}", file_path, e);
1622                            return Vec::new();
1623                        }
1624                    };
1625
1626                    // Detect language
1627                    let ext = std::path::Path::new(file_path)
1628                        .extension()
1629                        .and_then(|e| e.to_str())
1630                        .unwrap_or("");
1631                    let lang = Language::from_extension(ext);
1632
1633                    // Parse file to extract symbols
1634                    let symbols = match ParserFactory::parse(file_path, content, lang) {
1635                        Ok(symbols) => {
1636                            log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
1637                            symbols
1638                        }
1639                        Err(e) => {
1640                            log::debug!("Failed to parse {}: {}", file_path, e);
1641                            Vec::new()
1642                        }
1643                    };
1644
1645                    // Cache the parsed symbols (ignore errors - caching is best-effort)
1646                    if let Some(file_hash) = file_hashes.get(file_path.as_str()) {
1647                        if let Err(e) = symbol_cache.set(file_path, file_hash, &symbols) {
1648                            log::debug!("Failed to cache symbols for {}: {}", file_path, e);
1649                        }
1650                    }
1651
1652                    symbols
1653                })
1654                .collect()
1655        });
1656
1657        // Combine cached and parsed symbols
1658        let mut all_symbols: Vec<SearchResult> = Vec::new();
1659
1660        // Add all cached symbols
1661        for symbols in cached_symbols.values() {
1662            all_symbols.extend_from_slice(symbols);
1663        }
1664
1665        // Add all parsed symbols
1666        all_symbols.extend(parsed_symbols);
1667
1668        // KEYWORD DETECTION: Check if pattern is a language keyword (e.g., "class", "function")
1669        // If it matches a keyword AND symbols_mode is true, interpret as "list all symbols of that type"
1670        // rather than looking for a symbol literally named "class" or "function"
1671        //
1672        // IMPORTANT: Only check keywords for languages that will pass Phase 3 filtering.
1673        // If a language filter is specified, only check that language's keywords.
1674        // Otherwise, check all languages present in the symbol results.
1675        let is_keyword_query = {
1676            // Determine which language to check keywords for
1677            let lang_to_check = if let Some(lang) = filter.language {
1678                // Language filter specified - check that language only
1679                // This ensures keyword detection aligns with Phase 3 language filtering
1680                vec![lang]
1681            } else {
1682                // No language filter - check all languages that appear in the actual symbols
1683                // (not candidates, but the parsed symbols that made it through)
1684                // This handles mixed-language codebases correctly
1685                let mut langs: Vec<Language> =
1686                    all_symbols.iter().map(|s| s.lang).collect::<Vec<_>>();
1687                langs.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); // Deterministic ordering
1688                langs.dedup(); // Remove duplicates after sorting
1689                langs
1690            };
1691
1692            // Check if pattern matches a keyword in any of the relevant languages
1693            lang_to_check
1694                .iter()
1695                .any(|lang| ParserFactory::get_keywords(*lang).contains(&pattern))
1696        };
1697
1698        // If pattern is a keyword (like "class" or "function"), skip name-based filtering
1699        // and return all symbols (kind filtering happens in Phase 3)
1700        let filtered: Vec<SearchResult> = if is_keyword_query {
1701            log::info!(
1702                "Pattern '{}' is a language keyword - listing all symbols (kind filtering will be applied in Phase 3)",
1703                pattern
1704            );
1705            all_symbols
1706        } else if filter.use_regex {
1707            // For regex queries, candidates already matched content via regex in Phase 1.
1708            // Extract symbols whose line spans overlap with the candidate locations.
1709            // This ensures symbols are found at the locations where the regex matched.
1710
1711            // Build a map of (file_path, line_no) from candidates
1712            use std::collections::{HashMap, HashSet};
1713            let mut candidate_lines: HashMap<String, HashSet<usize>> = HashMap::new();
1714            for candidate in &files_by_path {
1715                for cand in candidate.1 {
1716                    candidate_lines
1717                        .entry(candidate.0.clone())
1718                        .or_insert_with(HashSet::new)
1719                        .insert(cand.span.start_line);
1720                }
1721            }
1722
1723            // Filter symbols whose spans overlap with candidate lines
1724            all_symbols
1725                .into_iter()
1726                .filter(|sym| {
1727                    if let Some(lines) = candidate_lines.get(&sym.path) {
1728                        // Check if symbol's line span overlaps with any candidate line
1729                        for line in sym.span.start_line..=sym.span.end_line {
1730                            if lines.contains(&line) {
1731                                return true;
1732                            }
1733                        }
1734                    }
1735                    false
1736                })
1737                .collect()
1738        } else if filter.use_contains {
1739            // Substring match (opt-in with --contains)
1740            all_symbols
1741                .into_iter()
1742                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
1743                .collect()
1744        } else {
1745            // Exact match (default)
1746            all_symbols
1747                .into_iter()
1748                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
1749                .collect()
1750        };
1751
1752        log::info!(
1753            "Symbol enrichment found {} matches for pattern '{}'",
1754            filtered.len(),
1755            pattern
1756        );
1757
1758        Ok(filtered)
1759    }
1760
1761    /// Enrich text match candidates with AST pattern matching
1762    ///
1763    /// Takes a list of text match candidates and executes a Tree-sitter AST query
1764    /// on the candidate files, returning only matches that satisfy the AST pattern.
1765    ///
1766    /// # Algorithm
1767    /// 1. Extract unique file paths from candidates
1768    /// 2. Load file contents for each candidate file
1769    /// 3. Execute AST query pattern using Tree-sitter
1770    /// 4. Return AST matches
1771    ///
1772    /// # Performance
1773    /// Only parses files that have text matches, so typically 10-100 files
1774    /// instead of the entire codebase (62K+ files).
1775    ///
1776    /// # Requirements
1777    /// - Language must be specified (AST queries are language-specific)
1778    /// - AST pattern must be valid S-expression syntax
1779    fn enrich_with_ast(
1780        &self,
1781        candidates: Vec<SearchResult>,
1782        ast_pattern: &str,
1783        language: Option<Language>,
1784    ) -> Result<Vec<SearchResult>> {
1785        // Require language for AST queries
1786        let lang = language.ok_or_else(|| anyhow::anyhow!(
1787            "Language must be specified for AST pattern matching. Use --lang to specify the language."
1788        ))?;
1789
1790        // Load content store for file reading
1791        let content_path = self.cache.path().join("content.bin");
1792        let content_reader =
1793            ContentReader::open(&content_path).context("Failed to open content store")?;
1794
1795        // Load trigram index for file path lookups
1796        let trigrams_path = self.cache.path().join("trigrams.bin");
1797        let trigram_index = if trigrams_path.exists() {
1798            TrigramIndex::load(&trigrams_path)?
1799        } else {
1800            Self::rebuild_trigram_index(&content_reader)?
1801        };
1802
1803        // Collect unique file paths from candidates and load their contents
1804        use std::collections::HashMap;
1805        let mut file_contents: HashMap<String, String> = HashMap::new();
1806
1807        for candidate in &candidates {
1808            if file_contents.contains_key(&candidate.path) {
1809                continue;
1810            }
1811
1812            // Find file_id for this path
1813            let file_id = match Self::find_file_id_by_path(
1814                &content_reader,
1815                &trigram_index,
1816                &candidate.path,
1817            ) {
1818                Some(id) => id,
1819                None => {
1820                    log::warn!("Could not find file_id for path: {}", candidate.path);
1821                    continue;
1822                }
1823            };
1824
1825            // Load file content
1826            let content = match content_reader.get_file_content(file_id) {
1827                Ok(c) => c,
1828                Err(e) => {
1829                    log::warn!("Failed to read file {}: {}", candidate.path, e);
1830                    continue;
1831                }
1832            };
1833
1834            file_contents.insert(candidate.path.clone(), content.to_string());
1835        }
1836
1837        log::debug!(
1838            "Executing AST query on {} candidate files with language {:?}",
1839            file_contents.len(),
1840            lang
1841        );
1842
1843        // Execute AST query using the ast_query module
1844        let results =
1845            crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1846
1847        log::info!(
1848            "AST query found {} matches for pattern '{}'",
1849            results.len(),
1850            ast_pattern
1851        );
1852
1853        Ok(results)
1854    }
1855
1856    /// Helper to find file_id by path string
1857    fn find_file_id_by_path(
1858        content_reader: &ContentReader,
1859        trigram_index: &TrigramIndex,
1860        target_path: &str,
1861    ) -> Option<u32> {
1862        // Try trigram index first (faster)
1863        for file_id in 0..trigram_index.file_count() {
1864            if let Some(path) = trigram_index.get_file(file_id as u32) {
1865                if path.to_string_lossy() == target_path {
1866                    return Some(file_id as u32);
1867                }
1868            }
1869        }
1870
1871        // Fallback to content reader
1872        for file_id in 0..content_reader.file_count() {
1873            if let Some(path) = content_reader.get_file_path(file_id as u32) {
1874                if path.to_string_lossy() == target_path {
1875                    return Some(file_id as u32);
1876                }
1877            }
1878        }
1879
1880        None
1881    }
1882
1883    /// Map keyword patterns to SymbolKind for auto-inference
1884    ///
1885    /// When users search for keywords like "class" or "function" with --symbols,
1886    /// automatically infer the kind filter to return only symbols of that type.
1887    ///
1888    /// This makes keyword queries more intuitive: searching for "class" returns
1889    /// only classes, not all symbols.
1890    fn keyword_to_kind(keyword: &str) -> Option<SymbolKind> {
1891        filter::keyword_to_kind(keyword)
1892    }
1893
1894    /// Get all files matching the language filter (for keyword queries)
1895    ///
1896    /// This method bypasses trigram search and returns ALL files of the specified language.
1897    /// Used for keyword queries like "list all classes" where we need complete coverage,
1898    /// not just the first 100 candidates from a trigram search.
1899    ///
1900    /// Similar to `search_ast_all_files()` but works for symbol queries instead of AST queries.
1901    fn get_all_language_files(&self, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1902        // Language filter is optional - if not specified, scan all files
1903        // If specified, only scan files of that language
1904
1905        // Load content store
1906        let content_path = self.cache.path().join("content.bin");
1907        let content_reader =
1908            ContentReader::open(&content_path).context("Failed to open content store")?;
1909
1910        // Build glob matchers if specified (for filtering)
1911        use globset::{Glob, GlobSetBuilder};
1912
1913        let include_matcher = if !filter.glob_patterns.is_empty() {
1914            let mut builder = GlobSetBuilder::new();
1915            for pattern in &filter.glob_patterns {
1916                let normalized = Self::normalize_glob_pattern(pattern);
1917                if let Ok(glob) = Glob::new(&normalized) {
1918                    builder.add(glob);
1919                }
1920            }
1921            builder.build().ok()
1922        } else {
1923            None
1924        };
1925
1926        let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1927            let mut builder = GlobSetBuilder::new();
1928            for pattern in &filter.exclude_patterns {
1929                let normalized = Self::normalize_glob_pattern(pattern);
1930                if let Ok(glob) = Glob::new(&normalized) {
1931                    builder.add(glob);
1932                }
1933            }
1934            builder.build().ok()
1935        } else {
1936            None
1937        };
1938
1939        // Scan all files and filter by language + glob patterns
1940        let mut candidates: Vec<SearchResult> = Vec::new();
1941
1942        for file_id in 0..content_reader.file_count() {
1943            let file_path = match content_reader.get_file_path(file_id as u32) {
1944                Some(p) => p,
1945                None => continue,
1946            };
1947
1948            // Detect language from file extension
1949            let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
1950            let detected_lang = Language::from_extension(ext);
1951
1952            // Filter by language (if specified)
1953            if let Some(lang) = filter.language {
1954                if detected_lang != lang {
1955                    continue;
1956                }
1957            }
1958
1959            let file_path_str = file_path.to_string_lossy().to_string();
1960
1961            // Apply glob/exclude filters
1962            let included = include_matcher
1963                .as_ref()
1964                .map_or(true, |m| m.is_match(&file_path_str));
1965            let excluded = exclude_matcher
1966                .as_ref()
1967                .map_or(false, |m| m.is_match(&file_path_str));
1968
1969            if !included || excluded {
1970                continue;
1971            }
1972
1973            // Apply file path filter if specified
1974            if let Some(ref file_pattern) = filter.file_pattern {
1975                if !file_path_str.contains(file_pattern) {
1976                    continue;
1977                }
1978            }
1979
1980            // Create a dummy candidate for this file
1981            // Phase 2 (symbol enrichment) will parse it and extract actual symbols
1982            candidates.push(SearchResult {
1983                path: file_path_str,
1984                lang: detected_lang,
1985                span: Span {
1986                    start_line: 1,
1987                    end_line: 1,
1988                },
1989                symbol: None,
1990                kind: SymbolKind::Unknown("keyword_query".to_string()),
1991                preview: String::new(),
1992                dependencies: None,
1993            });
1994        }
1995
1996        if let Some(lang) = filter.language {
1997            log::info!(
1998                "Keyword query will scan {} {:?} files for symbol extraction",
1999                candidates.len(),
2000                lang
2001            );
2002        } else {
2003            log::info!(
2004                "Keyword query will scan {} files (all languages) for symbol extraction",
2005                candidates.len()
2006            );
2007        }
2008
2009        Ok(candidates)
2010    }
2011
2012    /// Get candidate results using trigram-based full-text search
2013    fn get_trigram_candidates(
2014        &self,
2015        pattern: &str,
2016        filter: &QueryFilter,
2017    ) -> Result<Vec<SearchResult>> {
2018        // Load content store
2019        let content_path = self.cache.path().join("content.bin");
2020        let content_reader =
2021            ContentReader::open(&content_path).context("Failed to open content store")?;
2022
2023        // Patterns shorter than 3 chars have no trigrams, so the trigram index always
2024        // returns empty.  Fall back to a linear scan of the content store so that
2025        // --force (which bypasses the broad-query guard) still produces real results.
2026        if pattern.chars().count() < 3 {
2027            log::info!(
2028                "Pattern '{}' is shorter than 3 chars — trigram index cannot be used, \
2029                 falling back to linear scan",
2030                pattern
2031            );
2032            return self.linear_scan_candidates(pattern, filter, &content_reader);
2033        }
2034
2035        // Load trigram index from disk (or rebuild if missing)
2036        let trigrams_path = self.cache.path().join("trigrams.bin");
2037        let trigram_index = if trigrams_path.exists() {
2038            match TrigramIndex::load(&trigrams_path) {
2039                Ok(index) => {
2040                    log::debug!(
2041                        "Loaded trigram index from disk: {} trigrams, {} files",
2042                        index.trigram_count(),
2043                        index.file_count()
2044                    );
2045                    index
2046                }
2047                Err(e) => {
2048                    log::warn!("Failed to load trigram index from disk: {}", e);
2049                    log::warn!("Rebuilding trigram index from content store...");
2050                    Self::rebuild_trigram_index(&content_reader)?
2051                }
2052            }
2053        } else {
2054            log::debug!("trigrams.bin not found, rebuilding from content store");
2055            Self::rebuild_trigram_index(&content_reader)?
2056        };
2057
2058        // Search using trigrams
2059        let candidates = trigram_index.search(pattern);
2060        log::debug!(
2061            "Found {} candidate locations from trigram search",
2062            candidates.len()
2063        );
2064
2065        // Clone pattern to owned String for thread safety
2066        let pattern_owned = pattern.to_string();
2067
2068        // Compile regex once if in regex mode (before parallel processing for efficiency)
2069        let compiled_regex = if filter.use_regex {
2070            match Regex::new(&pattern_owned) {
2071                Ok(re) => Some(re),
2072                Err(e) => {
2073                    log::error!("Invalid regex pattern '{}': {}", pattern_owned, e);
2074                    anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e);
2075                }
2076            }
2077        } else {
2078            None
2079        };
2080
2081        // Group candidates by file for efficient processing
2082        use std::collections::HashMap;
2083        let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> =
2084            HashMap::new();
2085        for loc in candidates {
2086            candidates_by_file
2087                .entry(loc.file_id)
2088                .or_insert_with(Vec::new)
2089                .push(loc);
2090        }
2091
2092        log::debug!(
2093            "Scanning {} files with trigram matches",
2094            candidates_by_file.len()
2095        );
2096
2097        // Process files in parallel using rayon
2098        use rayon::prelude::*;
2099
2100        let results: Vec<SearchResult> = candidates_by_file
2101            .par_iter()
2102            .flat_map(|(file_id, locations)| {
2103                // Get file metadata
2104                let file_path = match trigram_index.get_file(*file_id) {
2105                    Some(p) => p,
2106                    None => return Vec::new(),
2107                };
2108
2109                let content = match content_reader.get_file_content(*file_id) {
2110                    Ok(c) => c,
2111                    Err(_) => return Vec::new(),
2112                };
2113
2114                let file_path_str = file_path.to_string_lossy().to_string();
2115
2116                // Detect language once per file
2117                let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2118                let lang = Language::from_extension(ext);
2119
2120                // Split content into lines once
2121                let lines: Vec<&str> = content.lines().collect();
2122
2123                // Use a HashSet to deduplicate results by line number
2124                let mut seen_lines: std::collections::HashSet<usize> =
2125                    std::collections::HashSet::new();
2126                let mut file_results = Vec::new();
2127
2128                // Only check the specific lines indicated by trigram posting lists
2129                for loc in locations {
2130                    let line_no = loc.line_no as usize;
2131
2132                    // Skip if we've already processed this line
2133                    if seen_lines.contains(&line_no) {
2134                        continue;
2135                    }
2136
2137                    // Bounds check
2138                    if line_no == 0 || line_no > lines.len() {
2139                        log::debug!(
2140                            "Line {} out of bounds (file has {} lines)",
2141                            line_no,
2142                            lines.len()
2143                        );
2144                        continue;
2145                    }
2146
2147                    let line = lines[line_no - 1];
2148
2149                    // Apply matching strategy based on filter mode:
2150                    // - Default: Word-boundary matching (restrictive - finds whole identifiers)
2151                    // - --contains: Substring matching (expansive - finds pattern anywhere)
2152                    // - --regex: Actual regex matching (controlled by pattern itself)
2153                    let line_matches = if filter.use_regex {
2154                        // Regex matching - use pre-compiled regex for efficiency
2155                        // The regex was compiled once outside the parallel loop
2156                        compiled_regex
2157                            .as_ref()
2158                            .map(|re| re.is_match(line))
2159                            .unwrap_or(false)
2160                    } else if filter.use_contains {
2161                        // Substring matching (expansive)
2162                        line.contains(&pattern_owned)
2163                    } else {
2164                        // Word-boundary matching (restrictive, default)
2165                        Self::has_word_boundary_match(line, &pattern_owned)
2166                    };
2167
2168                    if !line_matches {
2169                        continue;
2170                    }
2171
2172                    seen_lines.insert(line_no);
2173
2174                    // Create a text match result (no symbol lookup for performance)
2175                    file_results.push(SearchResult {
2176                        path: file_path_str.clone(),
2177                        lang: lang.clone(),
2178                        kind: SymbolKind::Unknown("text_match".to_string()),
2179                        symbol: None, // No symbol name for text matches (avoid duplication)
2180                        span: Span {
2181                            start_line: line_no,
2182                            end_line: line_no,
2183                        },
2184                        preview: line.to_string(),
2185                        dependencies: None,
2186                    });
2187                }
2188
2189                file_results
2190            })
2191            .collect();
2192
2193        Ok(results)
2194    }
2195
2196    /// Linear scan fallback for patterns shorter than 3 characters.
2197    ///
2198    /// The trigram index requires 3-char n-grams; patterns like "fn" or "i" yield
2199    /// zero trigrams and therefore zero results.  This method scans every file in
2200    /// the content store directly using the same matching logic (word-boundary,
2201    /// contains, or regex) so short-pattern queries always return real results.
2202    fn linear_scan_candidates(
2203        &self,
2204        pattern: &str,
2205        filter: &QueryFilter,
2206        content_reader: &ContentReader,
2207    ) -> Result<Vec<SearchResult>> {
2208        use rayon::prelude::*;
2209
2210        let pattern_owned = pattern.to_string();
2211        let file_count = content_reader.file_count();
2212
2213        let compiled_regex = if filter.use_regex {
2214            match Regex::new(&pattern_owned) {
2215                Ok(re) => Some(re),
2216                Err(e) => anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e),
2217            }
2218        } else {
2219            None
2220        };
2221
2222        let results: Vec<SearchResult> = (0..file_count as u32)
2223            .collect::<Vec<_>>()
2224            .par_iter()
2225            .flat_map(|&file_id| {
2226                let file_path = match content_reader.get_file_path(file_id) {
2227                    Some(p) => p.to_path_buf(),
2228                    None => return Vec::new(),
2229                };
2230                let content = match content_reader.get_file_content(file_id) {
2231                    Ok(c) => c,
2232                    Err(_) => return Vec::new(),
2233                };
2234
2235                let file_path_str = file_path.to_string_lossy().to_string();
2236                let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2237                let lang = Language::from_extension(ext);
2238
2239                let mut seen_lines = std::collections::HashSet::new();
2240                let mut file_results = Vec::new();
2241
2242                for (line_idx, line) in content.lines().enumerate() {
2243                    let line_no = line_idx + 1;
2244                    if seen_lines.contains(&line_no) {
2245                        continue;
2246                    }
2247
2248                    let line_matches = if filter.use_regex {
2249                        compiled_regex
2250                            .as_ref()
2251                            .map(|re| re.is_match(line))
2252                            .unwrap_or(false)
2253                    } else if filter.use_contains {
2254                        line.contains(&pattern_owned)
2255                    } else {
2256                        Self::has_word_boundary_match(line, &pattern_owned)
2257                    };
2258
2259                    if !line_matches {
2260                        continue;
2261                    }
2262
2263                    seen_lines.insert(line_no);
2264                    file_results.push(SearchResult {
2265                        path: file_path_str.clone(),
2266                        lang: lang.clone(),
2267                        kind: SymbolKind::Unknown("text_match".to_string()),
2268                        symbol: None,
2269                        span: Span {
2270                            start_line: line_no,
2271                            end_line: line_no,
2272                        },
2273                        preview: line.to_string(),
2274                        dependencies: None,
2275                    });
2276                }
2277
2278                file_results
2279            })
2280            .collect();
2281
2282        log::info!(
2283            "Linear scan (short pattern '{}') found {} results across {} files",
2284            pattern,
2285            results.len(),
2286            file_count
2287        );
2288        Ok(results)
2289    }
2290
2291    /// Get candidate results using regex patterns with trigram optimization
2292    ///
2293    /// # Algorithm
2294    ///
2295    /// 1. Extract literal sequences from the regex pattern (≥3 chars)
2296    /// 2. If literals found: search for files containing ANY of the literals (UNION)
2297    /// 3. If no literals: fall back to full content scan
2298    /// 4. Compile regex and verify matches in candidate files
2299    /// 5. Return matching results with context
2300    ///
2301    /// # File Selection Strategy
2302    ///
2303    /// Uses UNION of files containing any literal (conservative approach):
2304    /// - For alternation patterns `(a|b)`: Correctly searches files with a OR b
2305    /// - For sequential patterns `a.*b`: Searches files with a OR b (may include extra files)
2306    /// - Trade-off: Ensures correctness at the cost of scanning 2-3x more files for sequential patterns
2307    /// - Performance impact is minimal due to memory-mapped I/O (<5ms overhead typically)
2308    ///
2309    /// # Performance
2310    ///
2311    /// - Best case (pattern with literals): <20ms (trigram optimization)
2312    /// - Typical case (alternation/sequential): 5-15ms on small codebases (<100 files)
2313    /// - Worst case (no literals like `.*`): ~100ms (full scan)
2314    fn get_regex_candidates(
2315        &self,
2316        pattern: &str,
2317        timeout: Option<&std::time::Duration>,
2318        start_time: &std::time::Instant,
2319        suppress_output: bool,
2320    ) -> Result<Vec<SearchResult>> {
2321        // Step 1: Compile the regex
2322        let regex =
2323            Regex::new(pattern).with_context(|| format!("Invalid regex pattern: {}", pattern))?;
2324
2325        // Check timeout before expensive operations
2326        if let Some(timeout_duration) = timeout {
2327            if start_time.elapsed() > *timeout_duration {
2328                anyhow::bail!(
2329                    "Query timeout exceeded ({} seconds) during regex compilation",
2330                    timeout_duration.as_secs()
2331                );
2332            }
2333        }
2334
2335        // Step 2: Extract trigrams from regex
2336        let trigrams = extract_trigrams_from_regex(pattern);
2337
2338        // Load content store
2339        let content_path = self.cache.path().join("content.bin");
2340        let content_reader =
2341            ContentReader::open(&content_path).context("Failed to open content store")?;
2342
2343        let mut results = Vec::new();
2344
2345        if trigrams.is_empty() {
2346            // No trigrams - fall back to full scan
2347            if !suppress_output {
2348                output::warn(&format!(
2349                    "Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan. This may be slow on large codebases. Consider using patterns with literal text.",
2350                    pattern
2351                ));
2352            }
2353
2354            // Scan all files
2355            for file_id in 0..content_reader.file_count() {
2356                let file_path = content_reader
2357                    .get_file_path(file_id as u32)
2358                    .context("Invalid file_id")?;
2359                let content = content_reader.get_file_content(file_id as u32)?;
2360
2361                self.find_regex_matches_in_file(&regex, file_path, content, &mut results)?;
2362            }
2363        } else {
2364            // Use trigrams to narrow down candidates
2365            log::debug!(
2366                "Using {} trigrams to narrow regex search candidates",
2367                trigrams.len()
2368            );
2369
2370            // Load trigram index
2371            let trigrams_path = self.cache.path().join("trigrams.bin");
2372            let trigram_index = if trigrams_path.exists() {
2373                TrigramIndex::load(&trigrams_path)?
2374            } else {
2375                Self::rebuild_trigram_index(&content_reader)?
2376            };
2377
2378            // Extract the literal sequences from the regex pattern
2379            use crate::regex_trigrams::extract_literal_sequences;
2380            let literals = extract_literal_sequences(pattern);
2381
2382            if literals.is_empty() {
2383                log::warn!(
2384                    "Regex extraction found trigrams but no literal sequences - this shouldn't happen"
2385                );
2386                // Fall back to full scan
2387                for file_id in 0..content_reader.file_count() {
2388                    let file_path = content_reader
2389                        .get_file_path(file_id as u32)
2390                        .context("Invalid file_id")?;
2391                    let content = content_reader.get_file_content(file_id as u32)?;
2392                    self.find_regex_matches_in_file(&regex, file_path, content, &mut results)?;
2393                }
2394            } else {
2395                // Search for each literal sequence and union the results
2396                // This ensures we find matches for ANY literal (important for alternation patterns like (a|b))
2397                // Trade-off: May scan more files than necessary for sequential patterns (a.*b),
2398                // but ensures correctness for all regex patterns
2399                use std::collections::HashSet;
2400                let mut candidate_files: HashSet<u32> = HashSet::new();
2401
2402                for literal in &literals {
2403                    // Search for this literal in the trigram index
2404                    let candidates = trigram_index.search(literal);
2405                    let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
2406
2407                    log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
2408
2409                    // Union with existing candidate files (not intersection)
2410                    // This ensures we search files containing ANY of the literals
2411                    candidate_files.extend(file_ids);
2412                }
2413
2414                let final_candidates = candidate_files;
2415                log::debug!(
2416                    "After union: searching {} files that contain any literal",
2417                    final_candidates.len()
2418                );
2419
2420                // Verify regex matches in candidate files only
2421                for &file_id in &final_candidates {
2422                    let file_path = trigram_index
2423                        .get_file(file_id)
2424                        .context("Invalid file_id from trigram search")?;
2425                    let content = content_reader.get_file_content(file_id)?;
2426
2427                    self.find_regex_matches_in_file(&regex, file_path, content, &mut results)?;
2428                }
2429            }
2430        }
2431
2432        log::info!(
2433            "Regex search found {} matches for pattern '{}'",
2434            results.len(),
2435            pattern
2436        );
2437        Ok(results)
2438    }
2439
2440    /// Find all regex matches in a single file
2441    fn find_regex_matches_in_file(
2442        &self,
2443        regex: &Regex,
2444        file_path: &std::path::Path,
2445        content: &str,
2446        results: &mut Vec<SearchResult>,
2447    ) -> Result<()> {
2448        let file_path_str = file_path.to_string_lossy().to_string();
2449
2450        // Detect language from file extension
2451        let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2452        let lang = Language::from_extension(ext);
2453
2454        // Find all regex matches line by line
2455        for (line_idx, line) in content.lines().enumerate() {
2456            if regex.is_match(line) {
2457                let line_no = line_idx + 1;
2458
2459                // Create text match result
2460                // Note: We don't extract symbol names from regex matches because:
2461                // 1. Regex might match partial identifiers (e.g., "UserController" in "ListUserController")
2462                // 2. Regex might match across language-specific delimiters (namespaces, scopes, etc.)
2463                // 3. Accurate symbol extraction requires tree-sitter parsing (expensive)
2464                // The user can see the full context in the 'preview' field
2465                results.push(SearchResult {
2466                    path: file_path_str.clone(),
2467                    lang: lang.clone(),
2468                    kind: SymbolKind::Unknown("regex_match".to_string()),
2469                    symbol: None, // No symbol name for regex matches
2470                    span: Span {
2471                        start_line: line_no,
2472                        end_line: line_no,
2473                    },
2474                    preview: line.to_string(),
2475                    dependencies: None,
2476                });
2477            }
2478        }
2479
2480        Ok(())
2481    }
2482
2483    fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
2484        result::find_file_id(content_reader, target_path)
2485    }
2486
2487    fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
2488        result::rebuild_trigram_index(content_reader)
2489    }
2490
2491    fn normalize_glob_pattern(pattern: &str) -> String {
2492        result::normalize_glob_pattern(pattern)
2493    }
2494
2495    fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
2496        filter::has_word_boundary_match(line, pattern)
2497    }
2498
2499    /// Get index status for programmatic use (doesn't print warnings)
2500    ///
2501    /// Returns (status, can_trust_results, warning) tuple for JSON output.
2502    /// This is optimized for AI agents to detect staleness and auto-reindex.
2503    pub fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
2504        let root = self.cache.workspace_root();
2505
2506        // Check git state if in a git repo
2507        if crate::git::is_git_repo(&root) {
2508            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2509                // Check if we're on a different branch than what was indexed
2510                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
2511                    let warning = IndexWarning {
2512                        reason: format!("Branch '{}' has not been indexed", current_branch),
2513                        action_required: "rfx index".to_string(),
2514                        files_modified: None,
2515                        details: Some(IndexWarningDetails {
2516                            current_branch: Some(current_branch),
2517                            indexed_branch: None,
2518                            current_commit: None,
2519                            indexed_commit: None,
2520                        }),
2521                    };
2522                    return Ok((IndexStatus::Stale, false, Some(warning)));
2523                }
2524
2525                // Branch exists - check if commit changed
2526                if let (Ok(current_commit), Ok(branch_info)) = (
2527                    crate::git::get_current_commit(&root),
2528                    self.cache.get_branch_info(&current_branch),
2529                ) {
2530                    if branch_info.commit_sha != current_commit {
2531                        let warning = IndexWarning {
2532                            reason: format!(
2533                                "Commit changed from {} to {}",
2534                                &branch_info.commit_sha[..7],
2535                                &current_commit[..7]
2536                            ),
2537                            action_required: "rfx index".to_string(),
2538                            files_modified: None,
2539                            details: Some(IndexWarningDetails {
2540                                current_branch: Some(current_branch.clone()),
2541                                indexed_branch: Some(current_branch.clone()),
2542                                current_commit: Some(current_commit.clone()),
2543                                indexed_commit: Some(branch_info.commit_sha.clone()),
2544                            }),
2545                        };
2546                        return Ok((IndexStatus::Stale, false, Some(warning)));
2547                    }
2548
2549                    // If commits match, do a quick file freshness check
2550                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
2551                        let mut checked = 0;
2552                        let mut changed = 0;
2553                        const SAMPLE_SIZE: usize = 10;
2554
2555                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2556                            checked += 1;
2557                            let file_path = std::path::Path::new(path);
2558
2559                            if let Ok(metadata) = std::fs::metadata(file_path) {
2560                                if let Ok(modified) = metadata.modified() {
2561                                    let indexed_time = branch_info.last_indexed;
2562                                    let file_time = modified
2563                                        .duration_since(std::time::UNIX_EPOCH)
2564                                        .unwrap_or_default()
2565                                        .as_secs()
2566                                        as i64;
2567
2568                                    if file_time > indexed_time {
2569                                        // File modified after indexing - likely stale
2570                                        // Note: We skip hash verification for performance (mtime check is sufficient)
2571                                        changed += 1;
2572                                    }
2573                                }
2574                            }
2575                        }
2576
2577                        if changed > 0 {
2578                            let warning = IndexWarning {
2579                                reason: format!(
2580                                    "{} of {} sampled files modified",
2581                                    changed, checked
2582                                ),
2583                                action_required: "rfx index".to_string(),
2584                                files_modified: Some(changed as u32),
2585                                details: Some(IndexWarningDetails {
2586                                    current_branch: Some(current_branch.clone()),
2587                                    indexed_branch: Some(branch_info.branch.clone()),
2588                                    current_commit: Some(current_commit.clone()),
2589                                    indexed_commit: Some(branch_info.commit_sha.clone()),
2590                                }),
2591                            };
2592                            return Ok((IndexStatus::Stale, false, Some(warning)));
2593                        }
2594                    }
2595
2596                    // All checks passed - index is fresh
2597                    return Ok((IndexStatus::Fresh, true, None));
2598                }
2599            }
2600        }
2601
2602        // Not in a git repo or couldn't get git info - assume fresh
2603        Ok((IndexStatus::Fresh, true, None))
2604    }
2605
2606    /// Check index freshness and show non-blocking warnings
2607    ///
2608    /// This performs lightweight checks to warn users if their index might be stale:
2609    /// 1. Branch mismatch: indexed different branch
2610    /// 2. Commit changed: HEAD moved since indexing
2611    /// 3. File changes: quick mtime check on sample of files (if available)
2612    fn check_index_freshness(&self, filter: &QueryFilter) -> Result<()> {
2613        let root = self.cache.workspace_root();
2614
2615        // Check git state if in a git repo
2616        if crate::git::is_git_repo(&root) {
2617            if !crate::git::is_git_available() {
2618                static WARNED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
2619                if !filter.suppress_output {
2620                    WARNED.get_or_init(|| {
2621                        output::warn("⚠️  git binary not found in PATH; index freshness checks disabled for this session.");
2622                    });
2623                }
2624                return Ok(());
2625            }
2626            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2627                // Check if we're on a different branch than what was indexed
2628                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
2629                    if !filter.suppress_output {
2630                        output::warn(&format!(
2631                            "⚠️  WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.",
2632                            current_branch
2633                        ));
2634                    }
2635                    return Ok(());
2636                }
2637
2638                // Branch exists - check if commit changed
2639                if let (Ok(current_commit), Ok(branch_info)) = (
2640                    crate::git::get_current_commit(&root),
2641                    self.cache.get_branch_info(&current_branch),
2642                ) {
2643                    if branch_info.commit_sha != current_commit {
2644                        if !filter.suppress_output {
2645                            output::warn(&format!(
2646                                "⚠️  WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
2647                                &branch_info.commit_sha[..7],
2648                                &current_commit[..7]
2649                            ));
2650                        }
2651                        return Ok(());
2652                    }
2653
2654                    // If commits match, do a quick file freshness check
2655                    // Sample up to 10 files to check for modifications (cheap mtime check)
2656                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
2657                        let mut checked = 0;
2658                        let mut changed = 0;
2659                        const SAMPLE_SIZE: usize = 10;
2660
2661                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2662                            checked += 1;
2663                            let file_path = std::path::Path::new(path);
2664
2665                            // Check if file exists and has been modified (mtime/size heuristic)
2666                            if let Ok(metadata) = std::fs::metadata(file_path) {
2667                                if let Ok(modified) = metadata.modified() {
2668                                    let indexed_time = branch_info.last_indexed;
2669                                    let file_time = modified
2670                                        .duration_since(std::time::UNIX_EPOCH)
2671                                        .unwrap_or_default()
2672                                        .as_secs()
2673                                        as i64;
2674
2675                                    // If file modified after indexing, it might be stale
2676                                    if file_time > indexed_time {
2677                                        // File modified after indexing - likely stale
2678                                        // Note: We skip hash verification for performance (mtime check is sufficient)
2679                                        // This may cause false positives if files were touched without changes,
2680                                        // but the warning is non-blocking and vastly better than slow queries
2681                                        changed += 1;
2682                                    }
2683                                }
2684                            }
2685                        }
2686
2687                        if changed > 0 && !filter.suppress_output {
2688                            output::warn(&format!(
2689                                "⚠️  WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.",
2690                                changed, checked
2691                            ));
2692                        }
2693                    }
2694                }
2695            }
2696        }
2697
2698        Ok(())
2699    }
2700}
2701
/// Generate AI instruction based on query results
///
/// Provides context-aware guidance to AI agents on how to handle search results.
/// Uses priority-based logic to determine the most relevant instruction: the
/// checks below run top to bottom and the first one that applies wins.
///
/// # Arguments
///
/// * `result_count` - number of results in the current response
/// * `total_count` - total number of results the query produced
/// * `has_more` - whether further pages of results are available
/// * `symbols_mode`, `paths_only`, `use_ast`, `use_regex` - active search modes
/// * `language_filter`, `glob_filter`, `exact_mode` - whether the respective
///   filter/mode was active for the query
///
/// # Returns
///
/// `Some(instruction)` when one of the priority rules applies, or `None` for the
/// normal case where no special guidance is needed.
// The flat flag list is part of the public signature; bundling it into a struct
// would break existing callers.
#[allow(clippy::too_many_arguments)]
pub fn generate_ai_instruction(
    result_count: usize,
    total_count: usize,
    has_more: bool,
    symbols_mode: bool,
    paths_only: bool,
    use_ast: bool,
    use_regex: bool,
    language_filter: bool,
    glob_filter: bool,
    exact_mode: bool,
) -> Option<String> {
    // Priority 1: No results
    if result_count == 0 {
        return Some(
            "No results found. Consider these alternatives: 1) Check pattern spelling, 2) Remove --kind or --lang filters to broaden search, 3) Try partial match or related term, 4) Use search_regex tool for pattern matching with special characters or complex patterns."
            .to_string()
        );
    }

    // Priority 2: Query too broad (500+ results)
    if total_count >= 500 {
        return Some(format!(
            "Query too broad: {} results found. STOP. Do not list results. Refine search automatically by adding filters: kind parameter (Function/Struct/Class), lang parameter (rust/python/etc), or glob parameter (['src/**/*.rs']). Call search_code again with appropriate filters.",
            total_count
        ));
    }

    // Priority 3: Paginated results
    if has_more {
        return Some(format!(
            "Showing {} of {} results. PAGINATED - there are more results available. Do not automatically fetch all results. Show current page, ask user if these results answer their question before fetching more with --offset parameter.",
            result_count, total_count
        ));
    }

    // Priority 4: Single precise result (symbols mode)
    if result_count == 1 && symbols_mode {
        return Some(
            "Found 1 precise result. Respond concisely: '[symbol] at [path]:[line]'.".to_string(),
        );
    }

    // Priority 5: Few precise results (symbols mode)
    if symbols_mode && (2..=10).contains(&result_count) {
        return Some(format!(
            "Found {} precise results (definitions only, not usages). List locations concisely: '[symbol] at [path]:[line]' for each result.",
            result_count
        ));
    }

    // Priority 6: Many results (101-499; 500+ was already handled by priority 2)
    if (101..500).contains(&total_count) {
        return Some(format!(
            "Found {} results - this is broad. Suggest refining search with: kind parameter (Function/Struct/Class/etc), lang parameter (rust/python/etc), or glob parameter to narrow file scope.",
            total_count
        ));
    }

    // Priority 7: Full-text mode with many results (suggest symbols mode)
    if result_count >= 100 && !symbols_mode {
        return Some(format!(
            "Found {} results in full-text search mode (includes definitions AND all usages). Consider using symbols=true parameter to filter to definitions only. This typically reduces results by 80-90%.",
            result_count
        ));
    }

    // Priority 8: Paths-only mode
    if paths_only {
        return Some(format!(
            "Found {} unique files (paths-only mode - no code content included). Next step: Use Read tool on specific files that look relevant based on their paths.",
            result_count
        ));
    }

    // Priority 9: AST query results
    if use_ast {
        return Some(format!(
            "Found {} results using AST pattern matching. These are structure-based matches using Tree-sitter patterns, not text search.",
            result_count
        ));
    }

    // Priority 10: Regex with many results
    if use_regex && result_count >= 100 {
        return Some(format!(
            "Found {} results using regex pattern matching. Regex matches are expansive. Consider using exact text search or symbols mode for more precise results.",
            result_count
        ));
    }

    // Priority 11: Language filter with few results
    if language_filter && result_count <= 5 {
        return Some(format!(
            "Found {} results with language filter active. Results are limited to this language only. Remove lang parameter if you want to search all languages.",
            result_count
        ));
    }

    // Priority 12: Glob filter with few results
    if glob_filter && result_count <= 10 {
        return Some(format!(
            "Found {} results with glob filter active. Results are limited to matching paths. Remove glob parameter to search entire codebase.",
            result_count
        ));
    }

    // Priority 13: Exact mode with few results
    if exact_mode && result_count <= 5 {
        return Some(format!(
            "Found {} results in exact match mode. Only exact symbol name matches are included. Remove exact parameter to allow substring matching.",
            result_count
        ));
    }

    // Normal case (no special conditions) - no instruction
    None
}
2824
2825#[cfg(test)]
2826mod tests {
2827    use super::*;
2828    use crate::indexer::Indexer;
2829    use crate::models::IndexConfig;
2830    use std::fs;
2831    use tempfile::TempDir;
2832
2833    // ==================== Basic Tests ====================
2834
2835    #[test]
2836    fn test_query_engine_creation() {
2837        let temp = TempDir::new().unwrap();
2838        let cache = CacheManager::new(temp.path());
2839        let engine = QueryEngine::new(cache);
2840
2841        assert!(engine.cache.path().ends_with(".reflex"));
2842    }
2843
2844    #[test]
2845    fn test_filter_modes() {
2846        // Test that symbols_mode works as expected
2847        let filter_fulltext = QueryFilter::default();
2848        assert!(!filter_fulltext.symbols_mode);
2849
2850        let filter_symbols = QueryFilter {
2851            symbols_mode: true,
2852            ..Default::default()
2853        };
2854        assert!(filter_symbols.symbols_mode);
2855
2856        // Test that kind implies symbols_mode (handled in CLI layer)
2857        let filter_with_kind = QueryFilter {
2858            kind: Some(SymbolKind::Function),
2859            symbols_mode: true,
2860            ..Default::default()
2861        };
2862        assert!(filter_with_kind.symbols_mode);
2863    }
2864
2865    // ==================== Search Mode Tests ====================
2866
2867    #[test]
2868    fn test_fulltext_search() {
2869        let temp = TempDir::new().unwrap();
2870        let project = temp.path().join("project");
2871        fs::create_dir(&project).unwrap();
2872
2873        // Create test files
2874        fs::write(
2875            project.join("main.rs"),
2876            "fn main() {\n    println!(\"hello\");\n}",
2877        )
2878        .unwrap();
2879        fs::write(project.join("lib.rs"), "pub fn hello() {}").unwrap();
2880
2881        // Index the project
2882        let cache = CacheManager::new(&project);
2883        let indexer = Indexer::new(cache, IndexConfig::default());
2884        indexer.index(&project, false).unwrap();
2885
2886        // Search for "hello"
2887        let cache = CacheManager::new(&project);
2888        let engine = QueryEngine::new(cache);
2889        let filter = QueryFilter::default(); // full-text mode
2890        let results = engine.search("hello", filter).unwrap();
2891
2892        // Should find both occurrences (println and function name)
2893        assert!(results.len() >= 2);
2894        assert!(results.iter().any(|r| r.path.contains("main.rs")));
2895        assert!(results.iter().any(|r| r.path.contains("lib.rs")));
2896    }
2897
2898    #[test]
2899    fn test_symbol_search() {
2900        let temp = TempDir::new().unwrap();
2901        let project = temp.path().join("project");
2902        fs::create_dir(&project).unwrap();
2903
2904        // Create test file with function definition and call
2905        fs::write(
2906            project.join("main.rs"),
2907            "fn greet() {}\nfn main() {\n    greet();\n}",
2908        )
2909        .unwrap();
2910
2911        // Index
2912        let cache = CacheManager::new(&project);
2913        let indexer = Indexer::new(cache, IndexConfig::default());
2914        indexer.index(&project, false).unwrap();
2915
2916        let cache = CacheManager::new(&project);
2917
2918        // Symbol search (definitions only)
2919        let engine = QueryEngine::new(cache);
2920        let filter = QueryFilter {
2921            symbols_mode: true,
2922            ..Default::default()
2923        };
2924        let results = engine.search("greet", filter).unwrap();
2925
2926        // Should find only the definition, not the call
2927        assert!(results.len() >= 1);
2928        assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
2929    }
2930
2931    #[test]
2932    fn test_regex_search() {
2933        let temp = TempDir::new().unwrap();
2934        let project = temp.path().join("project");
2935        fs::create_dir(&project).unwrap();
2936
2937        fs::write(
2938            project.join("main.rs"),
2939            "fn test1() {}\nfn test2() {}\nfn other() {}",
2940        )
2941        .unwrap();
2942
2943        let cache = CacheManager::new(&project);
2944        let indexer = Indexer::new(cache, IndexConfig::default());
2945        indexer.index(&project, false).unwrap();
2946
2947        let cache = CacheManager::new(&project);
2948
2949        let engine = QueryEngine::new(cache);
2950        let filter = QueryFilter {
2951            use_regex: true,
2952            ..Default::default()
2953        };
2954        let results = engine.search(r"fn test\d", filter).unwrap();
2955
2956        // Should match test1 and test2 but not other
2957        assert_eq!(results.len(), 2);
2958        assert!(results.iter().all(|r| r.preview.contains("test")));
2959    }
2960
2961    // ==================== Filter Tests ====================
2962
2963    #[test]
2964    fn test_language_filter() {
2965        let temp = TempDir::new().unwrap();
2966        let project = temp.path().join("project");
2967        fs::create_dir(&project).unwrap();
2968
2969        fs::write(project.join("main.rs"), "fn main() {}").unwrap();
2970        fs::write(project.join("main.js"), "function main() {}").unwrap();
2971
2972        let cache = CacheManager::new(&project);
2973        let indexer = Indexer::new(cache, IndexConfig::default());
2974        indexer.index(&project, false).unwrap();
2975
2976        let cache = CacheManager::new(&project);
2977
2978        let engine = QueryEngine::new(cache);
2979
2980        // Filter to Rust only
2981        let filter = QueryFilter {
2982            language: Some(Language::Rust),
2983            ..Default::default()
2984        };
2985        let results = engine.search("main", filter).unwrap();
2986
2987        assert!(results.iter().all(|r| r.lang == Language::Rust));
2988        assert!(results.iter().all(|r| r.path.ends_with(".rs")));
2989    }
2990
2991    #[test]
2992    fn test_kind_filter() {
2993        let temp = TempDir::new().unwrap();
2994        let project = temp.path().join("project");
2995        fs::create_dir(&project).unwrap();
2996
2997        fs::write(
2998            project.join("main.rs"),
2999            "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }",
3000        )
3001        .unwrap();
3002
3003        let cache = CacheManager::new(&project);
3004        let indexer = Indexer::new(cache, IndexConfig::default());
3005        indexer.index(&project, false).unwrap();
3006
3007        let cache = CacheManager::new(&project);
3008
3009        let engine = QueryEngine::new(cache);
3010
3011        // Filter to functions only (includes methods)
3012        let filter = QueryFilter {
3013            symbols_mode: true,
3014            kind: Some(SymbolKind::Function),
3015            use_contains: true, // "mai" is substring of "main"
3016            ..Default::default()
3017        };
3018        // Search for "mai" which should match "main" (tri gram pattern will def be in index)
3019        let results = engine.search("mai", filter).unwrap();
3020
3021        // Should find main function
3022        assert!(results.len() > 0, "Should find at least one result");
3023        assert!(
3024            results.iter().any(|r| r.symbol.as_deref() == Some("main")),
3025            "Should find 'main' function"
3026        );
3027    }
3028
3029    #[test]
3030    fn test_file_pattern_filter() {
3031        let temp = TempDir::new().unwrap();
3032        let project = temp.path().join("project");
3033        fs::create_dir_all(project.join("src")).unwrap();
3034        fs::create_dir_all(project.join("tests")).unwrap();
3035
3036        fs::write(project.join("src/lib.rs"), "fn foo() {}").unwrap();
3037        fs::write(project.join("tests/test.rs"), "fn foo() {}").unwrap();
3038
3039        let cache = CacheManager::new(&project);
3040        let indexer = Indexer::new(cache, IndexConfig::default());
3041        indexer.index(&project, false).unwrap();
3042
3043        let cache = CacheManager::new(&project);
3044
3045        let engine = QueryEngine::new(cache);
3046
3047        // Filter to src/ only
3048        let filter = QueryFilter {
3049            file_pattern: Some("src/".to_string()),
3050            ..Default::default()
3051        };
3052        let results = engine.search("foo", filter).unwrap();
3053
3054        assert!(results.iter().all(|r| r.path.contains("src/")));
3055        assert!(!results.iter().any(|r| r.path.contains("tests/")));
3056    }
3057
3058    #[test]
3059    fn test_limit_filter() {
3060        let temp = TempDir::new().unwrap();
3061        let project = temp.path().join("project");
3062        fs::create_dir(&project).unwrap();
3063
3064        // Create file with many matches
3065        let content = (0..20)
3066            .map(|i| format!("fn test{}() {{}}", i))
3067            .collect::<Vec<_>>()
3068            .join("\n");
3069        fs::write(project.join("main.rs"), content).unwrap();
3070
3071        let cache = CacheManager::new(&project);
3072        let indexer = Indexer::new(cache, IndexConfig::default());
3073        indexer.index(&project, false).unwrap();
3074
3075        let cache = CacheManager::new(&project);
3076
3077        let engine = QueryEngine::new(cache);
3078
3079        // Limit to 5 results
3080        let filter = QueryFilter {
3081            limit: Some(5),
3082            use_contains: true, // "test" is substring of "test0", "test1", etc.
3083            ..Default::default()
3084        };
3085        let results = engine.search("test", filter).unwrap();
3086
3087        assert_eq!(results.len(), 5);
3088    }
3089
3090    #[test]
3091    fn test_exact_match_filter() {
3092        let temp = TempDir::new().unwrap();
3093        let project = temp.path().join("project");
3094        fs::create_dir(&project).unwrap();
3095
3096        fs::write(
3097            project.join("main.rs"),
3098            "fn test() {}\nfn test_helper() {}\nfn other_test() {}",
3099        )
3100        .unwrap();
3101
3102        let cache = CacheManager::new(&project);
3103        let indexer = Indexer::new(cache, IndexConfig::default());
3104        indexer.index(&project, false).unwrap();
3105
3106        let cache = CacheManager::new(&project);
3107
3108        let engine = QueryEngine::new(cache);
3109
3110        // Exact match for "test"
3111        let filter = QueryFilter {
3112            symbols_mode: true,
3113            exact: true,
3114            ..Default::default()
3115        };
3116        let results = engine.search("test", filter).unwrap();
3117
3118        // Should only match exactly "test", not "test_helper" or "other_test"
3119        assert_eq!(results.len(), 1);
3120        assert_eq!(results[0].symbol.as_deref(), Some("test"));
3121    }
3122
3123    // ==================== Expand Mode Tests ====================
3124
3125    #[test]
3126    fn test_expand_mode() {
3127        let temp = TempDir::new().unwrap();
3128        let project = temp.path().join("project");
3129        fs::create_dir(&project).unwrap();
3130
3131        fs::write(
3132            project.join("main.rs"),
3133            "fn greet() {\n    println!(\"Hello\");\n    println!(\"World\");\n}",
3134        )
3135        .unwrap();
3136
3137        let cache = CacheManager::new(&project);
3138        let indexer = Indexer::new(cache, IndexConfig::default());
3139        indexer.index(&project, false).unwrap();
3140
3141        let cache = CacheManager::new(&project);
3142
3143        let engine = QueryEngine::new(cache);
3144
3145        // Search with expand mode
3146        let filter = QueryFilter {
3147            symbols_mode: true,
3148            expand: true,
3149            ..Default::default()
3150        };
3151        let results = engine.search("greet", filter).unwrap();
3152
3153        // Should have full function body in preview
3154        assert!(results.len() >= 1);
3155        let result = &results[0];
3156        assert!(result.preview.contains("println"));
3157    }
3158
3159    // ==================== Edge Cases ====================
3160
3161    #[test]
3162    fn test_search_empty_index() {
3163        let temp = TempDir::new().unwrap();
3164        let project = temp.path().join("project");
3165        fs::create_dir(&project).unwrap();
3166
3167        let cache = CacheManager::new(&project);
3168        let indexer = Indexer::new(cache, IndexConfig::default());
3169        indexer.index(&project, false).unwrap();
3170
3171        let cache = CacheManager::new(&project);
3172
3173        let engine = QueryEngine::new(cache);
3174        let filter = QueryFilter::default();
3175        let results = engine.search("nonexistent", filter).unwrap();
3176
3177        assert_eq!(results.len(), 0);
3178    }
3179
3180    #[test]
3181    fn test_search_no_index() {
3182        let temp = TempDir::new().unwrap();
3183        let project = temp.path().join("project");
3184        fs::create_dir(&project).unwrap();
3185
3186        let cache = CacheManager::new(&project);
3187        let engine = QueryEngine::new(cache);
3188        let filter = QueryFilter::default();
3189
3190        // Should fail when index doesn't exist
3191        assert!(engine.search("test", filter).is_err());
3192    }
3193
3194    #[test]
3195    fn test_search_special_characters() {
3196        let temp = TempDir::new().unwrap();
3197        let project = temp.path().join("project");
3198        fs::create_dir(&project).unwrap();
3199
3200        fs::write(project.join("main.rs"), "let x = 42;\nlet y = x + 1;").unwrap();
3201
3202        let cache = CacheManager::new(&project);
3203        let indexer = Indexer::new(cache, IndexConfig::default());
3204        indexer.index(&project, false).unwrap();
3205
3206        let cache = CacheManager::new(&project);
3207
3208        let engine = QueryEngine::new(cache);
3209        let filter = QueryFilter::default();
3210
3211        // Search for special characters
3212        let results = engine.search("x + ", filter).unwrap();
3213        assert!(results.len() >= 1);
3214    }
3215
3216    #[test]
3217    fn test_search_unicode() {
3218        let temp = TempDir::new().unwrap();
3219        let project = temp.path().join("project");
3220        fs::create_dir(&project).unwrap();
3221
3222        fs::write(project.join("main.rs"), "// 你好世界\nfn main() {}").unwrap();
3223
3224        let cache = CacheManager::new(&project);
3225        let indexer = Indexer::new(cache, IndexConfig::default());
3226        indexer.index(&project, false).unwrap();
3227
3228        let cache = CacheManager::new(&project);
3229
3230        let engine = QueryEngine::new(cache);
3231        let filter = QueryFilter {
3232            use_contains: true, // Unicode word boundaries may not work as expected
3233            force: true,        // Bypass broad query detection for 2-char Unicode pattern
3234            ..Default::default()
3235        };
3236
3237        // Search for unicode characters
3238        let results = engine.search("你好", filter).unwrap();
3239        assert!(results.len() >= 1);
3240    }
3241
3242    #[test]
3243    fn test_case_sensitive_search() {
3244        let temp = TempDir::new().unwrap();
3245        let project = temp.path().join("project");
3246        fs::create_dir(&project).unwrap();
3247
3248        fs::write(project.join("main.rs"), "fn Test() {}\nfn test() {}").unwrap();
3249
3250        let cache = CacheManager::new(&project);
3251        let indexer = Indexer::new(cache, IndexConfig::default());
3252        indexer.index(&project, false).unwrap();
3253
3254        let cache = CacheManager::new(&project);
3255
3256        let engine = QueryEngine::new(cache);
3257        let filter = QueryFilter::default();
3258
3259        // Search is case-sensitive
3260        let results = engine.search("Test", filter).unwrap();
3261        assert!(results.iter().any(|r| r.preview.contains("Test()")));
3262    }
3263
3264    // ==================== Determinism Tests ====================
3265
3266    #[test]
3267    fn test_results_sorted_deterministically() {
3268        let temp = TempDir::new().unwrap();
3269        let project = temp.path().join("project");
3270        fs::create_dir(&project).unwrap();
3271
3272        fs::write(project.join("a.rs"), "fn test() {}").unwrap();
3273        fs::write(project.join("z.rs"), "fn test() {}").unwrap();
3274        fs::write(project.join("m.rs"), "fn test() {}\nfn test2() {}").unwrap();
3275
3276        let cache = CacheManager::new(&project);
3277        let indexer = Indexer::new(cache, IndexConfig::default());
3278        indexer.index(&project, false).unwrap();
3279
3280        let cache = CacheManager::new(&project);
3281
3282        let engine = QueryEngine::new(cache);
3283        let filter = QueryFilter::default();
3284
3285        // Run search multiple times
3286        let results1 = engine.search("test", filter.clone()).unwrap();
3287        let results2 = engine.search("test", filter.clone()).unwrap();
3288        let results3 = engine.search("test", filter).unwrap();
3289
3290        // Results should be identical and sorted by path then line
3291        assert_eq!(results1.len(), results2.len());
3292        assert_eq!(results1.len(), results3.len());
3293
3294        for i in 0..results1.len() {
3295            assert_eq!(results1[i].path, results2[i].path);
3296            assert_eq!(results1[i].path, results3[i].path);
3297            assert_eq!(results1[i].span.start_line, results2[i].span.start_line);
3298            assert_eq!(results1[i].span.start_line, results3[i].span.start_line);
3299        }
3300
3301        // Verify sorting (path ascending, then line ascending)
3302        for i in 0..results1.len().saturating_sub(1) {
3303            let curr = &results1[i];
3304            let next = &results1[i + 1];
3305            assert!(
3306                curr.path < next.path
3307                    || (curr.path == next.path && curr.span.start_line <= next.span.start_line)
3308            );
3309        }
3310    }
3311
3312    // ==================== Combined Filter Tests ====================
3313
3314    #[test]
3315    fn test_multiple_filters_combined() {
3316        let temp = TempDir::new().unwrap();
3317        let project = temp.path().join("project");
3318        fs::create_dir_all(project.join("src")).unwrap();
3319
3320        fs::write(project.join("src/main.rs"), "fn test() {}\nstruct Test {}").unwrap();
3321        fs::write(project.join("src/lib.rs"), "fn test() {}").unwrap();
3322        fs::write(project.join("test.js"), "function test() {}").unwrap();
3323
3324        let cache = CacheManager::new(&project);
3325        let indexer = Indexer::new(cache, IndexConfig::default());
3326        indexer.index(&project, false).unwrap();
3327
3328        let cache = CacheManager::new(&project);
3329
3330        let engine = QueryEngine::new(cache);
3331
3332        // Combine language, kind, and file pattern filters
3333        let filter = QueryFilter {
3334            language: Some(Language::Rust),
3335            kind: Some(SymbolKind::Function),
3336            file_pattern: Some("src/main".to_string()),
3337            symbols_mode: true,
3338            ..Default::default()
3339        };
3340        let results = engine.search("test", filter).unwrap();
3341
3342        // Should only find the function in src/main.rs
3343        assert_eq!(results.len(), 1);
3344        assert!(results[0].path.contains("src/main.rs"));
3345        assert_eq!(results[0].kind, SymbolKind::Function);
3346    }
3347
3348    // ==================== Helper Method Tests ====================
3349
3350    #[test]
3351    fn test_find_symbol_helper() {
3352        let temp = TempDir::new().unwrap();
3353        let project = temp.path().join("project");
3354        fs::create_dir(&project).unwrap();
3355
3356        fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
3357
3358        let cache = CacheManager::new(&project);
3359        let indexer = Indexer::new(cache, IndexConfig::default());
3360        indexer.index(&project, false).unwrap();
3361
3362        let cache = CacheManager::new(&project);
3363
3364        let engine = QueryEngine::new(cache);
3365        let results = engine.find_symbol("greet").unwrap();
3366
3367        assert!(results.len() >= 1);
3368        assert_eq!(results[0].kind, SymbolKind::Function);
3369    }
3370
3371    #[test]
3372    fn test_list_by_kind_helper() {
3373        let temp = TempDir::new().unwrap();
3374        let project = temp.path().join("project");
3375        fs::create_dir(&project).unwrap();
3376
3377        fs::write(
3378            project.join("main.rs"),
3379            "struct Point {}\nfn test() {}\nstruct Line {}",
3380        )
3381        .unwrap();
3382
3383        let cache = CacheManager::new(&project);
3384        let indexer = Indexer::new(cache, IndexConfig::default());
3385        indexer.index(&project, false).unwrap();
3386
3387        let cache = CacheManager::new(&project);
3388
3389        let engine = QueryEngine::new(cache);
3390
3391        // Search for structs that contain "oin" (Point contains it, Line doesn't)
3392        let filter = QueryFilter {
3393            kind: Some(SymbolKind::Struct),
3394            symbols_mode: true,
3395            use_contains: true, // "oin" is substring of "Point"
3396            ..Default::default()
3397        };
3398        let results = engine.search("oin", filter).unwrap();
3399
3400        // Should find Point struct
3401        assert!(results.len() >= 1, "Should find at least Point struct");
3402        assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
3403        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
3404    }
3405
3406    // ==================== Metadata Tests ====================
3407
3408    #[test]
3409    fn test_search_with_metadata() {
3410        let temp = TempDir::new().unwrap();
3411        let project = temp.path().join("project");
3412        fs::create_dir(&project).unwrap();
3413
3414        fs::write(project.join("main.rs"), "fn test() {}").unwrap();
3415
3416        let cache = CacheManager::new(&project);
3417        let indexer = Indexer::new(cache, IndexConfig::default());
3418        indexer.index(&project, false).unwrap();
3419
3420        let cache = CacheManager::new(&project);
3421
3422        let engine = QueryEngine::new(cache);
3423        let filter = QueryFilter::default();
3424        let response = engine.search_with_metadata("test", filter).unwrap();
3425
3426        // Check metadata is present (status might be stale if run inside git repo)
3427        assert!(response.results.len() >= 1);
3428        // Note: can_trust_results may be false if running in a git repo without branch index
3429    }
3430
3431    // ==================== Multi-language Tests ====================
3432
3433    #[test]
3434    fn test_search_across_languages() {
3435        let temp = TempDir::new().unwrap();
3436        let project = temp.path().join("project");
3437        fs::create_dir(&project).unwrap();
3438
3439        fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
3440        fs::write(project.join("main.ts"), "function greet() {}").unwrap();
3441        fs::write(project.join("main.py"), "def greet(): pass").unwrap();
3442
3443        let cache = CacheManager::new(&project);
3444        let indexer = Indexer::new(cache, IndexConfig::default());
3445        indexer.index(&project, false).unwrap();
3446
3447        let cache = CacheManager::new(&project);
3448
3449        let engine = QueryEngine::new(cache);
3450        let filter = QueryFilter::default();
3451        let results = engine.search("greet", filter).unwrap();
3452
3453        // Should find greet in all three languages
3454        assert!(results.len() >= 3);
3455        assert!(results.iter().any(|r| r.lang == Language::Rust));
3456        assert!(results.iter().any(|r| r.lang == Language::TypeScript));
3457        assert!(results.iter().any(|r| r.lang == Language::Python));
3458    }
3459}