1pub mod filter;
7pub mod result;
8
9pub use filter::QueryFilter;
10
11use anyhow::{Context, Result};
12use regex::Regex;
13
14use crate::cache::CacheManager;
15use crate::content_store::ContentReader;
16use crate::models::{
17 IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
18 SymbolKind,
19};
20use crate::output;
21use crate::parsers::ParserFactory;
22use crate::regex_trigrams::extract_trigrams_from_regex;
23use crate::trigram::TrigramIndex;
24
/// Runs searches against a previously built index.
///
/// Every query method on this type reads through the wrapped [`CacheManager`].
pub struct QueryEngine {
    /// Handle to the index cache that all queries read from.
    cache: CacheManager,
}
29
30impl QueryEngine {
31 pub fn new(cache: CacheManager) -> Self {
33 Self { cache }
34 }
35
36 fn load_dependencies(&self, results: &mut [SearchResult], include_deps: bool) -> Result<()> {
39 if !include_deps || results.is_empty() {
40 return Ok(());
41 }
42
43 log::debug!("Loading dependencies for {} results", results.len());
44
45 let workspace_root = self.cache.path().parent()
49 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
50 let cache_for_deps = CacheManager::new(workspace_root);
51 let dep_index = crate::dependency::DependencyIndex::new(cache_for_deps);
52
53 for result in results {
55 let normalized_path = result.path.strip_prefix("./").unwrap_or(&result.path);
57
58 match self.cache.get_file_id(normalized_path) {
60 Ok(Some(file_id)) => {
61 log::debug!("Found file_id={} for path={}", file_id, result.path);
62 match dep_index.get_dependencies_info(file_id) {
64 Ok(dep_infos) => {
65 log::debug!("Loaded {} dependencies for file_id={}", dep_infos.len(), file_id);
66 if !dep_infos.is_empty() {
67 result.dependencies = Some(dep_infos);
68 }
69 }
70 Err(e) => {
71 log::warn!("Failed to get dependencies for file_id={}: {}", file_id, e);
72 }
73 }
74 }
75 Ok(None) => {
76 log::warn!("No file_id found for path: {}", result.path);
77 }
78 Err(e) => {
79 log::warn!("Failed to get file_id for path {}: {}", result.path, e);
80 }
81 }
82 }
83
84 Ok(())
85 }
86
87 fn group_and_load_dependencies(
90 &self,
91 results: Vec<SearchResult>,
92 include_deps: bool,
93 context_lines: usize,
94 ) -> Result<Vec<crate::models::FileGroupedResult>> {
95 use std::collections::HashMap;
96 use crate::models::{FileGroupedResult, MatchResult};
97
98 if results.is_empty() {
99 return Ok(Vec::new());
100 }
101
102 let mut grouped: HashMap<String, Vec<SearchResult>> = HashMap::new();
104 for result in results {
105 grouped
106 .entry(result.path.clone())
107 .or_default()
108 .push(result);
109 }
110
111 let dep_index = if include_deps {
113 let workspace_root = self.cache.path().parent()
114 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
115 let cache_for_deps = CacheManager::new(workspace_root);
116 Some(crate::dependency::DependencyIndex::new(cache_for_deps))
117 } else {
118 None
119 };
120
121 let content_path = self.cache.path().join("content.bin");
123 let content_reader_opt = ContentReader::open(&content_path).ok();
124
125 let mut file_results: Vec<FileGroupedResult> = grouped
127 .into_iter()
128 .map(|(path, file_matches)| {
129 let language = file_matches.first().map(|r| r.lang).unwrap_or_default();
131
132 let dependencies = if let Some(dep_idx) = &dep_index {
134 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
135 match self.cache.get_file_id(normalized_path) {
136 Ok(Some(file_id)) => {
137 match dep_idx.get_dependencies_info(file_id) {
138 Ok(dep_infos) if !dep_infos.is_empty() => {
139 log::debug!("Loaded {} dependencies for file: {}", dep_infos.len(), path);
140 Some(dep_infos)
141 }
142 Ok(_) => None,
143 Err(e) => {
144 log::warn!("Failed to get dependencies for {}: {}", path, e);
145 None
146 }
147 }
148 }
149 Ok(None) => {
150 log::warn!("No file_id found for path: {}", path);
151 None
152 }
153 Err(e) => {
154 log::warn!("Failed to get file_id for path {}: {}", path, e);
155 None
156 }
157 }
158 } else {
159 None
160 };
161
162 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
166 let file_id_for_context = if let Some(reader) = &content_reader_opt {
167 reader.get_file_id_by_path(normalized_path)
168 } else {
169 None
170 };
171 log::debug!("Context extraction: file={}, file_id={:?}, content_reader={}",
172 path, file_id_for_context, content_reader_opt.is_some());
173
174 let matches: Vec<MatchResult> = file_matches
176 .into_iter()
177 .map(|r| {
178 let (context_before, context_after) = if context_lines > 0 {
180 if let (Some(reader), Some(fid)) = (&content_reader_opt, file_id_for_context) {
181 let result = reader.get_context_by_line(fid as u32, r.span.start_line, context_lines)
182 .unwrap_or_else(|e| {
183 log::warn!("Failed to extract context for {}:{}: {}", path, r.span.start_line, e);
184 (vec![], vec![])
185 });
186 log::debug!("Extracted context for {}:{} - before: {}, after: {}",
187 path, r.span.start_line, result.0.len(), result.1.len());
188 result
189 } else {
190 if content_reader_opt.is_none() {
191 log::debug!("No ContentReader available for context extraction");
192 }
193 if file_id_for_context.is_none() {
194 log::debug!("No file_id found for {}", path);
195 }
196 (vec![], vec![])
197 }
198 } else {
199 (vec![], vec![])
200 };
201
202 MatchResult {
203 kind: r.kind,
204 symbol: r.symbol,
205 span: r.span,
206 preview: r.preview,
207 context_before,
208 context_after,
209 }
210 })
211 .collect();
212
213 FileGroupedResult {
214 path,
215 language,
216 dependencies,
217 matches,
218 }
219 })
220 .collect();
221
222 file_results.sort_by(|a, b| a.path.cmp(&b.path));
224
225 Ok(file_results)
226 }
227
228 pub fn search_with_metadata(&self, pattern: &str, filter: QueryFilter) -> Result<QueryResponse> {
233 log::info!("Executing query with metadata: pattern='{}', filter={:?}", pattern, filter);
234
235 if !self.cache.exists() {
237 anyhow::bail!(
238 "Index not found. Run 'rfx index' to build the cache first."
239 );
240 }
241
242 if let Err(e) = self.cache.validate() {
244 anyhow::bail!(
245 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
246 e
247 );
248 }
249
250 let (status, can_trust_results, warning) = self.get_index_status()?;
252
253 let (results, total) = self.search_internal(pattern, filter.clone())?;
255
256 use crate::models::PaginationInfo;
258 let pagination = PaginationInfo {
259 total,
260 count: results.len(),
261 offset: filter.offset.unwrap_or(0),
262 limit: filter.limit,
263 has_more: total > filter.offset.unwrap_or(0) + results.len(),
264 };
265
266 let grouped_results = self.group_and_load_dependencies(results, filter.include_dependencies, filter.context_lines)?;
269
270 Ok(QueryResponse {
271 ai_instruction: None, status,
273 can_trust_results,
274 warning,
275 pagination,
276 results: grouped_results,
277 })
278 }
279
280 pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
285 log::info!("Executing query: pattern='{}', filter={:?}", pattern, filter);
286
287 if !self.cache.exists() {
289 anyhow::bail!(
290 "Index not found. Run 'rfx index' to build the cache first."
291 );
292 }
293
294 if let Err(e) = self.cache.validate() {
296 anyhow::bail!(
297 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
298 e
299 );
300 }
301
302 self.check_index_freshness(&filter)?;
304
305 let (mut results, _total_count) = self.search_internal(pattern, filter.clone())?;
307
308 self.load_dependencies(&mut results, filter.include_dependencies)?;
310
311 Ok(results)
312 }
313
/// Core search pipeline shared by the public query entry points.
///
/// Returns `(results, total_count)`, where `results` is the page selected by
/// `filter.offset` / `filter.limit` and `total_count` is the number of
/// results before pagination.
///
/// The phases run in a fixed order: candidate collection, language and glob
/// filtering, timeout and breadth guards, optional AST or symbol enrichment,
/// post-filters (kind, file pattern, exact), preview expansion, path
/// de-duplication, sorting, and finally pagination.
///
/// # Errors
///
/// Bails with a "Query too broad" message when one of the guards trips and
/// `filter.force` is not set, with a timeout message when the elapsed time
/// exceeds `filter.timeout_secs`, and propagates errors from the cache and
/// from the candidate/enrichment helpers.
fn search_internal(&self, pattern: &str, filter: QueryFilter) -> Result<(Vec<SearchResult>, usize)> {
    use std::time::{Duration, Instant};

    // A `timeout_secs` of 0 disables the timeout.
    let start_time = Instant::now();
    let timeout = if filter.timeout_secs > 0 {
        Some(Duration::from_secs(filter.timeout_secs))
    } else {
        None
    };

    // Keyword queries only exist in symbols mode or with a kind filter: the
    // pattern is either empty or one of the parser keywords.
    let is_keyword_query = if filter.symbols_mode || filter.kind.is_some() {
        pattern.is_empty() || ParserFactory::get_all_keywords().contains(&pattern)
    } else {
        false
    };

    // Work on a mutable copy so an inferred kind can be filled in.
    let mut filter = filter.clone();
    if is_keyword_query && filter.kind.is_none() {
        if let Some(inferred_kind) = Self::keyword_to_kind(pattern) {
            log::info!("Keyword '{}' mapped to kind {:?} (auto-inferred)", pattern, inferred_kind);
            filter.kind = Some(inferred_kind);
        }
    }

    // Guard 1: short plain-text patterns on a large index. Both thresholds
    // can be overridden through the `test_*` fields of the filter.
    if !filter.force && !filter.use_regex && !is_keyword_query {
        let stats = self.cache.stats()?;
        let total_files = stats.total_files;
        let pattern_len = pattern.chars().count();

        let large_index_threshold = filter.test_large_index_threshold.unwrap_or(20_000);
        let short_pattern_threshold = filter.test_short_pattern_threshold.unwrap_or(4);

        if total_files > large_index_threshold && pattern_len < short_pattern_threshold {
            anyhow::bail!(
                "Query too broad - would be expensive to execute on this large index\n\
                \n\
                This index contains {} files, and pattern '{}' ({} characters) is too short for efficient searching.\n\
                On large codebases, short patterns can take 10-30+ seconds to complete.\n\
                \n\
                This query could:\n\
                • Hang for an extended period before returning results\n\
                • Return thousands of results\n\
                • Flood LLM context windows with excessive data\n\
                • Fail entirely\n\
                \n\
                Suggestions to narrow the query:\n\
                • Use a longer, more specific pattern (4+ characters recommended for large indexes)\n\
                • Add a language filter: --lang <language>\n\
                • Add a file filter: --glob <pattern> or --file <path>\n\
                • Use --force to bypass this check if you really need all results\n\
                \n\
                To force execution anyway:\n\
                rfx query \"{}\" --force",
                total_files,
                pattern,
                pattern_len,
                pattern
            );
        }
    }

    // Phase 1: collect candidates. Keyword queries scan files directly,
    // regex queries use the regex path, everything else uses trigrams.
    let mut results = if is_keyword_query {
        if let Some(lang) = filter.language {
            log::info!("Keyword query detected for '{}' - scanning all {:?} files (bypassing trigram search)",
                pattern, lang);
        } else {
            log::info!("Keyword query detected for '{}' - scanning all files (bypassing trigram search)", pattern);
        }
        self.get_all_language_files(&filter)?
    } else if filter.use_regex {
        self.get_regex_candidates(pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
    } else {
        self.get_trigram_candidates(pattern, &filter)?
    };

    // Language filter. Skipped for keyword queries; presumably
    // `get_all_language_files` already honours `filter.language` - TODO confirm.
    if !is_keyword_query {
        if let Some(lang) = filter.language {
            let before_count = results.len();
            results.retain(|r| r.lang == lang);
            log::debug!(
                "Language filter ({:?}): reduced {} candidates to {} candidates",
                lang,
                before_count,
                results.len()
            );
        }
    }

    // Glob include/exclude filter. Invalid patterns are logged and skipped
    // rather than failing the query.
    if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
        use globset::{Glob, GlobSetBuilder};

        let include_matcher = if !filter.glob_patterns.is_empty() {
            let mut builder = GlobSetBuilder::new();
            for pattern in &filter.glob_patterns {
                let normalized = Self::normalize_glob_pattern(pattern);
                match Glob::new(&normalized) {
                    Ok(glob) => {
                        builder.add(glob);
                    }
                    Err(e) => {
                        log::warn!("Invalid glob pattern '{}': {}", pattern, e);
                    }
                }
            }
            match builder.build() {
                Ok(matcher) => Some(matcher),
                Err(e) => {
                    log::warn!("Failed to build glob matcher: {}", e);
                    None
                }
            }
        } else {
            None
        };

        let exclude_matcher = if !filter.exclude_patterns.is_empty() {
            let mut builder = GlobSetBuilder::new();
            for pattern in &filter.exclude_patterns {
                let normalized = Self::normalize_glob_pattern(pattern);
                match Glob::new(&normalized) {
                    Ok(glob) => {
                        builder.add(glob);
                    }
                    Err(e) => {
                        log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
                    }
                }
            }
            match builder.build() {
                Ok(matcher) => Some(matcher),
                Err(e) => {
                    log::warn!("Failed to build exclude matcher: {}", e);
                    None
                }
            }
        } else {
            None
        };

        let before_count = results.len();
        results.retain(|r| {
            // No include matcher means every path is included.
            let included = if let Some(ref matcher) = include_matcher {
                matcher.is_match(&r.path)
            } else {
                true
            };

            // No exclude matcher means nothing is excluded.
            let excluded = if let Some(ref matcher) = exclude_matcher {
                matcher.is_match(&r.path)
            } else {
                false
            };

            included && !excluded
        });
        log::debug!(
            "Glob filter: reduced {} candidates to {} candidates",
            before_count,
            results.len()
        );
    }

    // Timeout check after candidate collection and cheap filtering; this is
    // the only elapsed-time check made directly in this function.
    if let Some(timeout_duration) = timeout {
        if start_time.elapsed() > timeout_duration {
            anyhow::bail!(
                "Query timeout exceeded ({} seconds).\n\
                \n\
                The query took too long to complete. Try one of these approaches:\n\
                • Use a more specific search pattern (longer patterns = faster search)\n\
                • Add a language filter with --lang to narrow the search space\n\
                • Add a file filter with --file to search specific directories\n\
                • Increase the timeout with --timeout <seconds>\n\
                \n\
                Example: rfx query \"{}\" --lang rust --timeout 60",
                filter.timeout_secs,
                pattern
            );
        }
    }

    // Guard 2: refuse queries that would be expensive to enrich, unless forced.
    if !filter.force {
        let candidate_count = results.len();
        let pattern_len = pattern.chars().count();

        // Plain-text patterns under 3 characters.
        let is_short_pattern = pattern_len < 3 && !filter.use_regex && !is_keyword_query;

        // AST queries with no glob restriction and 100 or more candidates.
        let is_broad_ast = filter.use_ast && filter.glob_patterns.is_empty() && candidate_count >= 100;

        // Candidate ceiling depends on how costly the enrichment mode is.
        let threshold = if filter.use_ast && filter.glob_patterns.is_empty() {
            100
        } else if filter.use_ast {
            10_000
        } else if is_keyword_query {
            20_000
        } else {
            50_000
        };

        // The ceiling only applies when results will be parsed
        // (symbols, kind filter, or AST).
        let has_many_candidates = candidate_count > threshold &&
            (filter.symbols_mode || filter.kind.is_some() || filter.use_ast);

        if is_short_pattern || has_many_candidates || is_broad_ast {
            let reason = if is_short_pattern {
                format!("Pattern '{}' is too short ({} characters). Short patterns bypass trigram optimization and require scanning many files.", pattern, pattern_len)
            } else if is_broad_ast {
                format!("AST query without --glob restriction will scan the entire codebase ({} files). AST queries are SLOW (500ms-10s+).", candidate_count)
            } else if is_keyword_query {
                format!("Keyword query '{}' matched {} files. This query scans all files of the target language, which will take significant time and produce excessive results.", pattern, candidate_count)
            } else {
                format!("Query matched {} files. Parsing this many files with --symbols or --kind will take significant time and produce excessive results.", candidate_count)
            };

            let suggestions = if is_short_pattern {
                vec![
                    "• Use a longer, more specific pattern (3+ characters recommended)",
                    "• Add a language filter: --lang <language>",
                    "• Add a file path filter: --file <path> or --glob <pattern>",
                    "• Use --force to bypass this check if you really need all results"
                ]
            } else if is_broad_ast {
                vec![
                    "• Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'",
                    "• Use --symbols instead (10-100x faster in 95% of cases)",
                    "• Use --force to bypass this check if you need a full codebase scan"
                ]
            } else if is_keyword_query {
                vec![
                    "• Add a language filter to reduce files scanned: --lang <language>",
                    "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
                    "• Add --kind to filter to specific symbol types: --kind function",
                    "• Use a more specific pattern instead of a keyword",
                    "• Use --force to bypass this check if you need all results"
                ]
            } else {
                vec![
                    "• Add a language filter to reduce candidate set: --lang <language>",
                    "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
                    "• Use a more specific search pattern",
                    "• Use --force to bypass this check if you need all results"
                ]
            };

            // Echo the flags already in use so the suggested command can be
            // pasted back with --force added.
            let mut cmd_flags = String::new();
            if filter.symbols_mode {
                cmd_flags.push_str("--symbols ");
            }
            if let Some(ref lang) = filter.language {
                cmd_flags.push_str(&format!("--lang {:?} ", lang));
            }
            if let Some(ref kind) = filter.kind {
                cmd_flags.push_str(&format!("--kind {:?} ", kind));
            }
            if filter.use_ast {
                cmd_flags.push_str("--ast ");
            }

            anyhow::bail!(
                "Query too broad - would be expensive to execute\n\
                \n\
                {}\n\
                \n\
                This query could:\n\
                • Hang for an extended period before returning results\n\
                • Return thousands of results\n\
                • Flood LLM context windows with excessive data\n\
                • Fail entirely\n\
                \n\
                Suggestions to narrow the query:\n\
                {}\n\
                \n\
                To force execution anyway:\n\
                rfx query \"{}\" --force {}",
                reason,
                suggestions.join("\n "),
                pattern,
                cmd_flags
            );
        }
    }

    // Before enrichment: order candidates by path then line, and warn the
    // user (or just log) when many candidates are about to be parsed.
    if filter.symbols_mode || filter.kind.is_some() || filter.use_ast {
        results.sort_by(|a, b| {
            a.path.cmp(&b.path)
                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
        });

        let candidate_count = results.len();
        if candidate_count > 1000 && !filter.suppress_output {
            output::warn(&format!(
                "Pattern '{}' matched {} files - parsing may take some time. Consider using --file, --glob, or a more specific pattern to narrow the search.",
                pattern,
                candidate_count
            ));
        } else if candidate_count > 100 {
            log::info!("Parsing {} candidate files for symbol extraction", candidate_count);
        }
    }

    // Phase 2: enrichment. AST takes precedence over symbol enrichment.
    if filter.use_ast {
        results = self.enrich_with_ast(results, pattern, filter.language)?;
    } else if filter.symbols_mode || filter.kind.is_some() {
        results = self.enrich_with_symbols(results, pattern, &filter)?;
    }

    // Drop duplicates that share path, start line and symbol name; the
    // first occurrence is kept.
    if filter.symbols_mode || filter.kind.is_some() {
        let mut seen = std::collections::HashSet::<(String, usize, Option<String>)>::new();
        results.retain(|r| seen.insert((r.path.clone(), r.span.start_line, r.symbol.clone())));
    }

    // Kind filter. Asking for `Function` also accepts `Method`.
    if let Some(ref kind) = filter.kind {
        results.retain(|r| {
            if matches!(kind, SymbolKind::Function) {
                matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
            } else {
                r.kind == *kind
            }
        });
    }

    // File filter is a plain substring match on the path.
    if let Some(ref file_pattern) = filter.file_pattern {
        results.retain(|r| r.path.contains(file_pattern));
    }

    // Exact matching only applies in symbols mode: the symbol name must
    // equal the pattern.
    if filter.exact && filter.symbols_mode {
        results.retain(|r| r.symbol.as_deref() == Some(pattern));
    }

    // Expand: replace the preview of multi-line results with the full text
    // of their span. Failures to open or read content leave the preview as is.
    if filter.expand {
        let content_path = self.cache.path().join("content.bin");
        if let Ok(content_reader) = ContentReader::open(&content_path) {
            for result in &mut results {
                if result.span.start_line < result.span.end_line {
                    if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
                        if let Ok(content) = content_reader.get_file_content(file_id) {
                            let lines: Vec<&str> = content.lines().collect();
                            // Span lines are treated as 1-based and inclusive:
                            // `start_line - 1` is the first index and
                            // `end_line` the exclusive end, clamped to the file.
                            let start_idx = (result.span.start_line as usize).saturating_sub(1);
                            let end_idx = (result.span.end_line as usize).min(lines.len());

                            if start_idx < end_idx {
                                let full_body = lines[start_idx..end_idx].join("\n");
                                result.preview = full_body;
                            }
                        }
                    }
                }
            }
        }
    }

    // Paths-only mode keeps the first result seen for each path.
    if filter.paths_only {
        use std::collections::HashSet;
        let mut seen_paths = HashSet::new();
        results.retain(|r| seen_paths.insert(r.path.clone()));
    }

    // Final deterministic order: path, then start line.
    results.sort_by(|a, b| {
        a.path.cmp(&b.path)
            .then_with(|| a.span.start_line.cmp(&b.span.start_line))
    });

    // Captured before pagination so callers can report the full match count.
    let total_count = results.len();

    // Pagination: an offset at or past the end yields an empty page.
    if let Some(offset) = filter.offset {
        if offset < results.len() {
            results = results.into_iter().skip(offset).collect();
        } else {
            results.clear();
        }
    }

    if let Some(limit) = filter.limit {
        results.truncate(limit);
    }

    log::info!("Query returned {} results (total before pagination: {})", results.len(), total_count);

    Ok((results, total_count))
}
792
793 pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
795 let filter = QueryFilter {
796 symbols_mode: true,
797 ..Default::default()
798 };
799 self.search(name, filter)
800 }
801
802 pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
804 let filter = QueryFilter {
805 language: lang,
806 use_ast: true,
807 ..Default::default()
808 };
809
810 self.search(pattern, filter)
811 }
812
813 pub fn search_ast_all_files(&self, ast_pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
834 log::info!("Executing AST query on all files: pattern='{}', filter={:?}", ast_pattern, filter);
835
836 let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
838 "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
839 \n\
840 Example: rfx query \"(function_definition) @fn\" --ast --lang python"
841 ))?;
842
843 if !self.cache.exists() {
845 anyhow::bail!(
846 "Index not found. Run 'rfx index' to build the cache first."
847 );
848 }
849
850 self.check_index_freshness(&filter)?;
852
853 let content_path = self.cache.path().join("content.bin");
855 let content_reader = ContentReader::open(&content_path)
856 .context("Failed to open content store")?;
857
858 use globset::{Glob, GlobSetBuilder};
860
861 let include_matcher = if !filter.glob_patterns.is_empty() {
862 let mut builder = GlobSetBuilder::new();
863 for pattern in &filter.glob_patterns {
864 let normalized = Self::normalize_glob_pattern(pattern);
866 if let Ok(glob) = Glob::new(&normalized) {
867 builder.add(glob);
868 }
869 }
870 builder.build().ok()
871 } else {
872 None
873 };
874
875 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
876 let mut builder = GlobSetBuilder::new();
877 for pattern in &filter.exclude_patterns {
878 let normalized = Self::normalize_glob_pattern(pattern);
880 if let Ok(glob) = Glob::new(&normalized) {
881 builder.add(glob);
882 }
883 }
884 builder.build().ok()
885 } else {
886 None
887 };
888
889 let mut candidates: Vec<SearchResult> = Vec::new();
891
892 for file_id in 0..content_reader.file_count() {
893 let file_path = match content_reader.get_file_path(file_id as u32) {
894 Some(p) => p,
895 None => continue,
896 };
897
898 let ext = file_path.extension()
900 .and_then(|e| e.to_str())
901 .unwrap_or("");
902 let detected_lang = Language::from_extension(ext);
903
904 if detected_lang != lang {
906 continue;
907 }
908
909 let file_path_str = file_path.to_string_lossy().to_string();
910
911 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
913 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
914
915 if !included || excluded {
916 continue;
917 }
918
919 candidates.push(SearchResult {
921 path: file_path_str,
922 lang: detected_lang,
923 span: Span { start_line: 1, end_line: 1 },
924 symbol: None,
925 kind: SymbolKind::Unknown("ast_query".to_string()),
926 preview: String::new(),
927 dependencies: None,
928 });
929 }
930
931 log::info!("AST query scanning {} files for language {:?}", candidates.len(), lang);
932
933 if !filter.force && filter.glob_patterns.is_empty() && candidates.len() >= 100 {
936 anyhow::bail!(
937 "Query too broad - would be expensive to execute\n\
938 \n\
939 AST query without --glob restriction will scan the ENTIRE codebase ({} files). AST queries are SLOW (500ms-10s+).\n\
940 \n\
941 This query could:\n\
942 • Hang for an extended period before returning results\n\
943 • Return thousands of results\n\
944 • Flood LLM context windows with excessive data\n\
945 • Fail entirely\n\
946 \n\
947 Suggestions to narrow the query:\n\
948 • Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'\n\
949 • Use --symbols instead (10-100x faster in 95% of cases)\n\
950 • Use --force to bypass this check if you need a full codebase scan\n\
951 \n\
952 To force execution anyway:\n\
953 rfx query \"{}\" --force --ast --lang {:?}",
954 candidates.len(),
955 ast_pattern,
956 lang
957 );
958 }
959
960 if candidates.is_empty() {
961 if !filter.suppress_output {
962 output::warn(&format!("No files found for language {:?}. Check your language filter or glob patterns.", lang));
963 }
964 return Ok(Vec::new());
965 }
966
967 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
970
971 log::debug!("AST query found {} matches before filtering", results.len());
972
973 if let Some(ref kind) = filter.kind {
977 results.retain(|r| {
978 if matches!(kind, SymbolKind::Function) {
979 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
980 } else {
981 r.kind == *kind
982 }
983 });
984 }
985
986 if filter.expand {
990 let content_path = self.cache.path().join("content.bin");
991 if let Ok(content_reader) = ContentReader::open(&content_path) {
992 for result in &mut results {
993 if result.span.start_line < result.span.end_line {
994 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
995 if let Ok(content) = content_reader.get_file_content(file_id) {
996 let lines: Vec<&str> = content.lines().collect();
997 let start_idx = (result.span.start_line as usize).saturating_sub(1);
998 let end_idx = (result.span.end_line as usize).min(lines.len());
999
1000 if start_idx < end_idx {
1001 let full_body = lines[start_idx..end_idx].join("\n");
1002 result.preview = full_body;
1003 }
1004 }
1005 }
1006 }
1007 }
1008 }
1009 }
1010
1011 if filter.paths_only {
1013 use std::collections::HashSet;
1014 let mut seen_paths = HashSet::new();
1015 results.retain(|r| seen_paths.insert(r.path.clone()));
1016 }
1017
1018 results.sort_by(|a, b| {
1020 a.path.cmp(&b.path)
1021 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1022 });
1023
1024 if let Some(offset) = filter.offset {
1026 if offset < results.len() {
1027 results = results.into_iter().skip(offset).collect();
1028 } else {
1029 results.clear();
1030 }
1031 }
1032
1033 if let Some(limit) = filter.limit {
1035 results.truncate(limit);
1036 }
1037
1038 log::info!("AST query returned {} results", results.len());
1039
1040 self.load_dependencies(&mut results, filter.include_dependencies)?;
1042
1043 Ok(results)
1044 }
1045
1046 pub fn search_ast_with_text_filter(
1058 &self,
1059 text_pattern: &str,
1060 ast_pattern: &str,
1061 filter: QueryFilter,
1062 ) -> Result<Vec<SearchResult>> {
1063 log::info!("Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
1064 text_pattern, ast_pattern, filter);
1065
1066 if !self.cache.exists() {
1068 anyhow::bail!(
1069 "Index not found. Run 'rfx index' to build the cache first."
1070 );
1071 }
1072
1073 self.check_index_freshness(&filter)?;
1075
1076 use std::time::{Duration, Instant};
1078 let start_time = Instant::now();
1079 let timeout = if filter.timeout_secs > 0 {
1080 Some(Duration::from_secs(filter.timeout_secs))
1081 } else {
1082 None
1083 };
1084
1085 let candidates = if filter.use_regex {
1087 self.get_regex_candidates(text_pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
1088 } else {
1089 self.get_trigram_candidates(text_pattern, &filter)?
1090 };
1091
1092 log::debug!("Phase 1 found {} candidate locations", candidates.len());
1093
1094 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1096
1097 log::debug!("Phase 2 AST matching found {} results", results.len());
1098
1099 if let Some(lang) = filter.language {
1101 results.retain(|r| r.lang == lang);
1102 }
1103
1104 if let Some(ref kind) = filter.kind {
1105 results.retain(|r| {
1106 if matches!(kind, SymbolKind::Function) {
1107 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1108 } else {
1109 r.kind == *kind
1110 }
1111 });
1112 }
1113
1114 if let Some(ref file_pattern) = filter.file_pattern {
1115 results.retain(|r| r.path.contains(file_pattern));
1116 }
1117
1118 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
1120 use globset::{Glob, GlobSetBuilder};
1121
1122 let include_matcher = if !filter.glob_patterns.is_empty() {
1123 let mut builder = GlobSetBuilder::new();
1124 for pattern in &filter.glob_patterns {
1125 let normalized = Self::normalize_glob_pattern(pattern);
1127 if let Ok(glob) = Glob::new(&normalized) {
1128 builder.add(glob);
1129 }
1130 }
1131 builder.build().ok()
1132 } else {
1133 None
1134 };
1135
1136 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1137 let mut builder = GlobSetBuilder::new();
1138 for pattern in &filter.exclude_patterns {
1139 let normalized = Self::normalize_glob_pattern(pattern);
1141 if let Ok(glob) = Glob::new(&normalized) {
1142 builder.add(glob);
1143 }
1144 }
1145 builder.build().ok()
1146 } else {
1147 None
1148 };
1149
1150 results.retain(|r| {
1151 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&r.path));
1152 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&r.path));
1153 included && !excluded
1154 });
1155 }
1156
1157 if filter.exact && filter.symbols_mode {
1158 results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
1159 }
1160
1161 if filter.expand {
1163 let content_path = self.cache.path().join("content.bin");
1164 if let Ok(content_reader) = ContentReader::open(&content_path) {
1165 for result in &mut results {
1166 if result.span.start_line < result.span.end_line {
1167 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1168 if let Ok(content) = content_reader.get_file_content(file_id) {
1169 let lines: Vec<&str> = content.lines().collect();
1170 let start_idx = (result.span.start_line as usize).saturating_sub(1);
1171 let end_idx = (result.span.end_line as usize).min(lines.len());
1172
1173 if start_idx < end_idx {
1174 let full_body = lines[start_idx..end_idx].join("\n");
1175 result.preview = full_body;
1176 }
1177 }
1178 }
1179 }
1180 }
1181 }
1182 }
1183
1184 results.sort_by(|a, b| {
1186 a.path.cmp(&b.path)
1187 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1188 });
1189
1190 if let Some(offset) = filter.offset {
1192 if offset < results.len() {
1193 results = results.into_iter().skip(offset).collect();
1194 } else {
1195 results.clear();
1196 }
1197 }
1198
1199 if let Some(limit) = filter.limit {
1201 results.truncate(limit);
1202 }
1203
1204 log::info!("AST query returned {} results", results.len());
1205
1206 Ok(results)
1207 }
1208
1209 pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
1211 let filter = QueryFilter {
1212 kind: Some(kind),
1213 symbols_mode: true,
1214 ..Default::default()
1215 };
1216
1217 self.search("*", filter)
1218 }
1219
1220 fn enrich_with_symbols(&self, candidates: Vec<SearchResult>, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1241 let content_path = self.cache.path().join("content.bin");
1243 let content_reader = ContentReader::open(&content_path)
1244 .context("Failed to open content store")?;
1245
1246 let trigrams_path = self.cache.path().join("trigrams.bin");
1248 let trigram_index = if trigrams_path.exists() {
1249 TrigramIndex::load(&trigrams_path)?
1250 } else {
1251 Self::rebuild_trigram_index(&content_reader)?
1252 };
1253
1254 let symbol_cache = crate::symbol_cache::SymbolCache::open(self.cache.path())
1256 .context("Failed to open symbol cache")?;
1257
1258 let root = self.cache.workspace_root();
1260 let branch = crate::git::get_current_branch(&root)
1261 .unwrap_or_else(|_| "_default".to_string());
1262 let file_hashes = self.cache.load_hashes_for_branch(&branch)
1263 .context("Failed to load file hashes")?;
1264 log::debug!("Loaded {} file hashes for branch '{}' for symbol cache lookups", file_hashes.len(), branch);
1265
1266 use std::collections::HashMap;
1268 let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
1269 let mut skipped_unsupported = 0;
1270
1271 for candidate in candidates {
1272 if !candidate.lang.is_supported() {
1274 skipped_unsupported += 1;
1275 continue;
1276 }
1277
1278 files_by_path
1279 .entry(candidate.path.clone())
1280 .or_insert_with(Vec::new)
1281 .push(candidate);
1282 }
1283
1284 let total_files = files_by_path.len();
1285 log::debug!("Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
1286 total_files, skipped_unsupported);
1287
1288 if total_files > 1000 && !filter.suppress_output {
1290 output::warn(&format!(
1291 "Pattern '{}' matched {} files. This may take some time to parse. Consider using a more specific pattern or adding --lang/--file filters to narrow the search.",
1292 pattern,
1293 total_files
1294 ));
1295 }
1296
1297 let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
1299
1300 let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
1303
1304 for file_path in &files_to_process {
1305 let ext = std::path::Path::new(file_path)
1307 .extension()
1308 .and_then(|e| e.to_str())
1309 .unwrap_or("");
1310 let lang = Language::from_extension(ext);
1311
1312 if let Some(line_filter) = crate::line_filter::get_filter(lang) {
1314 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1316 Some(id) => id,
1317 None => continue,
1318 };
1319
1320 let content = match content_reader.get_file_content(file_id) {
1322 Ok(c) => c,
1323 Err(_) => continue,
1324 };
1325
1326 let mut all_in_non_code = true;
1328 for line in content.lines() {
1329 let mut search_start = 0;
1331 while let Some(pos) = line[search_start..].find(pattern) {
1332 let absolute_pos = search_start + pos;
1333
1334 let in_comment = line_filter.is_in_comment(line, absolute_pos);
1336 let in_string = line_filter.is_in_string(line, absolute_pos);
1337
1338 if !in_comment && !in_string {
1339 all_in_non_code = false;
1341 break;
1342 }
1343
1344 search_start = absolute_pos + pattern.len();
1345 }
1346
1347 if !all_in_non_code {
1348 break;
1349 }
1350 }
1351
1352 if all_in_non_code {
1354 if content.contains(pattern) {
1356 files_to_skip.insert(file_path.clone());
1357 log::debug!("Pre-filter: Skipping {} (all matches in comments/strings)", file_path);
1358 }
1359 }
1360 }
1361 }
1362
1363 files_to_process.retain(|path| !files_to_skip.contains(path));
1365
1366 log::debug!("Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
1367 files_to_skip.len(), files_to_process.len());
1368
1369 let num_threads = {
1371 let available_cores = std::thread::available_parallelism()
1372 .map(|n| n.get())
1373 .unwrap_or(4);
1374 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
1377 };
1378
1379 log::debug!("Using {} threads for parallel symbol extraction (out of {} available cores)",
1380 num_threads,
1381 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
1382
1383 let pool = rayon::ThreadPoolBuilder::new()
1385 .num_threads(num_threads)
1386 .build()
1387 .context("Failed to create thread pool for symbol extraction")?;
1388
1389 let files_with_hashes: Vec<String> = files_to_process
1394 .iter()
1395 .filter(|path| file_hashes.contains_key(path.as_str()))
1396 .cloned()
1397 .collect();
1398
1399 let file_id_map = self.cache.batch_get_file_ids(&files_with_hashes)
1401 .context("Failed to batch lookup file IDs")?;
1402
1403 let file_lookup_tuples: Vec<(i64, String, String)> = files_with_hashes
1405 .iter()
1406 .filter_map(|path| {
1407 let file_id = file_id_map.get(path)?;
1408 let hash = file_hashes.get(path.as_str())?;
1409 Some((*file_id, hash.clone(), path.clone()))
1410 })
1411 .collect();
1412
1413 let batch_results = symbol_cache.batch_get_with_kind(&file_lookup_tuples, filter.kind.clone())
1415 .context("Failed to batch read symbol cache")?;
1416
1417 let mut cached_symbols: HashMap<String, Vec<SearchResult>> = HashMap::new();
1419 let mut files_needing_parse: Vec<String> = Vec::new();
1420
1421 let id_to_path: HashMap<i64, String> = file_id_map
1423 .iter()
1424 .map(|(path, id)| (*id, path.clone()))
1425 .collect();
1426
1427 for (file_id, symbols) in batch_results {
1429 if let Some(file_path) = id_to_path.get(&file_id) {
1430 cached_symbols.insert(file_path.clone(), symbols);
1431 }
1432 }
1433
1434 for path in &files_with_hashes {
1436 if file_id_map.contains_key(path) && !cached_symbols.contains_key(path) {
1437 files_needing_parse.push(path.clone());
1438 }
1439 }
1440
1441 for file_path in &files_to_process {
1443 if !file_hashes.contains_key(file_path.as_str()) {
1444 files_needing_parse.push(file_path.clone());
1445 }
1446 }
1447
1448 log::debug!(
1449 "Symbol cache: {} hits, {} need parsing",
1450 cached_symbols.len(),
1451 files_needing_parse.len()
1452 );
1453
1454 use rayon::prelude::*;
1456
1457 let parsed_symbols: Vec<SearchResult> = pool.install(|| {
1458 files_needing_parse
1459 .par_iter()
1460 .flat_map(|file_path| {
1461 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1463 Some(id) => id,
1464 None => {
1465 log::warn!("Could not find file_id for path: {}", file_path);
1466 return Vec::new();
1467 }
1468 };
1469
1470 let content = match content_reader.get_file_content(file_id) {
1471 Ok(c) => c,
1472 Err(e) => {
1473 log::warn!("Failed to read file {}: {}", file_path, e);
1474 return Vec::new();
1475 }
1476 };
1477
1478 let ext = std::path::Path::new(file_path)
1480 .extension()
1481 .and_then(|e| e.to_str())
1482 .unwrap_or("");
1483 let lang = Language::from_extension(ext);
1484
1485 let symbols = match ParserFactory::parse(file_path, content, lang) {
1487 Ok(symbols) => {
1488 log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
1489 symbols
1490 }
1491 Err(e) => {
1492 log::debug!("Failed to parse {}: {}", file_path, e);
1493 Vec::new()
1494 }
1495 };
1496
1497 if let Some(file_hash) = file_hashes.get(file_path.as_str()) {
1499 if let Err(e) = symbol_cache.set(file_path, file_hash, &symbols) {
1500 log::debug!("Failed to cache symbols for {}: {}", file_path, e);
1501 }
1502 }
1503
1504 symbols
1505 })
1506 .collect()
1507 });
1508
1509 let mut all_symbols: Vec<SearchResult> = Vec::new();
1511
1512 for symbols in cached_symbols.values() {
1514 all_symbols.extend_from_slice(symbols);
1515 }
1516
1517 all_symbols.extend(parsed_symbols);
1519
1520 let is_keyword_query = {
1528 let lang_to_check = if let Some(lang) = filter.language {
1530 vec![lang]
1533 } else {
1534 let mut langs: Vec<Language> = all_symbols.iter()
1538 .map(|s| s.lang)
1539 .collect::<Vec<_>>();
1540 langs.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); langs.dedup(); langs
1543 };
1544
1545 lang_to_check.iter().any(|lang| {
1547 ParserFactory::get_keywords(*lang).contains(&pattern)
1548 })
1549 };
1550
1551 let filtered: Vec<SearchResult> = if is_keyword_query {
1554 log::info!("Pattern '{}' is a language keyword - listing all symbols (kind filtering will be applied in Phase 3)", pattern);
1555 all_symbols
1556 } else if filter.use_regex {
1557 use std::collections::{HashMap, HashSet};
1563 let mut candidate_lines: HashMap<String, HashSet<usize>> = HashMap::new();
1564 for candidate in &files_by_path {
1565 for cand in candidate.1 {
1566 candidate_lines
1567 .entry(candidate.0.clone())
1568 .or_insert_with(HashSet::new)
1569 .insert(cand.span.start_line);
1570 }
1571 }
1572
1573 all_symbols
1575 .into_iter()
1576 .filter(|sym| {
1577 if let Some(lines) = candidate_lines.get(&sym.path) {
1578 for line in sym.span.start_line..=sym.span.end_line {
1580 if lines.contains(&line) {
1581 return true;
1582 }
1583 }
1584 }
1585 false
1586 })
1587 .collect()
1588 } else if filter.use_contains {
1589 all_symbols
1591 .into_iter()
1592 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
1593 .collect()
1594 } else {
1595 all_symbols
1597 .into_iter()
1598 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
1599 .collect()
1600 };
1601
1602 log::info!("Symbol enrichment found {} matches for pattern '{}'", filtered.len(), pattern);
1603
1604 Ok(filtered)
1605 }
1606
1607 fn enrich_with_ast(&self, candidates: Vec<SearchResult>, ast_pattern: &str, language: Option<Language>) -> Result<Vec<SearchResult>> {
1626 let lang = language.ok_or_else(|| anyhow::anyhow!(
1628 "Language must be specified for AST pattern matching. Use --lang to specify the language."
1629 ))?;
1630
1631 let content_path = self.cache.path().join("content.bin");
1633 let content_reader = ContentReader::open(&content_path)
1634 .context("Failed to open content store")?;
1635
1636 let trigrams_path = self.cache.path().join("trigrams.bin");
1638 let trigram_index = if trigrams_path.exists() {
1639 TrigramIndex::load(&trigrams_path)?
1640 } else {
1641 Self::rebuild_trigram_index(&content_reader)?
1642 };
1643
1644 use std::collections::HashMap;
1646 let mut file_contents: HashMap<String, String> = HashMap::new();
1647
1648 for candidate in &candidates {
1649 if file_contents.contains_key(&candidate.path) {
1650 continue;
1651 }
1652
1653 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, &candidate.path) {
1655 Some(id) => id,
1656 None => {
1657 log::warn!("Could not find file_id for path: {}", candidate.path);
1658 continue;
1659 }
1660 };
1661
1662 let content = match content_reader.get_file_content(file_id) {
1664 Ok(c) => c,
1665 Err(e) => {
1666 log::warn!("Failed to read file {}: {}", candidate.path, e);
1667 continue;
1668 }
1669 };
1670
1671 file_contents.insert(candidate.path.clone(), content.to_string());
1672 }
1673
1674 log::debug!("Executing AST query on {} candidate files with language {:?}", file_contents.len(), lang);
1675
1676 let results = crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1678
1679 log::info!("AST query found {} matches for pattern '{}'", results.len(), ast_pattern);
1680
1681 Ok(results)
1682 }
1683
1684 fn find_file_id_by_path(
1686 content_reader: &ContentReader,
1687 trigram_index: &TrigramIndex,
1688 target_path: &str,
1689 ) -> Option<u32> {
1690 for file_id in 0..trigram_index.file_count() {
1692 if let Some(path) = trigram_index.get_file(file_id as u32) {
1693 if path.to_string_lossy() == target_path {
1694 return Some(file_id as u32);
1695 }
1696 }
1697 }
1698
1699 for file_id in 0..content_reader.file_count() {
1701 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1702 if path.to_string_lossy() == target_path {
1703 return Some(file_id as u32);
1704 }
1705 }
1706 }
1707
1708 None
1709 }
1710
    /// Forwards to `filter::keyword_to_kind`.
    ///
    /// Returns the `SymbolKind` that helper associates with `keyword`, or `None`
    /// when it reports no association.
    fn keyword_to_kind(keyword: &str) -> Option<SymbolKind> {
        filter::keyword_to_kind(keyword)
    }
1721
1722 fn get_all_language_files(&self, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1730 let content_path = self.cache.path().join("content.bin");
1735 let content_reader = ContentReader::open(&content_path)
1736 .context("Failed to open content store")?;
1737
1738 use globset::{Glob, GlobSetBuilder};
1740
1741 let include_matcher = if !filter.glob_patterns.is_empty() {
1742 let mut builder = GlobSetBuilder::new();
1743 for pattern in &filter.glob_patterns {
1744 let normalized = Self::normalize_glob_pattern(pattern);
1745 if let Ok(glob) = Glob::new(&normalized) {
1746 builder.add(glob);
1747 }
1748 }
1749 builder.build().ok()
1750 } else {
1751 None
1752 };
1753
1754 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1755 let mut builder = GlobSetBuilder::new();
1756 for pattern in &filter.exclude_patterns {
1757 let normalized = Self::normalize_glob_pattern(pattern);
1758 if let Ok(glob) = Glob::new(&normalized) {
1759 builder.add(glob);
1760 }
1761 }
1762 builder.build().ok()
1763 } else {
1764 None
1765 };
1766
1767 let mut candidates: Vec<SearchResult> = Vec::new();
1769
1770 for file_id in 0..content_reader.file_count() {
1771 let file_path = match content_reader.get_file_path(file_id as u32) {
1772 Some(p) => p,
1773 None => continue,
1774 };
1775
1776 let ext = file_path.extension()
1778 .and_then(|e| e.to_str())
1779 .unwrap_or("");
1780 let detected_lang = Language::from_extension(ext);
1781
1782 if let Some(lang) = filter.language {
1784 if detected_lang != lang {
1785 continue;
1786 }
1787 }
1788
1789 let file_path_str = file_path.to_string_lossy().to_string();
1790
1791 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
1793 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
1794
1795 if !included || excluded {
1796 continue;
1797 }
1798
1799 if let Some(ref file_pattern) = filter.file_pattern {
1801 if !file_path_str.contains(file_pattern) {
1802 continue;
1803 }
1804 }
1805
1806 candidates.push(SearchResult {
1809 path: file_path_str,
1810 lang: detected_lang,
1811 span: Span { start_line: 1, end_line: 1 },
1812 symbol: None,
1813 kind: SymbolKind::Unknown("keyword_query".to_string()),
1814 preview: String::new(),
1815 dependencies: None,
1816 });
1817 }
1818
1819 if let Some(lang) = filter.language {
1820 log::info!("Keyword query will scan {} {:?} files for symbol extraction", candidates.len(), lang);
1821 } else {
1822 log::info!("Keyword query will scan {} files (all languages) for symbol extraction", candidates.len());
1823 }
1824
1825 Ok(candidates)
1826 }
1827
1828 fn get_trigram_candidates(&self, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1830 let content_path = self.cache.path().join("content.bin");
1832 let content_reader = ContentReader::open(&content_path)
1833 .context("Failed to open content store")?;
1834
1835 if pattern.chars().count() < 3 {
1839 log::info!(
1840 "Pattern '{}' is shorter than 3 chars — trigram index cannot be used, \
1841 falling back to linear scan",
1842 pattern
1843 );
1844 return self.linear_scan_candidates(pattern, filter, &content_reader);
1845 }
1846
1847 let trigrams_path = self.cache.path().join("trigrams.bin");
1849 let trigram_index = if trigrams_path.exists() {
1850 match TrigramIndex::load(&trigrams_path) {
1851 Ok(index) => {
1852 log::debug!("Loaded trigram index from disk: {} trigrams, {} files",
1853 index.trigram_count(), index.file_count());
1854 index
1855 }
1856 Err(e) => {
1857 log::warn!("Failed to load trigram index from disk: {}", e);
1858 log::warn!("Rebuilding trigram index from content store...");
1859 Self::rebuild_trigram_index(&content_reader)?
1860 }
1861 }
1862 } else {
1863 log::debug!("trigrams.bin not found, rebuilding from content store");
1864 Self::rebuild_trigram_index(&content_reader)?
1865 };
1866
1867 let candidates = trigram_index.search(pattern);
1869 log::debug!("Found {} candidate locations from trigram search", candidates.len());
1870
1871 let pattern_owned = pattern.to_string();
1873
1874 let compiled_regex = if filter.use_regex {
1876 match Regex::new(&pattern_owned) {
1877 Ok(re) => Some(re),
1878 Err(e) => {
1879 log::error!("Invalid regex pattern '{}': {}", pattern_owned, e);
1880 anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e);
1881 }
1882 }
1883 } else {
1884 None
1885 };
1886
1887 use std::collections::HashMap;
1889 let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> = HashMap::new();
1890 for loc in candidates {
1891 candidates_by_file
1892 .entry(loc.file_id)
1893 .or_insert_with(Vec::new)
1894 .push(loc);
1895 }
1896
1897 log::debug!("Scanning {} files with trigram matches", candidates_by_file.len());
1898
1899 use rayon::prelude::*;
1901
1902 let results: Vec<SearchResult> = candidates_by_file
1903 .par_iter()
1904 .flat_map(|(file_id, locations)| {
1905 let file_path = match trigram_index.get_file(*file_id) {
1907 Some(p) => p,
1908 None => return Vec::new(),
1909 };
1910
1911 let content = match content_reader.get_file_content(*file_id) {
1912 Ok(c) => c,
1913 Err(_) => return Vec::new(),
1914 };
1915
1916 let file_path_str = file_path.to_string_lossy().to_string();
1917
1918 let ext = file_path.extension()
1920 .and_then(|e| e.to_str())
1921 .unwrap_or("");
1922 let lang = Language::from_extension(ext);
1923
1924 let lines: Vec<&str> = content.lines().collect();
1926
1927 let mut seen_lines: std::collections::HashSet<usize> = std::collections::HashSet::new();
1929 let mut file_results = Vec::new();
1930
1931 for loc in locations {
1933 let line_no = loc.line_no as usize;
1934
1935 if seen_lines.contains(&line_no) {
1937 continue;
1938 }
1939
1940 if line_no == 0 || line_no > lines.len() {
1942 log::debug!("Line {} out of bounds (file has {} lines)", line_no, lines.len());
1943 continue;
1944 }
1945
1946 let line = lines[line_no - 1];
1947
1948 let line_matches = if filter.use_regex {
1953 compiled_regex.as_ref()
1956 .map(|re| re.is_match(line))
1957 .unwrap_or(false)
1958 } else if filter.use_contains {
1959 line.contains(&pattern_owned)
1961 } else {
1962 Self::has_word_boundary_match(line, &pattern_owned)
1964 };
1965
1966 if !line_matches {
1967 continue;
1968 }
1969
1970 seen_lines.insert(line_no);
1971
1972 file_results.push(SearchResult {
1974 path: file_path_str.clone(),
1975 lang: lang.clone(),
1976 kind: SymbolKind::Unknown("text_match".to_string()),
1977 symbol: None, span: Span {
1979 start_line: line_no,
1980 end_line: line_no,
1981 },
1982 preview: line.to_string(),
1983 dependencies: None,
1984 });
1985 }
1986
1987 file_results
1988 })
1989 .collect();
1990
1991 Ok(results)
1992 }
1993
1994 fn linear_scan_candidates(
2001 &self,
2002 pattern: &str,
2003 filter: &QueryFilter,
2004 content_reader: &ContentReader,
2005 ) -> Result<Vec<SearchResult>> {
2006 use rayon::prelude::*;
2007
2008 let pattern_owned = pattern.to_string();
2009 let file_count = content_reader.file_count();
2010
2011 let compiled_regex = if filter.use_regex {
2012 match Regex::new(&pattern_owned) {
2013 Ok(re) => Some(re),
2014 Err(e) => anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e),
2015 }
2016 } else {
2017 None
2018 };
2019
2020 let results: Vec<SearchResult> = (0..file_count as u32)
2021 .collect::<Vec<_>>()
2022 .par_iter()
2023 .flat_map(|&file_id| {
2024 let file_path = match content_reader.get_file_path(file_id) {
2025 Some(p) => p.to_path_buf(),
2026 None => return Vec::new(),
2027 };
2028 let content = match content_reader.get_file_content(file_id) {
2029 Ok(c) => c,
2030 Err(_) => return Vec::new(),
2031 };
2032
2033 let file_path_str = file_path.to_string_lossy().to_string();
2034 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2035 let lang = Language::from_extension(ext);
2036
2037 let mut seen_lines = std::collections::HashSet::new();
2038 let mut file_results = Vec::new();
2039
2040 for (line_idx, line) in content.lines().enumerate() {
2041 let line_no = line_idx + 1;
2042 if seen_lines.contains(&line_no) {
2043 continue;
2044 }
2045
2046 let line_matches = if filter.use_regex {
2047 compiled_regex.as_ref().map(|re| re.is_match(line)).unwrap_or(false)
2048 } else if filter.use_contains {
2049 line.contains(&pattern_owned)
2050 } else {
2051 Self::has_word_boundary_match(line, &pattern_owned)
2052 };
2053
2054 if !line_matches {
2055 continue;
2056 }
2057
2058 seen_lines.insert(line_no);
2059 file_results.push(SearchResult {
2060 path: file_path_str.clone(),
2061 lang: lang.clone(),
2062 kind: SymbolKind::Unknown("text_match".to_string()),
2063 symbol: None,
2064 span: Span { start_line: line_no, end_line: line_no },
2065 preview: line.to_string(),
2066 dependencies: None,
2067 });
2068 }
2069
2070 file_results
2071 })
2072 .collect();
2073
2074 log::info!(
2075 "Linear scan (short pattern '{}') found {} results across {} files",
2076 pattern, results.len(), file_count
2077 );
2078 Ok(results)
2079 }
2080
2081 fn get_regex_candidates(&self, pattern: &str, timeout: Option<&std::time::Duration>, start_time: &std::time::Instant, suppress_output: bool) -> Result<Vec<SearchResult>> {
2105 let regex = Regex::new(pattern)
2107 .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
2108
2109 if let Some(timeout_duration) = timeout {
2111 if start_time.elapsed() > *timeout_duration {
2112 anyhow::bail!(
2113 "Query timeout exceeded ({} seconds) during regex compilation",
2114 timeout_duration.as_secs()
2115 );
2116 }
2117 }
2118
2119 let trigrams = extract_trigrams_from_regex(pattern);
2121
2122 let content_path = self.cache.path().join("content.bin");
2124 let content_reader = ContentReader::open(&content_path)
2125 .context("Failed to open content store")?;
2126
2127 let mut results = Vec::new();
2128
2129 if trigrams.is_empty() {
2130 if !suppress_output {
2132 output::warn(&format!(
2133 "Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan. This may be slow on large codebases. Consider using patterns with literal text.",
2134 pattern
2135 ));
2136 }
2137
2138 for file_id in 0..content_reader.file_count() {
2140 let file_path = content_reader.get_file_path(file_id as u32)
2141 .context("Invalid file_id")?;
2142 let content = content_reader.get_file_content(file_id as u32)?;
2143
2144 self.find_regex_matches_in_file(
2145 ®ex,
2146 file_path,
2147 content,
2148 &mut results,
2149 )?;
2150 }
2151 } else {
2152 log::debug!("Using {} trigrams to narrow regex search candidates", trigrams.len());
2154
2155 let trigrams_path = self.cache.path().join("trigrams.bin");
2157 let trigram_index = if trigrams_path.exists() {
2158 TrigramIndex::load(&trigrams_path)?
2159 } else {
2160 Self::rebuild_trigram_index(&content_reader)?
2161 };
2162
2163 use crate::regex_trigrams::extract_literal_sequences;
2165 let literals = extract_literal_sequences(pattern);
2166
2167 if literals.is_empty() {
2168 log::warn!("Regex extraction found trigrams but no literal sequences - this shouldn't happen");
2169 for file_id in 0..content_reader.file_count() {
2171 let file_path = content_reader.get_file_path(file_id as u32)
2172 .context("Invalid file_id")?;
2173 let content = content_reader.get_file_content(file_id as u32)?;
2174 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
2175 }
2176 } else {
2177 use std::collections::HashSet;
2182 let mut candidate_files: HashSet<u32> = HashSet::new();
2183
2184 for literal in &literals {
2185 let candidates = trigram_index.search(literal);
2187 let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
2188
2189 log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
2190
2191 candidate_files.extend(file_ids);
2194 }
2195
2196 let final_candidates = candidate_files;
2197 log::debug!("After union: searching {} files that contain any literal", final_candidates.len());
2198
2199 for &file_id in &final_candidates {
2201 let file_path = trigram_index.get_file(file_id)
2202 .context("Invalid file_id from trigram search")?;
2203 let content = content_reader.get_file_content(file_id)?;
2204
2205 self.find_regex_matches_in_file(
2206 ®ex,
2207 file_path,
2208 content,
2209 &mut results,
2210 )?;
2211 }
2212 }
2213 }
2214
2215 log::info!("Regex search found {} matches for pattern '{}'", results.len(), pattern);
2216 Ok(results)
2217 }
2218
2219 fn find_regex_matches_in_file(
2221 &self,
2222 regex: &Regex,
2223 file_path: &std::path::Path,
2224 content: &str,
2225 results: &mut Vec<SearchResult>,
2226 ) -> Result<()> {
2227 let file_path_str = file_path.to_string_lossy().to_string();
2228
2229 let ext = file_path.extension()
2231 .and_then(|e| e.to_str())
2232 .unwrap_or("");
2233 let lang = Language::from_extension(ext);
2234
2235 for (line_idx, line) in content.lines().enumerate() {
2237 if regex.is_match(line) {
2238 let line_no = line_idx + 1;
2239
2240 results.push(SearchResult {
2247 path: file_path_str.clone(),
2248 lang: lang.clone(),
2249 kind: SymbolKind::Unknown("regex_match".to_string()),
2250 symbol: None, span: Span {
2252 start_line: line_no,
2253 end_line: line_no,
2254 },
2255 preview: line.to_string(),
2256 dependencies: None,
2257 });
2258 }
2259 }
2260
2261 Ok(())
2262 }
2263
    /// Forwards to `result::find_file_id`.
    ///
    /// Returns the id that helper reports for `target_path` in `content_reader`,
    /// or `None` when it finds none.
    fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
        result::find_file_id(content_reader, target_path)
    }
2267
    /// Forwards to `result::rebuild_trigram_index`.
    ///
    /// Callers in this file use it to obtain a `TrigramIndex` from the content
    /// store when `trigrams.bin` is missing or cannot be loaded.
    fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
        result::rebuild_trigram_index(content_reader)
    }
2271
    /// Forwards to `result::normalize_glob_pattern`.
    ///
    /// Callers in this file pass the returned string to `Glob::new`.
    fn normalize_glob_pattern(pattern: &str) -> String {
        result::normalize_glob_pattern(pattern)
    }
2275
    /// Forwards to `filter::has_word_boundary_match`.
    ///
    /// Used as the default line matcher when neither regex nor substring mode is
    /// requested.
    fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
        filter::has_word_boundary_match(line, pattern)
    }
2279
2280 pub fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
2285 let root = self.cache.workspace_root();
2286
2287 if crate::git::is_git_repo(&root) {
2289 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2290 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2292 let warning = IndexWarning {
2293 reason: format!("Branch '{}' has not been indexed", current_branch),
2294 action_required: "rfx index".to_string(),
2295 files_modified: None,
2296 details: Some(IndexWarningDetails {
2297 current_branch: Some(current_branch),
2298 indexed_branch: None,
2299 current_commit: None,
2300 indexed_commit: None,
2301 }),
2302 };
2303 return Ok((IndexStatus::Stale, false, Some(warning)));
2304 }
2305
2306 if let (Ok(current_commit), Ok(branch_info)) =
2308 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
2309
2310 if branch_info.commit_sha != current_commit {
2311 let warning = IndexWarning {
2312 reason: format!(
2313 "Commit changed from {} to {}",
2314 &branch_info.commit_sha[..7],
2315 ¤t_commit[..7]
2316 ),
2317 action_required: "rfx index".to_string(),
2318 files_modified: None,
2319 details: Some(IndexWarningDetails {
2320 current_branch: Some(current_branch.clone()),
2321 indexed_branch: Some(current_branch.clone()),
2322 current_commit: Some(current_commit.clone()),
2323 indexed_commit: Some(branch_info.commit_sha.clone()),
2324 }),
2325 };
2326 return Ok((IndexStatus::Stale, false, Some(warning)));
2327 }
2328
2329 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2331 let mut checked = 0;
2332 let mut changed = 0;
2333 const SAMPLE_SIZE: usize = 10;
2334
2335 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2336 checked += 1;
2337 let file_path = std::path::Path::new(path);
2338
2339 if let Ok(metadata) = std::fs::metadata(file_path) {
2340 if let Ok(modified) = metadata.modified() {
2341 let indexed_time = branch_info.last_indexed;
2342 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
2343 .unwrap_or_default()
2344 .as_secs() as i64;
2345
2346 if file_time > indexed_time {
2347 changed += 1;
2350 }
2351 }
2352 }
2353 }
2354
2355 if changed > 0 {
2356 let warning = IndexWarning {
2357 reason: format!("{} of {} sampled files modified", changed, checked),
2358 action_required: "rfx index".to_string(),
2359 files_modified: Some(changed as u32),
2360 details: Some(IndexWarningDetails {
2361 current_branch: Some(current_branch.clone()),
2362 indexed_branch: Some(branch_info.branch.clone()),
2363 current_commit: Some(current_commit.clone()),
2364 indexed_commit: Some(branch_info.commit_sha.clone()),
2365 }),
2366 };
2367 return Ok((IndexStatus::Stale, false, Some(warning)));
2368 }
2369 }
2370
2371 return Ok((IndexStatus::Fresh, true, None));
2373 }
2374 }
2375 }
2376
2377 Ok((IndexStatus::Fresh, true, None))
2379 }
2380
2381 fn check_index_freshness(&self, filter: &QueryFilter) -> Result<()> {
2388 let root = self.cache.workspace_root();
2389
2390 if crate::git::is_git_repo(&root) {
2392 if !crate::git::is_git_available() {
2393 static WARNED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
2394 if !filter.suppress_output {
2395 WARNED.get_or_init(|| {
2396 output::warn("⚠️ git binary not found in PATH; index freshness checks disabled for this session.");
2397 });
2398 }
2399 return Ok(());
2400 }
2401 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2402 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2404 if !filter.suppress_output {
2405 output::warn(&format!("⚠️ WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.", current_branch));
2406 }
2407 return Ok(());
2408 }
2409
2410 if let (Ok(current_commit), Ok(branch_info)) =
2412 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
2413
2414 if branch_info.commit_sha != current_commit {
2415 if !filter.suppress_output {
2416 output::warn(&format!("⚠️ WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
2417 &branch_info.commit_sha[..7], ¤t_commit[..7]));
2418 }
2419 return Ok(());
2420 }
2421
2422 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2425 let mut checked = 0;
2426 let mut changed = 0;
2427 const SAMPLE_SIZE: usize = 10;
2428
2429 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2430 checked += 1;
2431 let file_path = std::path::Path::new(path);
2432
2433 if let Ok(metadata) = std::fs::metadata(file_path) {
2435 if let Ok(modified) = metadata.modified() {
2436 let indexed_time = branch_info.last_indexed;
2437 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
2438 .unwrap_or_default()
2439 .as_secs() as i64;
2440
2441 if file_time > indexed_time {
2443 changed += 1;
2448 }
2449 }
2450 }
2451 }
2452
2453 if changed > 0 && !filter.suppress_output {
2454 output::warn(&format!("⚠️ WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.", changed, checked));
2455 }
2456 }
2457 }
2458 }
2459 }
2460
2461 Ok(())
2462 }
2463}
2464
/// Picks the guidance message (if any) that accompanies a query response.
///
/// The rules are evaluated in priority order and the first one that applies wins:
/// no results, too broad (`total_count >= 500`), paginated (`has_more`), one precise
/// symbol result, 2–10 precise symbol results, broad (`101..500` total), large
/// full-text result set, paths-only mode, AST mode, large regex result set, then
/// small result sets under a language filter, glob filter or exact mode.
///
/// Returns `None` when no rule applies.
pub fn generate_ai_instruction(
    result_count: usize,
    total_count: usize,
    has_more: bool,
    symbols_mode: bool,
    paths_only: bool,
    use_ast: bool,
    use_regex: bool,
    language_filter: bool,
    glob_filter: bool,
    exact_mode: bool,
) -> Option<String> {
    let instruction = if result_count == 0 {
        "No results found. Consider these alternatives: 1) Check pattern spelling, 2) Remove --kind or --lang filters to broaden search, 3) Try partial match or related term, 4) Use search_regex tool for pattern matching with special characters or complex patterns."
            .to_string()
    } else if total_count >= 500 {
        format!("Query too broad: {} results found. STOP. Do not list results. Refine search automatically by adding filters: kind parameter (Function/Struct/Class), lang parameter (rust/python/etc), or glob parameter (['src/**/*.rs']). Call search_code again with appropriate filters.", total_count)
    } else if has_more {
        format!("Showing {} of {} results. PAGINATED - there are more results available. Do not automatically fetch all results. Show current page, ask user if these results answer their question before fetching more with --offset parameter.", result_count, total_count)
    } else if symbols_mode && result_count == 1 {
        "Found 1 precise result. Respond concisely: '[symbol] at [path]:[line]'.".to_string()
    } else if symbols_mode && (2..=10).contains(&result_count) {
        format!("Found {} precise results (definitions only, not usages). List locations concisely: '[symbol] at [path]:[line]' for each result.", result_count)
    } else if (101..500).contains(&total_count) {
        format!("Found {} results - this is broad. Suggest refining search with: kind parameter (Function/Struct/Class/etc), lang parameter (rust/python/etc), or glob parameter to narrow file scope.", total_count)
    } else if !symbols_mode && result_count >= 100 {
        format!("Found {} results in full-text search mode (includes definitions AND all usages). Consider using symbols=true parameter to filter to definitions only. This typically reduces results by 80-90%.", result_count)
    } else if paths_only {
        format!("Found {} unique files (paths-only mode - no code content included). Next step: Use Read tool on specific files that look relevant based on their paths.", result_count)
    } else if use_ast {
        format!("Found {} results using AST pattern matching. These are structure-based matches using Tree-sitter patterns, not text search.", result_count)
    } else if use_regex && result_count >= 100 {
        format!("Found {} results using regex pattern matching. Regex matches are expansive. Consider using exact text search or symbols mode for more precise results.", result_count)
    } else if language_filter && result_count <= 5 {
        format!("Found {} results with language filter active. Results are limited to this language only. Remove lang parameter if you want to search all languages.", result_count)
    } else if glob_filter && result_count <= 10 {
        format!("Found {} results with glob filter active. Results are limited to matching paths. Remove glob parameter to search entire codebase.", result_count)
    } else if exact_mode && result_count <= 5 {
        format!("Found {} results in exact match mode. Only exact symbol name matches are included. Remove exact parameter to allow substring matching.", result_count)
    } else {
        // Nothing noteworthy about this result set.
        return None;
    };

    Some(instruction)
}
2576
2577#[cfg(test)]
2578mod tests {
2579 use super::*;
2580 use crate::indexer::Indexer;
2581 use crate::models::IndexConfig;
2582 use std::fs;
2583 use tempfile::TempDir;
2584
2585 #[test]
2588 fn test_query_engine_creation() {
2589 let temp = TempDir::new().unwrap();
2590 let cache = CacheManager::new(temp.path());
2591 let engine = QueryEngine::new(cache);
2592
2593 assert!(engine.cache.path().ends_with(".reflex"));
2594 }
2595
2596 #[test]
2597 fn test_filter_modes() {
2598 let filter_fulltext = QueryFilter::default();
2600 assert!(!filter_fulltext.symbols_mode);
2601
2602 let filter_symbols = QueryFilter {
2603 symbols_mode: true,
2604 ..Default::default()
2605 };
2606 assert!(filter_symbols.symbols_mode);
2607
2608 let filter_with_kind = QueryFilter {
2610 kind: Some(SymbolKind::Function),
2611 symbols_mode: true,
2612 ..Default::default()
2613 };
2614 assert!(filter_with_kind.symbols_mode);
2615 }
2616
2617 #[test]
2620 fn test_fulltext_search() {
2621 let temp = TempDir::new().unwrap();
2622 let project = temp.path().join("project");
2623 fs::create_dir(&project).unwrap();
2624
2625 fs::write(project.join("main.rs"), "fn main() {\n println!(\"hello\");\n}").unwrap();
2627 fs::write(project.join("lib.rs"), "pub fn hello() {}").unwrap();
2628
2629 let cache = CacheManager::new(&project);
2631 let indexer = Indexer::new(cache, IndexConfig::default());
2632 indexer.index(&project, false).unwrap();
2633
2634 let cache = CacheManager::new(&project);
2636 let engine = QueryEngine::new(cache);
2637 let filter = QueryFilter::default(); let results = engine.search("hello", filter).unwrap();
2639
2640 assert!(results.len() >= 2);
2642 assert!(results.iter().any(|r| r.path.contains("main.rs")));
2643 assert!(results.iter().any(|r| r.path.contains("lib.rs")));
2644 }
2645
2646 #[test]
2647 fn test_symbol_search() {
2648 let temp = TempDir::new().unwrap();
2649 let project = temp.path().join("project");
2650 fs::create_dir(&project).unwrap();
2651
2652 fs::write(
2654 project.join("main.rs"),
2655 "fn greet() {}\nfn main() {\n greet();\n}"
2656 ).unwrap();
2657
2658 let cache = CacheManager::new(&project);
2660 let indexer = Indexer::new(cache, IndexConfig::default());
2661 indexer.index(&project, false).unwrap();
2662
2663 let cache = CacheManager::new(&project);
2664
2665 let engine = QueryEngine::new(cache);
2667 let filter = QueryFilter {
2668 symbols_mode: true,
2669 ..Default::default()
2670 };
2671 let results = engine.search("greet", filter).unwrap();
2672
2673 assert!(results.len() >= 1);
2675 assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
2676 }
2677
2678 #[test]
2679 fn test_regex_search() {
2680 let temp = TempDir::new().unwrap();
2681 let project = temp.path().join("project");
2682 fs::create_dir(&project).unwrap();
2683
2684 fs::write(
2685 project.join("main.rs"),
2686 "fn test1() {}\nfn test2() {}\nfn other() {}"
2687 ).unwrap();
2688
2689 let cache = CacheManager::new(&project);
2690 let indexer = Indexer::new(cache, IndexConfig::default());
2691 indexer.index(&project, false).unwrap();
2692
2693 let cache = CacheManager::new(&project);
2694
2695 let engine = QueryEngine::new(cache);
2696 let filter = QueryFilter {
2697 use_regex: true,
2698 ..Default::default()
2699 };
2700 let results = engine.search(r"fn test\d", filter).unwrap();
2701
2702 assert_eq!(results.len(), 2);
2704 assert!(results.iter().all(|r| r.preview.contains("test")));
2705 }
2706
2707 #[test]
2710 fn test_language_filter() {
2711 let temp = TempDir::new().unwrap();
2712 let project = temp.path().join("project");
2713 fs::create_dir(&project).unwrap();
2714
2715 fs::write(project.join("main.rs"), "fn main() {}").unwrap();
2716 fs::write(project.join("main.js"), "function main() {}").unwrap();
2717
2718 let cache = CacheManager::new(&project);
2719 let indexer = Indexer::new(cache, IndexConfig::default());
2720 indexer.index(&project, false).unwrap();
2721
2722 let cache = CacheManager::new(&project);
2723
2724 let engine = QueryEngine::new(cache);
2725
2726 let filter = QueryFilter {
2728 language: Some(Language::Rust),
2729 ..Default::default()
2730 };
2731 let results = engine.search("main", filter).unwrap();
2732
2733 assert!(results.iter().all(|r| r.lang == Language::Rust));
2734 assert!(results.iter().all(|r| r.path.ends_with(".rs")));
2735 }
2736
2737 #[test]
2738 fn test_kind_filter() {
2739 let temp = TempDir::new().unwrap();
2740 let project = temp.path().join("project");
2741 fs::create_dir(&project).unwrap();
2742
2743 fs::write(
2744 project.join("main.rs"),
2745 "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }"
2746 ).unwrap();
2747
2748 let cache = CacheManager::new(&project);
2749 let indexer = Indexer::new(cache, IndexConfig::default());
2750 indexer.index(&project, false).unwrap();
2751
2752 let cache = CacheManager::new(&project);
2753
2754 let engine = QueryEngine::new(cache);
2755
2756 let filter = QueryFilter {
2758 symbols_mode: true,
2759 kind: Some(SymbolKind::Function),
2760 use_contains: true, ..Default::default()
2762 };
2763 let results = engine.search("mai", filter).unwrap();
2765
2766 assert!(results.len() > 0, "Should find at least one result");
2768 assert!(results.iter().any(|r| r.symbol.as_deref() == Some("main")), "Should find 'main' function");
2769 }
2770
2771 #[test]
2772 fn test_file_pattern_filter() {
2773 let temp = TempDir::new().unwrap();
2774 let project = temp.path().join("project");
2775 fs::create_dir_all(project.join("src")).unwrap();
2776 fs::create_dir_all(project.join("tests")).unwrap();
2777
2778 fs::write(project.join("src/lib.rs"), "fn foo() {}").unwrap();
2779 fs::write(project.join("tests/test.rs"), "fn foo() {}").unwrap();
2780
2781 let cache = CacheManager::new(&project);
2782 let indexer = Indexer::new(cache, IndexConfig::default());
2783 indexer.index(&project, false).unwrap();
2784
2785 let cache = CacheManager::new(&project);
2786
2787 let engine = QueryEngine::new(cache);
2788
2789 let filter = QueryFilter {
2791 file_pattern: Some("src/".to_string()),
2792 ..Default::default()
2793 };
2794 let results = engine.search("foo", filter).unwrap();
2795
2796 assert!(results.iter().all(|r| r.path.contains("src/")));
2797 assert!(!results.iter().any(|r| r.path.contains("tests/")));
2798 }
2799
2800 #[test]
2801 fn test_limit_filter() {
2802 let temp = TempDir::new().unwrap();
2803 let project = temp.path().join("project");
2804 fs::create_dir(&project).unwrap();
2805
2806 let content = (0..20).map(|i| format!("fn test{}() {{}}", i)).collect::<Vec<_>>().join("\n");
2808 fs::write(project.join("main.rs"), content).unwrap();
2809
2810 let cache = CacheManager::new(&project);
2811 let indexer = Indexer::new(cache, IndexConfig::default());
2812 indexer.index(&project, false).unwrap();
2813
2814 let cache = CacheManager::new(&project);
2815
2816 let engine = QueryEngine::new(cache);
2817
2818 let filter = QueryFilter {
2820 limit: Some(5),
2821 use_contains: true, ..Default::default()
2823 };
2824 let results = engine.search("test", filter).unwrap();
2825
2826 assert_eq!(results.len(), 5);
2827 }
2828
2829 #[test]
2830 fn test_exact_match_filter() {
2831 let temp = TempDir::new().unwrap();
2832 let project = temp.path().join("project");
2833 fs::create_dir(&project).unwrap();
2834
2835 fs::write(
2836 project.join("main.rs"),
2837 "fn test() {}\nfn test_helper() {}\nfn other_test() {}"
2838 ).unwrap();
2839
2840 let cache = CacheManager::new(&project);
2841 let indexer = Indexer::new(cache, IndexConfig::default());
2842 indexer.index(&project, false).unwrap();
2843
2844 let cache = CacheManager::new(&project);
2845
2846 let engine = QueryEngine::new(cache);
2847
2848 let filter = QueryFilter {
2850 symbols_mode: true,
2851 exact: true,
2852 ..Default::default()
2853 };
2854 let results = engine.search("test", filter).unwrap();
2855
2856 assert_eq!(results.len(), 1);
2858 assert_eq!(results[0].symbol.as_deref(), Some("test"));
2859 }
2860
2861 #[test]
2864 fn test_expand_mode() {
2865 let temp = TempDir::new().unwrap();
2866 let project = temp.path().join("project");
2867 fs::create_dir(&project).unwrap();
2868
2869 fs::write(
2870 project.join("main.rs"),
2871 "fn greet() {\n println!(\"Hello\");\n println!(\"World\");\n}"
2872 ).unwrap();
2873
2874 let cache = CacheManager::new(&project);
2875 let indexer = Indexer::new(cache, IndexConfig::default());
2876 indexer.index(&project, false).unwrap();
2877
2878 let cache = CacheManager::new(&project);
2879
2880 let engine = QueryEngine::new(cache);
2881
2882 let filter = QueryFilter {
2884 symbols_mode: true,
2885 expand: true,
2886 ..Default::default()
2887 };
2888 let results = engine.search("greet", filter).unwrap();
2889
2890 assert!(results.len() >= 1);
2892 let result = &results[0];
2893 assert!(result.preview.contains("println"));
2894 }
2895
2896 #[test]
2899 fn test_search_empty_index() {
2900 let temp = TempDir::new().unwrap();
2901 let project = temp.path().join("project");
2902 fs::create_dir(&project).unwrap();
2903
2904 let cache = CacheManager::new(&project);
2905 let indexer = Indexer::new(cache, IndexConfig::default());
2906 indexer.index(&project, false).unwrap();
2907
2908 let cache = CacheManager::new(&project);
2909
2910 let engine = QueryEngine::new(cache);
2911 let filter = QueryFilter::default();
2912 let results = engine.search("nonexistent", filter).unwrap();
2913
2914 assert_eq!(results.len(), 0);
2915 }
2916
2917 #[test]
2918 fn test_search_no_index() {
2919 let temp = TempDir::new().unwrap();
2920 let project = temp.path().join("project");
2921 fs::create_dir(&project).unwrap();
2922
2923 let cache = CacheManager::new(&project);
2924 let engine = QueryEngine::new(cache);
2925 let filter = QueryFilter::default();
2926
2927 assert!(engine.search("test", filter).is_err());
2929 }
2930
2931 #[test]
2932 fn test_search_special_characters() {
2933 let temp = TempDir::new().unwrap();
2934 let project = temp.path().join("project");
2935 fs::create_dir(&project).unwrap();
2936
2937 fs::write(project.join("main.rs"), "let x = 42;\nlet y = x + 1;").unwrap();
2938
2939 let cache = CacheManager::new(&project);
2940 let indexer = Indexer::new(cache, IndexConfig::default());
2941 indexer.index(&project, false).unwrap();
2942
2943 let cache = CacheManager::new(&project);
2944
2945 let engine = QueryEngine::new(cache);
2946 let filter = QueryFilter::default();
2947
2948 let results = engine.search("x + ", filter).unwrap();
2950 assert!(results.len() >= 1);
2951 }
2952
2953 #[test]
2954 fn test_search_unicode() {
2955 let temp = TempDir::new().unwrap();
2956 let project = temp.path().join("project");
2957 fs::create_dir(&project).unwrap();
2958
2959 fs::write(project.join("main.rs"), "// 你好世界\nfn main() {}").unwrap();
2960
2961 let cache = CacheManager::new(&project);
2962 let indexer = Indexer::new(cache, IndexConfig::default());
2963 indexer.index(&project, false).unwrap();
2964
2965 let cache = CacheManager::new(&project);
2966
2967 let engine = QueryEngine::new(cache);
2968 let filter = QueryFilter {
2969 use_contains: true, force: true, ..Default::default()
2972 };
2973
2974 let results = engine.search("你好", filter).unwrap();
2976 assert!(results.len() >= 1);
2977 }
2978
2979 #[test]
2980 fn test_case_sensitive_search() {
2981 let temp = TempDir::new().unwrap();
2982 let project = temp.path().join("project");
2983 fs::create_dir(&project).unwrap();
2984
2985 fs::write(project.join("main.rs"), "fn Test() {}\nfn test() {}").unwrap();
2986
2987 let cache = CacheManager::new(&project);
2988 let indexer = Indexer::new(cache, IndexConfig::default());
2989 indexer.index(&project, false).unwrap();
2990
2991 let cache = CacheManager::new(&project);
2992
2993 let engine = QueryEngine::new(cache);
2994 let filter = QueryFilter::default();
2995
2996 let results = engine.search("Test", filter).unwrap();
2998 assert!(results.iter().any(|r| r.preview.contains("Test()")));
2999 }
3000
3001 #[test]
3004 fn test_results_sorted_deterministically() {
3005 let temp = TempDir::new().unwrap();
3006 let project = temp.path().join("project");
3007 fs::create_dir(&project).unwrap();
3008
3009 fs::write(project.join("a.rs"), "fn test() {}").unwrap();
3010 fs::write(project.join("z.rs"), "fn test() {}").unwrap();
3011 fs::write(project.join("m.rs"), "fn test() {}\nfn test2() {}").unwrap();
3012
3013 let cache = CacheManager::new(&project);
3014 let indexer = Indexer::new(cache, IndexConfig::default());
3015 indexer.index(&project, false).unwrap();
3016
3017 let cache = CacheManager::new(&project);
3018
3019 let engine = QueryEngine::new(cache);
3020 let filter = QueryFilter::default();
3021
3022 let results1 = engine.search("test", filter.clone()).unwrap();
3024 let results2 = engine.search("test", filter.clone()).unwrap();
3025 let results3 = engine.search("test", filter).unwrap();
3026
3027 assert_eq!(results1.len(), results2.len());
3029 assert_eq!(results1.len(), results3.len());
3030
3031 for i in 0..results1.len() {
3032 assert_eq!(results1[i].path, results2[i].path);
3033 assert_eq!(results1[i].path, results3[i].path);
3034 assert_eq!(results1[i].span.start_line, results2[i].span.start_line);
3035 assert_eq!(results1[i].span.start_line, results3[i].span.start_line);
3036 }
3037
3038 for i in 0..results1.len().saturating_sub(1) {
3040 let curr = &results1[i];
3041 let next = &results1[i + 1];
3042 assert!(
3043 curr.path < next.path ||
3044 (curr.path == next.path && curr.span.start_line <= next.span.start_line)
3045 );
3046 }
3047 }
3048
3049 #[test]
3052 fn test_multiple_filters_combined() {
3053 let temp = TempDir::new().unwrap();
3054 let project = temp.path().join("project");
3055 fs::create_dir_all(project.join("src")).unwrap();
3056
3057 fs::write(project.join("src/main.rs"), "fn test() {}\nstruct Test {}").unwrap();
3058 fs::write(project.join("src/lib.rs"), "fn test() {}").unwrap();
3059 fs::write(project.join("test.js"), "function test() {}").unwrap();
3060
3061 let cache = CacheManager::new(&project);
3062 let indexer = Indexer::new(cache, IndexConfig::default());
3063 indexer.index(&project, false).unwrap();
3064
3065 let cache = CacheManager::new(&project);
3066
3067 let engine = QueryEngine::new(cache);
3068
3069 let filter = QueryFilter {
3071 language: Some(Language::Rust),
3072 kind: Some(SymbolKind::Function),
3073 file_pattern: Some("src/main".to_string()),
3074 symbols_mode: true,
3075 ..Default::default()
3076 };
3077 let results = engine.search("test", filter).unwrap();
3078
3079 assert_eq!(results.len(), 1);
3081 assert!(results[0].path.contains("src/main.rs"));
3082 assert_eq!(results[0].kind, SymbolKind::Function);
3083 }
3084
3085 #[test]
3088 fn test_find_symbol_helper() {
3089 let temp = TempDir::new().unwrap();
3090 let project = temp.path().join("project");
3091 fs::create_dir(&project).unwrap();
3092
3093 fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
3094
3095 let cache = CacheManager::new(&project);
3096 let indexer = Indexer::new(cache, IndexConfig::default());
3097 indexer.index(&project, false).unwrap();
3098
3099 let cache = CacheManager::new(&project);
3100
3101 let engine = QueryEngine::new(cache);
3102 let results = engine.find_symbol("greet").unwrap();
3103
3104 assert!(results.len() >= 1);
3105 assert_eq!(results[0].kind, SymbolKind::Function);
3106 }
3107
3108 #[test]
3109 fn test_list_by_kind_helper() {
3110 let temp = TempDir::new().unwrap();
3111 let project = temp.path().join("project");
3112 fs::create_dir(&project).unwrap();
3113
3114 fs::write(
3115 project.join("main.rs"),
3116 "struct Point {}\nfn test() {}\nstruct Line {}"
3117 ).unwrap();
3118
3119 let cache = CacheManager::new(&project);
3120 let indexer = Indexer::new(cache, IndexConfig::default());
3121 indexer.index(&project, false).unwrap();
3122
3123 let cache = CacheManager::new(&project);
3124
3125 let engine = QueryEngine::new(cache);
3126
3127 let filter = QueryFilter {
3129 kind: Some(SymbolKind::Struct),
3130 symbols_mode: true,
3131 use_contains: true, ..Default::default()
3133 };
3134 let results = engine.search("oin", filter).unwrap();
3135
3136 assert!(results.len() >= 1, "Should find at least Point struct");
3138 assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
3139 assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
3140 }
3141
3142 #[test]
3145 fn test_search_with_metadata() {
3146 let temp = TempDir::new().unwrap();
3147 let project = temp.path().join("project");
3148 fs::create_dir(&project).unwrap();
3149
3150 fs::write(project.join("main.rs"), "fn test() {}").unwrap();
3151
3152 let cache = CacheManager::new(&project);
3153 let indexer = Indexer::new(cache, IndexConfig::default());
3154 indexer.index(&project, false).unwrap();
3155
3156 let cache = CacheManager::new(&project);
3157
3158 let engine = QueryEngine::new(cache);
3159 let filter = QueryFilter::default();
3160 let response = engine.search_with_metadata("test", filter).unwrap();
3161
3162 assert!(response.results.len() >= 1);
3164 }
3166
3167 #[test]
3170 fn test_search_across_languages() {
3171 let temp = TempDir::new().unwrap();
3172 let project = temp.path().join("project");
3173 fs::create_dir(&project).unwrap();
3174
3175 fs::write(project.join("main.rs"), "fn greet() {}").unwrap();
3176 fs::write(project.join("main.ts"), "function greet() {}").unwrap();
3177 fs::write(project.join("main.py"), "def greet(): pass").unwrap();
3178
3179 let cache = CacheManager::new(&project);
3180 let indexer = Indexer::new(cache, IndexConfig::default());
3181 indexer.index(&project, false).unwrap();
3182
3183 let cache = CacheManager::new(&project);
3184
3185 let engine = QueryEngine::new(cache);
3186 let filter = QueryFilter::default();
3187 let results = engine.search("greet", filter).unwrap();
3188
3189 assert!(results.len() >= 3);
3191 assert!(results.iter().any(|r| r.lang == Language::Rust));
3192 assert!(results.iter().any(|r| r.lang == Language::TypeScript));
3193 assert!(results.iter().any(|r| r.lang == Language::Python));
3194 }
3195}