1pub mod filter;
7pub mod result;
8
9pub use filter::QueryFilter;
10
11use anyhow::{Context, Result};
12use regex::Regex;
13
14use crate::cache::CacheManager;
15use crate::content_store::ContentReader;
16use crate::models::{
17 IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
18 SymbolKind,
19};
20use crate::output;
21use crate::parsers::ParserFactory;
22use crate::regex_trigrams::extract_trigrams_from_regex;
23use crate::trigram::TrigramIndex;
24
/// Executes searches against an existing index cache.
///
/// All query entry points on this type read from the wrapped cache.
pub struct QueryEngine {
    /// Handle to the index cache that queries are answered from.
    cache: CacheManager,
}
29
30impl QueryEngine {
31 pub fn new(cache: CacheManager) -> Self {
33 Self { cache }
34 }
35
36 fn load_dependencies(&self, results: &mut [SearchResult], include_deps: bool) -> Result<()> {
39 if !include_deps || results.is_empty() {
40 return Ok(());
41 }
42
43 log::debug!("Loading dependencies for {} results", results.len());
44
45 let workspace_root = self
49 .cache
50 .path()
51 .parent()
52 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
53 let cache_for_deps = CacheManager::new(workspace_root);
54 let dep_index = crate::dependency::DependencyIndex::new(cache_for_deps);
55
56 for result in results {
58 let normalized_path = result.path.strip_prefix("./").unwrap_or(&result.path);
60
61 match self.cache.get_file_id(normalized_path) {
63 Ok(Some(file_id)) => {
64 log::debug!("Found file_id={} for path={}", file_id, result.path);
65 match dep_index.get_dependencies_info(file_id) {
67 Ok(dep_infos) => {
68 log::debug!(
69 "Loaded {} dependencies for file_id={}",
70 dep_infos.len(),
71 file_id
72 );
73 if !dep_infos.is_empty() {
74 result.dependencies = Some(dep_infos);
75 }
76 }
77 Err(e) => {
78 log::warn!("Failed to get dependencies for file_id={}: {}", file_id, e);
79 }
80 }
81 }
82 Ok(None) => {
83 log::warn!("No file_id found for path: {}", result.path);
84 }
85 Err(e) => {
86 log::warn!("Failed to get file_id for path {}: {}", result.path, e);
87 }
88 }
89 }
90
91 Ok(())
92 }
93
94 fn group_and_load_dependencies(
97 &self,
98 results: Vec<SearchResult>,
99 include_deps: bool,
100 context_lines: usize,
101 ) -> Result<Vec<crate::models::FileGroupedResult>> {
102 use crate::models::{FileGroupedResult, MatchResult};
103 use std::collections::HashMap;
104
105 if results.is_empty() {
106 return Ok(Vec::new());
107 }
108
109 let mut grouped: HashMap<String, Vec<SearchResult>> = HashMap::new();
111 for result in results {
112 grouped.entry(result.path.clone()).or_default().push(result);
113 }
114
115 let dep_index = if include_deps {
117 let workspace_root = self
118 .cache
119 .path()
120 .parent()
121 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
122 let cache_for_deps = CacheManager::new(workspace_root);
123 Some(crate::dependency::DependencyIndex::new(cache_for_deps))
124 } else {
125 None
126 };
127
128 let content_path = self.cache.path().join("content.bin");
130 let content_reader_opt = ContentReader::open(&content_path).ok();
131
132 let mut file_results: Vec<FileGroupedResult> = grouped
134 .into_iter()
135 .map(|(path, file_matches)| {
136 let language = file_matches.first().map(|r| r.lang).unwrap_or_default();
138
139 let dependencies = if let Some(dep_idx) = &dep_index {
141 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
142 match self.cache.get_file_id(normalized_path) {
143 Ok(Some(file_id)) => match dep_idx.get_dependencies_info(file_id) {
144 Ok(dep_infos) if !dep_infos.is_empty() => {
145 log::debug!(
146 "Loaded {} dependencies for file: {}",
147 dep_infos.len(),
148 path
149 );
150 Some(dep_infos)
151 }
152 Ok(_) => None,
153 Err(e) => {
154 log::warn!("Failed to get dependencies for {}: {}", path, e);
155 None
156 }
157 },
158 Ok(None) => {
159 log::warn!("No file_id found for path: {}", path);
160 None
161 }
162 Err(e) => {
163 log::warn!("Failed to get file_id for path {}: {}", path, e);
164 None
165 }
166 }
167 } else {
168 None
169 };
170
171 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
175 let file_id_for_context = if let Some(reader) = &content_reader_opt {
176 reader.get_file_id_by_path(normalized_path)
177 } else {
178 None
179 };
180 log::debug!(
181 "Context extraction: file={}, file_id={:?}, content_reader={}",
182 path,
183 file_id_for_context,
184 content_reader_opt.is_some()
185 );
186
187 let matches: Vec<MatchResult> = file_matches
189 .into_iter()
190 .map(|r| {
191 let (context_before, context_after) = if context_lines > 0 {
193 if let (Some(reader), Some(fid)) =
194 (&content_reader_opt, file_id_for_context)
195 {
196 let result = reader
197 .get_context_by_line(
198 fid as u32,
199 r.span.start_line,
200 context_lines,
201 )
202 .unwrap_or_else(|e| {
203 log::warn!(
204 "Failed to extract context for {}:{}: {}",
205 path,
206 r.span.start_line,
207 e
208 );
209 (vec![], vec![])
210 });
211 log::debug!(
212 "Extracted context for {}:{} - before: {}, after: {}",
213 path,
214 r.span.start_line,
215 result.0.len(),
216 result.1.len()
217 );
218 result
219 } else {
220 if content_reader_opt.is_none() {
221 log::debug!(
222 "No ContentReader available for context extraction"
223 );
224 }
225 if file_id_for_context.is_none() {
226 log::debug!("No file_id found for {}", path);
227 }
228 (vec![], vec![])
229 }
230 } else {
231 (vec![], vec![])
232 };
233
234 MatchResult {
235 kind: r.kind,
236 symbol: r.symbol,
237 span: r.span,
238 preview: r.preview,
239 context_before,
240 context_after,
241 }
242 })
243 .collect();
244
245 FileGroupedResult {
246 path,
247 language,
248 dependencies,
249 matches,
250 }
251 })
252 .collect();
253
254 file_results.sort_by(|a, b| a.path.cmp(&b.path));
256
257 Ok(file_results)
258 }
259
260 pub fn search_with_metadata(
265 &self,
266 pattern: &str,
267 filter: QueryFilter,
268 ) -> Result<QueryResponse> {
269 log::info!(
270 "Executing query with metadata: pattern='{}', filter={:?}",
271 pattern,
272 filter
273 );
274
275 if !self.cache.exists() {
277 anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
278 }
279
280 if let Err(e) = self.cache.validate() {
282 anyhow::bail!(
283 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
284 e
285 );
286 }
287
288 let (status, can_trust_results, warning) = self.get_index_status()?;
290
291 let (results, total) = self.search_internal(pattern, filter.clone())?;
293
294 use crate::models::PaginationInfo;
296 let pagination = PaginationInfo {
297 total,
298 count: results.len(),
299 offset: filter.offset.unwrap_or(0),
300 limit: filter.limit,
301 has_more: total > filter.offset.unwrap_or(0) + results.len(),
302 };
303
304 let grouped_results = self.group_and_load_dependencies(
307 results,
308 filter.include_dependencies,
309 filter.context_lines,
310 )?;
311
312 Ok(QueryResponse {
313 ai_instruction: None, status,
315 can_trust_results,
316 warning,
317 pagination,
318 results: grouped_results,
319 })
320 }
321
322 pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
327 log::info!(
328 "Executing query: pattern='{}', filter={:?}",
329 pattern,
330 filter
331 );
332
333 if !self.cache.exists() {
335 anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
336 }
337
338 if let Err(e) = self.cache.validate() {
340 anyhow::bail!(
341 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
342 e
343 );
344 }
345
346 self.check_index_freshness(&filter)?;
348
349 let (mut results, _total_count) = self.search_internal(pattern, filter.clone())?;
351
352 self.load_dependencies(&mut results, filter.include_dependencies)?;
354
355 Ok(results)
356 }
357
358 fn search_internal(
361 &self,
362 pattern: &str,
363 filter: QueryFilter,
364 ) -> Result<(Vec<SearchResult>, usize)> {
365 use std::time::{Duration, Instant};
366
367 let start_time = Instant::now();
369 let timeout = if filter.timeout_secs > 0 {
370 Some(Duration::from_secs(filter.timeout_secs))
371 } else {
372 None
373 };
374
375 let is_keyword_query = if filter.symbols_mode || filter.kind.is_some() {
389 pattern.is_empty() || ParserFactory::get_all_keywords().contains(&pattern)
390 } else {
391 false
392 };
393
394 let mut filter = filter.clone(); if is_keyword_query && filter.kind.is_none() {
399 if let Some(inferred_kind) = Self::keyword_to_kind(pattern) {
400 log::info!(
401 "Keyword '{}' mapped to kind {:?} (auto-inferred)",
402 pattern,
403 inferred_kind
404 );
405 filter.kind = Some(inferred_kind);
406 }
407 }
408
409 if !filter.force && !filter.use_regex && !is_keyword_query {
421 let stats = self.cache.stats()?;
422 let total_files = stats.total_files;
423 let pattern_len = pattern.chars().count();
424
425 let large_index_threshold = filter.test_large_index_threshold.unwrap_or(20_000);
430 let short_pattern_threshold = filter.test_short_pattern_threshold.unwrap_or(4);
431
432 if total_files > large_index_threshold && pattern_len < short_pattern_threshold {
433 anyhow::bail!(
434 "Query too broad - would be expensive to execute on this large index\n\
435 \n\
436 This index contains {} files, and pattern '{}' ({} characters) is too short for efficient searching.\n\
437 On large codebases, short patterns can take 10-30+ seconds to complete.\n\
438 \n\
439 This query could:\n\
440 • Hang for an extended period before returning results\n\
441 • Return thousands of results\n\
442 • Flood LLM context windows with excessive data\n\
443 • Fail entirely\n\
444 \n\
445 Suggestions to narrow the query:\n\
446 • Use a longer, more specific pattern (4+ characters recommended for large indexes)\n\
447 • Add a language filter: --lang <language>\n\
448 • Add a file filter: --glob <pattern> or --file <path>\n\
449 • Use --force to bypass this check if you really need all results\n\
450 \n\
451 To force execution anyway:\n\
452 rfx query \"{}\" --force",
453 total_files,
454 pattern,
455 pattern_len,
456 pattern
457 );
458 }
459 }
460
461 let mut results = if is_keyword_query {
463 if let Some(lang) = filter.language {
466 log::info!(
467 "Keyword query detected for '{}' - scanning all {:?} files (bypassing trigram search)",
468 pattern,
469 lang
470 );
471 } else {
472 log::info!(
473 "Keyword query detected for '{}' - scanning all files (bypassing trigram search)",
474 pattern
475 );
476 }
477 self.get_all_language_files(&filter)?
478 } else if filter.use_regex {
479 self.get_regex_candidates(
481 pattern,
482 timeout.as_ref(),
483 &start_time,
484 filter.suppress_output,
485 )?
486 } else {
487 self.get_trigram_candidates(pattern, &filter)?
489 };
490
491 if !is_keyword_query {
497 if let Some(lang) = filter.language {
498 let before_count = results.len();
499 results.retain(|r| r.lang == lang);
500 log::debug!(
501 "Language filter ({:?}): reduced {} candidates to {} candidates",
502 lang,
503 before_count,
504 results.len()
505 );
506 }
507 }
508
509 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
513 use globset::{Glob, GlobSetBuilder};
514
515 let include_matcher = if !filter.glob_patterns.is_empty() {
517 let mut builder = GlobSetBuilder::new();
518 for pattern in &filter.glob_patterns {
519 let normalized = Self::normalize_glob_pattern(pattern);
521 match Glob::new(&normalized) {
522 Ok(glob) => {
523 builder.add(glob);
524 }
525 Err(e) => {
526 log::warn!("Invalid glob pattern '{}': {}", pattern, e);
527 }
528 }
529 }
530 match builder.build() {
531 Ok(matcher) => Some(matcher),
532 Err(e) => {
533 log::warn!("Failed to build glob matcher: {}", e);
534 None
535 }
536 }
537 } else {
538 None
539 };
540
541 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
543 let mut builder = GlobSetBuilder::new();
544 for pattern in &filter.exclude_patterns {
545 let normalized = Self::normalize_glob_pattern(pattern);
547 match Glob::new(&normalized) {
548 Ok(glob) => {
549 builder.add(glob);
550 }
551 Err(e) => {
552 log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
553 }
554 }
555 }
556 match builder.build() {
557 Ok(matcher) => Some(matcher),
558 Err(e) => {
559 log::warn!("Failed to build exclude matcher: {}", e);
560 None
561 }
562 }
563 } else {
564 None
565 };
566
567 let before_count = results.len();
569 results.retain(|r| {
570 let included = if let Some(ref matcher) = include_matcher {
572 matcher.is_match(&r.path)
573 } else {
574 true };
576
577 let excluded = if let Some(ref matcher) = exclude_matcher {
579 matcher.is_match(&r.path)
580 } else {
581 false };
583
584 included && !excluded
585 });
586 log::debug!(
587 "Glob filter: reduced {} candidates to {} candidates",
588 before_count,
589 results.len()
590 );
591 }
592
593 if let Some(timeout_duration) = timeout {
595 if start_time.elapsed() > timeout_duration {
596 anyhow::bail!(
597 "Query timeout exceeded ({} seconds).\n\
598 \n\
599 The query took too long to complete. Try one of these approaches:\n\
600 • Use a more specific search pattern (longer patterns = faster search)\n\
601 • Add a language filter with --lang to narrow the search space\n\
602 • Add a file filter with --file to search specific directories\n\
603 • Increase the timeout with --timeout <seconds>\n\
604 \n\
605 Example: rfx query \"{}\" --lang rust --timeout 60",
606 filter.timeout_secs,
607 pattern
608 );
609 }
610 }
611
612 if !filter.force {
615 let candidate_count = results.len();
616 let pattern_len = pattern.chars().count();
617
618 let is_short_pattern = pattern_len < 3 && !filter.use_regex && !is_keyword_query;
621
622 let is_broad_ast =
625 filter.use_ast && filter.glob_patterns.is_empty() && candidate_count >= 100;
626
627 let threshold = if filter.use_ast && filter.glob_patterns.is_empty() {
634 100 } else if filter.use_ast {
636 10_000 } else if is_keyword_query {
638 20_000 } else {
640 50_000 };
642
643 let has_many_candidates = candidate_count > threshold
644 && (filter.symbols_mode || filter.kind.is_some() || filter.use_ast);
645
646 if is_short_pattern || has_many_candidates || is_broad_ast {
647 let reason = if is_short_pattern {
648 format!(
649 "Pattern '{}' is too short ({} characters). Short patterns bypass trigram optimization and require scanning many files.",
650 pattern, pattern_len
651 )
652 } else if is_broad_ast {
653 format!(
654 "AST query without --glob restriction will scan the entire codebase ({} files). AST queries are SLOW (500ms-10s+).",
655 candidate_count
656 )
657 } else if is_keyword_query {
658 format!(
659 "Keyword query '{}' matched {} files. This query scans all files of the target language, which will take significant time and produce excessive results.",
660 pattern, candidate_count
661 )
662 } else {
663 format!(
664 "Query matched {} files. Parsing this many files with --symbols or --kind will take significant time and produce excessive results.",
665 candidate_count
666 )
667 };
668
669 let suggestions = if is_short_pattern {
670 vec![
671 "• Use a longer, more specific pattern (3+ characters recommended)",
672 "• Add a language filter: --lang <language>",
673 "• Add a file path filter: --file <path> or --glob <pattern>",
674 "• Use --force to bypass this check if you really need all results",
675 ]
676 } else if is_broad_ast {
677 vec![
678 "• Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'",
679 "• Use --symbols instead (10-100x faster in 95% of cases)",
680 "• Use --force to bypass this check if you need a full codebase scan",
681 ]
682 } else if is_keyword_query {
683 vec![
684 "• Add a language filter to reduce files scanned: --lang <language>",
685 "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
686 "• Add --kind to filter to specific symbol types: --kind function",
687 "• Use a more specific pattern instead of a keyword",
688 "• Use --force to bypass this check if you need all results",
689 ]
690 } else {
691 vec![
692 "• Add a language filter to reduce candidate set: --lang <language>",
693 "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
694 "• Use a more specific search pattern",
695 "• Use --force to bypass this check if you need all results",
696 ]
697 };
698
699 let mut cmd_flags = String::new();
701 if filter.symbols_mode {
702 cmd_flags.push_str("--symbols ");
703 }
704 if let Some(ref lang) = filter.language {
705 cmd_flags.push_str(&format!("--lang {:?} ", lang));
706 }
707 if let Some(ref kind) = filter.kind {
708 cmd_flags.push_str(&format!("--kind {:?} ", kind));
709 }
710 if filter.use_ast {
711 cmd_flags.push_str("--ast ");
712 }
713
714 anyhow::bail!(
715 "Query too broad - would be expensive to execute\n\
716 \n\
717 {}\n\
718 \n\
719 This query could:\n\
720 • Hang for an extended period before returning results\n\
721 • Return thousands of results\n\
722 • Flood LLM context windows with excessive data\n\
723 • Fail entirely\n\
724 \n\
725 Suggestions to narrow the query:\n\
726 {}\n\
727 \n\
728 To force execution anyway:\n\
729 rfx query \"{}\" --force {}",
730 reason,
731 suggestions.join("\n "),
732 pattern,
733 cmd_flags
734 );
735 }
736 }
737
738 if filter.symbols_mode || filter.kind.is_some() || filter.use_ast {
741 results.sort_by(|a, b| {
742 a.path
743 .cmp(&b.path)
744 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
745 });
746
747 let candidate_count = results.len();
749 if candidate_count > 1000 && !filter.suppress_output {
750 output::warn(&format!(
751 "Pattern '{}' matched {} files - parsing may take some time. Consider using --file, --glob, or a more specific pattern to narrow the search.",
752 pattern, candidate_count
753 ));
754 } else if candidate_count > 100 {
755 log::info!(
756 "Parsing {} candidate files for symbol extraction",
757 candidate_count
758 );
759 }
760 }
761
762 if filter.use_ast {
764 results = self.enrich_with_ast(results, pattern, filter.language)?;
766 } else if filter.symbols_mode || filter.kind.is_some() {
767 results = self.enrich_with_symbols(results, pattern, &filter)?;
769 }
770
771 if filter.symbols_mode || filter.kind.is_some() {
780 let mut seen = std::collections::HashSet::<(String, usize, Option<String>)>::new();
781 results.retain(|r| seen.insert((r.path.clone(), r.span.start_line, r.symbol.clone())));
782 }
783
784 if let Some(ref kind) = filter.kind {
787 results.retain(|r| {
788 if matches!(kind, SymbolKind::Function) {
789 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
791 } else {
792 r.kind == *kind
793 }
794 });
795 }
796
797 if let Some(ref file_pattern) = filter.file_pattern {
799 results.retain(|r| r.path.contains(file_pattern));
800 }
801
802 if filter.exact && filter.symbols_mode {
804 results.retain(|r| r.symbol.as_deref() == Some(pattern));
805 }
806
807 if filter.expand {
810 let content_path = self.cache.path().join("content.bin");
812 if let Ok(content_reader) = ContentReader::open(&content_path) {
813 for result in &mut results {
814 if result.span.start_line < result.span.end_line {
816 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
818 if let Ok(content) = content_reader.get_file_content(file_id) {
820 let lines: Vec<&str> = content.lines().collect();
821 let start_idx = (result.span.start_line as usize).saturating_sub(1);
822 let end_idx = (result.span.end_line as usize).min(lines.len());
823
824 if start_idx < end_idx {
825 let full_body = lines[start_idx..end_idx].join("\n");
826 result.preview = full_body;
827 }
828 }
829 }
830 }
831 }
832 }
833 }
834
835 if filter.paths_only {
837 use std::collections::HashSet;
838 let mut seen_paths = HashSet::new();
839 results.retain(|r| seen_paths.insert(r.path.clone()));
840 }
841
842 results.sort_by(|a, b| {
844 a.path
845 .cmp(&b.path)
846 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
847 });
848
849 let total_count = results.len();
852
853 if let Some(offset) = filter.offset {
855 if offset < results.len() {
856 results = results.into_iter().skip(offset).collect();
857 } else {
858 results.clear();
860 }
861 }
862
863 if let Some(limit) = filter.limit {
865 results.truncate(limit);
866 }
867
868 log::info!(
869 "Query returned {} results (total before pagination: {})",
870 results.len(),
871 total_count
872 );
873
874 Ok((results, total_count))
875 }
876
877 pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
879 let filter = QueryFilter {
880 symbols_mode: true,
881 ..Default::default()
882 };
883 self.search(name, filter)
884 }
885
886 pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
888 let filter = QueryFilter {
889 language: lang,
890 use_ast: true,
891 ..Default::default()
892 };
893
894 self.search(pattern, filter)
895 }
896
897 pub fn search_ast_all_files(
918 &self,
919 ast_pattern: &str,
920 filter: QueryFilter,
921 ) -> Result<Vec<SearchResult>> {
922 log::info!(
923 "Executing AST query on all files: pattern='{}', filter={:?}",
924 ast_pattern,
925 filter
926 );
927
928 let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
930 "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
931 \n\
932 Example: rfx query \"(function_definition) @fn\" --ast --lang python"
933 ))?;
934
935 if !self.cache.exists() {
937 anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
938 }
939
940 self.check_index_freshness(&filter)?;
942
943 let content_path = self.cache.path().join("content.bin");
945 let content_reader =
946 ContentReader::open(&content_path).context("Failed to open content store")?;
947
948 use globset::{Glob, GlobSetBuilder};
950
951 let include_matcher = if !filter.glob_patterns.is_empty() {
952 let mut builder = GlobSetBuilder::new();
953 for pattern in &filter.glob_patterns {
954 let normalized = Self::normalize_glob_pattern(pattern);
956 if let Ok(glob) = Glob::new(&normalized) {
957 builder.add(glob);
958 }
959 }
960 builder.build().ok()
961 } else {
962 None
963 };
964
965 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
966 let mut builder = GlobSetBuilder::new();
967 for pattern in &filter.exclude_patterns {
968 let normalized = Self::normalize_glob_pattern(pattern);
970 if let Ok(glob) = Glob::new(&normalized) {
971 builder.add(glob);
972 }
973 }
974 builder.build().ok()
975 } else {
976 None
977 };
978
979 let mut candidates: Vec<SearchResult> = Vec::new();
981
982 for file_id in 0..content_reader.file_count() {
983 let file_path = match content_reader.get_file_path(file_id as u32) {
984 Some(p) => p,
985 None => continue,
986 };
987
988 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
990 let detected_lang = Language::from_extension(ext);
991
992 if detected_lang != lang {
994 continue;
995 }
996
997 let file_path_str = file_path.to_string_lossy().to_string();
998
999 let included = include_matcher
1001 .as_ref()
1002 .map_or(true, |m| m.is_match(&file_path_str));
1003 let excluded = exclude_matcher
1004 .as_ref()
1005 .map_or(false, |m| m.is_match(&file_path_str));
1006
1007 if !included || excluded {
1008 continue;
1009 }
1010
1011 candidates.push(SearchResult {
1013 path: file_path_str,
1014 lang: detected_lang,
1015 span: Span {
1016 start_line: 1,
1017 end_line: 1,
1018 },
1019 symbol: None,
1020 kind: SymbolKind::Unknown("ast_query".to_string()),
1021 preview: String::new(),
1022 dependencies: None,
1023 });
1024 }
1025
1026 log::info!(
1027 "AST query scanning {} files for language {:?}",
1028 candidates.len(),
1029 lang
1030 );
1031
1032 if !filter.force && filter.glob_patterns.is_empty() && candidates.len() >= 100 {
1035 anyhow::bail!(
1036 "Query too broad - would be expensive to execute\n\
1037 \n\
1038 AST query without --glob restriction will scan the ENTIRE codebase ({} files). AST queries are SLOW (500ms-10s+).\n\
1039 \n\
1040 This query could:\n\
1041 • Hang for an extended period before returning results\n\
1042 • Return thousands of results\n\
1043 • Flood LLM context windows with excessive data\n\
1044 • Fail entirely\n\
1045 \n\
1046 Suggestions to narrow the query:\n\
1047 • Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'\n\
1048 • Use --symbols instead (10-100x faster in 95% of cases)\n\
1049 • Use --force to bypass this check if you need a full codebase scan\n\
1050 \n\
1051 To force execution anyway:\n\
1052 rfx query \"{}\" --force --ast --lang {:?}",
1053 candidates.len(),
1054 ast_pattern,
1055 lang
1056 );
1057 }
1058
1059 if candidates.is_empty() {
1060 if !filter.suppress_output {
1061 output::warn(&format!(
1062 "No files found for language {:?}. Check your language filter or glob patterns.",
1063 lang
1064 ));
1065 }
1066 return Ok(Vec::new());
1067 }
1068
1069 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1072
1073 log::debug!("AST query found {} matches before filtering", results.len());
1074
1075 if let Some(ref kind) = filter.kind {
1079 results.retain(|r| {
1080 if matches!(kind, SymbolKind::Function) {
1081 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1082 } else {
1083 r.kind == *kind
1084 }
1085 });
1086 }
1087
1088 if filter.expand {
1092 let content_path = self.cache.path().join("content.bin");
1093 if let Ok(content_reader) = ContentReader::open(&content_path) {
1094 for result in &mut results {
1095 if result.span.start_line < result.span.end_line {
1096 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1097 if let Ok(content) = content_reader.get_file_content(file_id) {
1098 let lines: Vec<&str> = content.lines().collect();
1099 let start_idx = (result.span.start_line as usize).saturating_sub(1);
1100 let end_idx = (result.span.end_line as usize).min(lines.len());
1101
1102 if start_idx < end_idx {
1103 let full_body = lines[start_idx..end_idx].join("\n");
1104 result.preview = full_body;
1105 }
1106 }
1107 }
1108 }
1109 }
1110 }
1111 }
1112
1113 if filter.paths_only {
1115 use std::collections::HashSet;
1116 let mut seen_paths = HashSet::new();
1117 results.retain(|r| seen_paths.insert(r.path.clone()));
1118 }
1119
1120 results.sort_by(|a, b| {
1122 a.path
1123 .cmp(&b.path)
1124 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1125 });
1126
1127 if let Some(offset) = filter.offset {
1129 if offset < results.len() {
1130 results = results.into_iter().skip(offset).collect();
1131 } else {
1132 results.clear();
1133 }
1134 }
1135
1136 if let Some(limit) = filter.limit {
1138 results.truncate(limit);
1139 }
1140
1141 log::info!("AST query returned {} results", results.len());
1142
1143 self.load_dependencies(&mut results, filter.include_dependencies)?;
1145
1146 Ok(results)
1147 }
1148
1149 pub fn search_ast_with_text_filter(
1161 &self,
1162 text_pattern: &str,
1163 ast_pattern: &str,
1164 filter: QueryFilter,
1165 ) -> Result<Vec<SearchResult>> {
1166 log::info!(
1167 "Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
1168 text_pattern,
1169 ast_pattern,
1170 filter
1171 );
1172
1173 if !self.cache.exists() {
1175 anyhow::bail!("Index not found. Run 'rfx index' to build the cache first.");
1176 }
1177
1178 self.check_index_freshness(&filter)?;
1180
1181 use std::time::{Duration, Instant};
1183 let start_time = Instant::now();
1184 let timeout = if filter.timeout_secs > 0 {
1185 Some(Duration::from_secs(filter.timeout_secs))
1186 } else {
1187 None
1188 };
1189
1190 let candidates = if filter.use_regex {
1192 self.get_regex_candidates(
1193 text_pattern,
1194 timeout.as_ref(),
1195 &start_time,
1196 filter.suppress_output,
1197 )?
1198 } else {
1199 self.get_trigram_candidates(text_pattern, &filter)?
1200 };
1201
1202 log::debug!("Phase 1 found {} candidate locations", candidates.len());
1203
1204 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1206
1207 log::debug!("Phase 2 AST matching found {} results", results.len());
1208
1209 if let Some(lang) = filter.language {
1211 results.retain(|r| r.lang == lang);
1212 }
1213
1214 if let Some(ref kind) = filter.kind {
1215 results.retain(|r| {
1216 if matches!(kind, SymbolKind::Function) {
1217 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1218 } else {
1219 r.kind == *kind
1220 }
1221 });
1222 }
1223
1224 if let Some(ref file_pattern) = filter.file_pattern {
1225 results.retain(|r| r.path.contains(file_pattern));
1226 }
1227
1228 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
1230 use globset::{Glob, GlobSetBuilder};
1231
1232 let include_matcher = if !filter.glob_patterns.is_empty() {
1233 let mut builder = GlobSetBuilder::new();
1234 for pattern in &filter.glob_patterns {
1235 let normalized = Self::normalize_glob_pattern(pattern);
1237 if let Ok(glob) = Glob::new(&normalized) {
1238 builder.add(glob);
1239 }
1240 }
1241 builder.build().ok()
1242 } else {
1243 None
1244 };
1245
1246 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1247 let mut builder = GlobSetBuilder::new();
1248 for pattern in &filter.exclude_patterns {
1249 let normalized = Self::normalize_glob_pattern(pattern);
1251 if let Ok(glob) = Glob::new(&normalized) {
1252 builder.add(glob);
1253 }
1254 }
1255 builder.build().ok()
1256 } else {
1257 None
1258 };
1259
1260 results.retain(|r| {
1261 let included = include_matcher
1262 .as_ref()
1263 .map_or(true, |m| m.is_match(&r.path));
1264 let excluded = exclude_matcher
1265 .as_ref()
1266 .map_or(false, |m| m.is_match(&r.path));
1267 included && !excluded
1268 });
1269 }
1270
1271 if filter.exact && filter.symbols_mode {
1272 results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
1273 }
1274
1275 if filter.expand {
1277 let content_path = self.cache.path().join("content.bin");
1278 if let Ok(content_reader) = ContentReader::open(&content_path) {
1279 for result in &mut results {
1280 if result.span.start_line < result.span.end_line {
1281 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1282 if let Ok(content) = content_reader.get_file_content(file_id) {
1283 let lines: Vec<&str> = content.lines().collect();
1284 let start_idx = (result.span.start_line as usize).saturating_sub(1);
1285 let end_idx = (result.span.end_line as usize).min(lines.len());
1286
1287 if start_idx < end_idx {
1288 let full_body = lines[start_idx..end_idx].join("\n");
1289 result.preview = full_body;
1290 }
1291 }
1292 }
1293 }
1294 }
1295 }
1296 }
1297
1298 results.sort_by(|a, b| {
1300 a.path
1301 .cmp(&b.path)
1302 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1303 });
1304
1305 if let Some(offset) = filter.offset {
1307 if offset < results.len() {
1308 results = results.into_iter().skip(offset).collect();
1309 } else {
1310 results.clear();
1311 }
1312 }
1313
1314 if let Some(limit) = filter.limit {
1316 results.truncate(limit);
1317 }
1318
1319 log::info!("AST query returned {} results", results.len());
1320
1321 Ok(results)
1322 }
1323
1324 pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
1326 let filter = QueryFilter {
1327 kind: Some(kind),
1328 symbols_mode: true,
1329 ..Default::default()
1330 };
1331
1332 self.search("*", filter)
1333 }
1334
1335 fn enrich_with_symbols(
1356 &self,
1357 candidates: Vec<SearchResult>,
1358 pattern: &str,
1359 filter: &QueryFilter,
1360 ) -> Result<Vec<SearchResult>> {
1361 let content_path = self.cache.path().join("content.bin");
1363 let content_reader =
1364 ContentReader::open(&content_path).context("Failed to open content store")?;
1365
1366 let trigrams_path = self.cache.path().join("trigrams.bin");
1368 let trigram_index = if trigrams_path.exists() {
1369 TrigramIndex::load(&trigrams_path)?
1370 } else {
1371 Self::rebuild_trigram_index(&content_reader)?
1372 };
1373
1374 let symbol_cache = crate::symbol_cache::SymbolCache::open(self.cache.path())
1376 .context("Failed to open symbol cache")?;
1377
1378 let root = self.cache.workspace_root();
1380 let branch =
1381 crate::git::get_current_branch(&root).unwrap_or_else(|_| "_default".to_string());
1382 let file_hashes = self
1383 .cache
1384 .load_hashes_for_branch(&branch)
1385 .context("Failed to load file hashes")?;
1386 log::debug!(
1387 "Loaded {} file hashes for branch '{}' for symbol cache lookups",
1388 file_hashes.len(),
1389 branch
1390 );
1391
1392 use std::collections::HashMap;
1394 let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
1395 let mut skipped_unsupported = 0;
1396
1397 for candidate in candidates {
1398 if !candidate.lang.is_supported() {
1400 skipped_unsupported += 1;
1401 continue;
1402 }
1403
1404 files_by_path
1405 .entry(candidate.path.clone())
1406 .or_insert_with(Vec::new)
1407 .push(candidate);
1408 }
1409
1410 let total_files = files_by_path.len();
1411 log::debug!(
1412 "Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
1413 total_files,
1414 skipped_unsupported
1415 );
1416
1417 if total_files > 1000 && !filter.suppress_output {
1419 output::warn(&format!(
1420 "Pattern '{}' matched {} files. This may take some time to parse. Consider using a more specific pattern or adding --lang/--file filters to narrow the search.",
1421 pattern, total_files
1422 ));
1423 }
1424
1425 let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
1427
1428 let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
1431
1432 for file_path in &files_to_process {
1433 let ext = std::path::Path::new(file_path)
1435 .extension()
1436 .and_then(|e| e.to_str())
1437 .unwrap_or("");
1438 let lang = Language::from_extension(ext);
1439
1440 if let Some(line_filter) = crate::line_filter::get_filter(lang) {
1442 let file_id =
1444 match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1445 Some(id) => id,
1446 None => continue,
1447 };
1448
1449 let content = match content_reader.get_file_content(file_id) {
1451 Ok(c) => c,
1452 Err(_) => continue,
1453 };
1454
1455 let mut all_in_non_code = true;
1457 for line in content.lines() {
1458 let mut search_start = 0;
1460 while let Some(pos) = line[search_start..].find(pattern) {
1461 let absolute_pos = search_start + pos;
1462
1463 let in_comment = line_filter.is_in_comment(line, absolute_pos);
1465 let in_string = line_filter.is_in_string(line, absolute_pos);
1466
1467 if !in_comment && !in_string {
1468 all_in_non_code = false;
1470 break;
1471 }
1472
1473 search_start = absolute_pos + pattern.len();
1474 }
1475
1476 if !all_in_non_code {
1477 break;
1478 }
1479 }
1480
1481 if all_in_non_code {
1483 if content.contains(pattern) {
1485 files_to_skip.insert(file_path.clone());
1486 log::debug!(
1487 "Pre-filter: Skipping {} (all matches in comments/strings)",
1488 file_path
1489 );
1490 }
1491 }
1492 }
1493 }
1494
1495 files_to_process.retain(|path| !files_to_skip.contains(path));
1497
1498 log::debug!(
1499 "Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
1500 files_to_skip.len(),
1501 files_to_process.len()
1502 );
1503
1504 let num_threads = {
1506 let available_cores = std::thread::available_parallelism()
1507 .map(|n| n.get())
1508 .unwrap_or(4);
1509 ((available_cores as f64 * 0.8).ceil() as usize)
1512 .max(1)
1513 .min(8)
1514 };
1515
1516 log::debug!(
1517 "Using {} threads for parallel symbol extraction (out of {} available cores)",
1518 num_threads,
1519 std::thread::available_parallelism()
1520 .map(|n| n.get())
1521 .unwrap_or(4)
1522 );
1523
1524 let pool = rayon::ThreadPoolBuilder::new()
1526 .num_threads(num_threads)
1527 .build()
1528 .context("Failed to create thread pool for symbol extraction")?;
1529
1530 let files_with_hashes: Vec<String> = files_to_process
1535 .iter()
1536 .filter(|path| file_hashes.contains_key(path.as_str()))
1537 .cloned()
1538 .collect();
1539
1540 let file_id_map = self
1542 .cache
1543 .batch_get_file_ids(&files_with_hashes)
1544 .context("Failed to batch lookup file IDs")?;
1545
1546 let file_lookup_tuples: Vec<(i64, String, String)> = files_with_hashes
1548 .iter()
1549 .filter_map(|path| {
1550 let file_id = file_id_map.get(path)?;
1551 let hash = file_hashes.get(path.as_str())?;
1552 Some((*file_id, hash.clone(), path.clone()))
1553 })
1554 .collect();
1555
1556 let batch_results = symbol_cache
1558 .batch_get_with_kind(&file_lookup_tuples, filter.kind.clone())
1559 .context("Failed to batch read symbol cache")?;
1560
1561 let mut cached_symbols: HashMap<String, Vec<SearchResult>> = HashMap::new();
1563 let mut files_needing_parse: Vec<String> = Vec::new();
1564
1565 let id_to_path: HashMap<i64, String> = file_id_map
1567 .iter()
1568 .map(|(path, id)| (*id, path.clone()))
1569 .collect();
1570
1571 for (file_id, symbols) in batch_results {
1573 if let Some(file_path) = id_to_path.get(&file_id) {
1574 cached_symbols.insert(file_path.clone(), symbols);
1575 }
1576 }
1577
1578 for path in &files_with_hashes {
1580 if file_id_map.contains_key(path) && !cached_symbols.contains_key(path) {
1581 files_needing_parse.push(path.clone());
1582 }
1583 }
1584
1585 for file_path in &files_to_process {
1587 if !file_hashes.contains_key(file_path.as_str()) {
1588 files_needing_parse.push(file_path.clone());
1589 }
1590 }
1591
1592 log::debug!(
1593 "Symbol cache: {} hits, {} need parsing",
1594 cached_symbols.len(),
1595 files_needing_parse.len()
1596 );
1597
1598 use rayon::prelude::*;
1600
1601 let parsed_symbols: Vec<SearchResult> = pool.install(|| {
1602 files_needing_parse
1603 .par_iter()
1604 .flat_map(|file_path| {
1605 let file_id = match Self::find_file_id_by_path(
1607 &content_reader,
1608 &trigram_index,
1609 file_path,
1610 ) {
1611 Some(id) => id,
1612 None => {
1613 log::warn!("Could not find file_id for path: {}", file_path);
1614 return Vec::new();
1615 }
1616 };
1617
1618 let content = match content_reader.get_file_content(file_id) {
1619 Ok(c) => c,
1620 Err(e) => {
1621 log::warn!("Failed to read file {}: {}", file_path, e);
1622 return Vec::new();
1623 }
1624 };
1625
1626 let ext = std::path::Path::new(file_path)
1628 .extension()
1629 .and_then(|e| e.to_str())
1630 .unwrap_or("");
1631 let lang = Language::from_extension(ext);
1632
1633 let symbols = match ParserFactory::parse(file_path, content, lang) {
1635 Ok(symbols) => {
1636 log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
1637 symbols
1638 }
1639 Err(e) => {
1640 log::debug!("Failed to parse {}: {}", file_path, e);
1641 Vec::new()
1642 }
1643 };
1644
1645 if let Some(file_hash) = file_hashes.get(file_path.as_str()) {
1647 if let Err(e) = symbol_cache.set(file_path, file_hash, &symbols) {
1648 log::debug!("Failed to cache symbols for {}: {}", file_path, e);
1649 }
1650 }
1651
1652 symbols
1653 })
1654 .collect()
1655 });
1656
1657 let mut all_symbols: Vec<SearchResult> = Vec::new();
1659
1660 for symbols in cached_symbols.values() {
1662 all_symbols.extend_from_slice(symbols);
1663 }
1664
1665 all_symbols.extend(parsed_symbols);
1667
1668 let is_keyword_query = {
1676 let lang_to_check = if let Some(lang) = filter.language {
1678 vec![lang]
1681 } else {
1682 let mut langs: Vec<Language> =
1686 all_symbols.iter().map(|s| s.lang).collect::<Vec<_>>();
1687 langs.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); langs.dedup(); langs
1690 };
1691
1692 lang_to_check
1694 .iter()
1695 .any(|lang| ParserFactory::get_keywords(*lang).contains(&pattern))
1696 };
1697
1698 let filtered: Vec<SearchResult> = if is_keyword_query {
1701 log::info!(
1702 "Pattern '{}' is a language keyword - listing all symbols (kind filtering will be applied in Phase 3)",
1703 pattern
1704 );
1705 all_symbols
1706 } else if filter.use_regex {
1707 use std::collections::{HashMap, HashSet};
1713 let mut candidate_lines: HashMap<String, HashSet<usize>> = HashMap::new();
1714 for candidate in &files_by_path {
1715 for cand in candidate.1 {
1716 candidate_lines
1717 .entry(candidate.0.clone())
1718 .or_insert_with(HashSet::new)
1719 .insert(cand.span.start_line);
1720 }
1721 }
1722
1723 all_symbols
1725 .into_iter()
1726 .filter(|sym| {
1727 if let Some(lines) = candidate_lines.get(&sym.path) {
1728 for line in sym.span.start_line..=sym.span.end_line {
1730 if lines.contains(&line) {
1731 return true;
1732 }
1733 }
1734 }
1735 false
1736 })
1737 .collect()
1738 } else if filter.use_contains {
1739 all_symbols
1741 .into_iter()
1742 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
1743 .collect()
1744 } else {
1745 all_symbols
1747 .into_iter()
1748 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
1749 .collect()
1750 };
1751
1752 log::info!(
1753 "Symbol enrichment found {} matches for pattern '{}'",
1754 filtered.len(),
1755 pattern
1756 );
1757
1758 Ok(filtered)
1759 }
1760
1761 fn enrich_with_ast(
1780 &self,
1781 candidates: Vec<SearchResult>,
1782 ast_pattern: &str,
1783 language: Option<Language>,
1784 ) -> Result<Vec<SearchResult>> {
1785 let lang = language.ok_or_else(|| anyhow::anyhow!(
1787 "Language must be specified for AST pattern matching. Use --lang to specify the language."
1788 ))?;
1789
1790 let content_path = self.cache.path().join("content.bin");
1792 let content_reader =
1793 ContentReader::open(&content_path).context("Failed to open content store")?;
1794
1795 let trigrams_path = self.cache.path().join("trigrams.bin");
1797 let trigram_index = if trigrams_path.exists() {
1798 TrigramIndex::load(&trigrams_path)?
1799 } else {
1800 Self::rebuild_trigram_index(&content_reader)?
1801 };
1802
1803 use std::collections::HashMap;
1805 let mut file_contents: HashMap<String, String> = HashMap::new();
1806
1807 for candidate in &candidates {
1808 if file_contents.contains_key(&candidate.path) {
1809 continue;
1810 }
1811
1812 let file_id = match Self::find_file_id_by_path(
1814 &content_reader,
1815 &trigram_index,
1816 &candidate.path,
1817 ) {
1818 Some(id) => id,
1819 None => {
1820 log::warn!("Could not find file_id for path: {}", candidate.path);
1821 continue;
1822 }
1823 };
1824
1825 let content = match content_reader.get_file_content(file_id) {
1827 Ok(c) => c,
1828 Err(e) => {
1829 log::warn!("Failed to read file {}: {}", candidate.path, e);
1830 continue;
1831 }
1832 };
1833
1834 file_contents.insert(candidate.path.clone(), content.to_string());
1835 }
1836
1837 log::debug!(
1838 "Executing AST query on {} candidate files with language {:?}",
1839 file_contents.len(),
1840 lang
1841 );
1842
1843 let results =
1845 crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1846
1847 log::info!(
1848 "AST query found {} matches for pattern '{}'",
1849 results.len(),
1850 ast_pattern
1851 );
1852
1853 Ok(results)
1854 }
1855
1856 fn find_file_id_by_path(
1858 content_reader: &ContentReader,
1859 trigram_index: &TrigramIndex,
1860 target_path: &str,
1861 ) -> Option<u32> {
1862 for file_id in 0..trigram_index.file_count() {
1864 if let Some(path) = trigram_index.get_file(file_id as u32) {
1865 if path.to_string_lossy() == target_path {
1866 return Some(file_id as u32);
1867 }
1868 }
1869 }
1870
1871 for file_id in 0..content_reader.file_count() {
1873 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1874 if path.to_string_lossy() == target_path {
1875 return Some(file_id as u32);
1876 }
1877 }
1878 }
1879
1880 None
1881 }
1882
/// Forwards to `filter::keyword_to_kind` and returns its result unchanged.
///
/// NOTE(review): judging by the name, this maps a language keyword to the
/// `SymbolKind` it introduces; the mapping itself lives in the `filter`
/// module — confirm there.
fn keyword_to_kind(keyword: &str) -> Option<SymbolKind> {
    filter::keyword_to_kind(keyword)
}
1893
1894 fn get_all_language_files(&self, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1902 let content_path = self.cache.path().join("content.bin");
1907 let content_reader =
1908 ContentReader::open(&content_path).context("Failed to open content store")?;
1909
1910 use globset::{Glob, GlobSetBuilder};
1912
1913 let include_matcher = if !filter.glob_patterns.is_empty() {
1914 let mut builder = GlobSetBuilder::new();
1915 for pattern in &filter.glob_patterns {
1916 let normalized = Self::normalize_glob_pattern(pattern);
1917 if let Ok(glob) = Glob::new(&normalized) {
1918 builder.add(glob);
1919 }
1920 }
1921 builder.build().ok()
1922 } else {
1923 None
1924 };
1925
1926 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1927 let mut builder = GlobSetBuilder::new();
1928 for pattern in &filter.exclude_patterns {
1929 let normalized = Self::normalize_glob_pattern(pattern);
1930 if let Ok(glob) = Glob::new(&normalized) {
1931 builder.add(glob);
1932 }
1933 }
1934 builder.build().ok()
1935 } else {
1936 None
1937 };
1938
1939 let mut candidates: Vec<SearchResult> = Vec::new();
1941
1942 for file_id in 0..content_reader.file_count() {
1943 let file_path = match content_reader.get_file_path(file_id as u32) {
1944 Some(p) => p,
1945 None => continue,
1946 };
1947
1948 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
1950 let detected_lang = Language::from_extension(ext);
1951
1952 if let Some(lang) = filter.language {
1954 if detected_lang != lang {
1955 continue;
1956 }
1957 }
1958
1959 let file_path_str = file_path.to_string_lossy().to_string();
1960
1961 let included = include_matcher
1963 .as_ref()
1964 .map_or(true, |m| m.is_match(&file_path_str));
1965 let excluded = exclude_matcher
1966 .as_ref()
1967 .map_or(false, |m| m.is_match(&file_path_str));
1968
1969 if !included || excluded {
1970 continue;
1971 }
1972
1973 if let Some(ref file_pattern) = filter.file_pattern {
1975 if !file_path_str.contains(file_pattern) {
1976 continue;
1977 }
1978 }
1979
1980 candidates.push(SearchResult {
1983 path: file_path_str,
1984 lang: detected_lang,
1985 span: Span {
1986 start_line: 1,
1987 end_line: 1,
1988 },
1989 symbol: None,
1990 kind: SymbolKind::Unknown("keyword_query".to_string()),
1991 preview: String::new(),
1992 dependencies: None,
1993 });
1994 }
1995
1996 if let Some(lang) = filter.language {
1997 log::info!(
1998 "Keyword query will scan {} {:?} files for symbol extraction",
1999 candidates.len(),
2000 lang
2001 );
2002 } else {
2003 log::info!(
2004 "Keyword query will scan {} files (all languages) for symbol extraction",
2005 candidates.len()
2006 );
2007 }
2008
2009 Ok(candidates)
2010 }
2011
2012 fn get_trigram_candidates(
2014 &self,
2015 pattern: &str,
2016 filter: &QueryFilter,
2017 ) -> Result<Vec<SearchResult>> {
2018 let content_path = self.cache.path().join("content.bin");
2020 let content_reader =
2021 ContentReader::open(&content_path).context("Failed to open content store")?;
2022
2023 if pattern.chars().count() < 3 {
2027 log::info!(
2028 "Pattern '{}' is shorter than 3 chars — trigram index cannot be used, \
2029 falling back to linear scan",
2030 pattern
2031 );
2032 return self.linear_scan_candidates(pattern, filter, &content_reader);
2033 }
2034
2035 let trigrams_path = self.cache.path().join("trigrams.bin");
2037 let trigram_index = if trigrams_path.exists() {
2038 match TrigramIndex::load(&trigrams_path) {
2039 Ok(index) => {
2040 log::debug!(
2041 "Loaded trigram index from disk: {} trigrams, {} files",
2042 index.trigram_count(),
2043 index.file_count()
2044 );
2045 index
2046 }
2047 Err(e) => {
2048 log::warn!("Failed to load trigram index from disk: {}", e);
2049 log::warn!("Rebuilding trigram index from content store...");
2050 Self::rebuild_trigram_index(&content_reader)?
2051 }
2052 }
2053 } else {
2054 log::debug!("trigrams.bin not found, rebuilding from content store");
2055 Self::rebuild_trigram_index(&content_reader)?
2056 };
2057
2058 let candidates = trigram_index.search(pattern);
2060 log::debug!(
2061 "Found {} candidate locations from trigram search",
2062 candidates.len()
2063 );
2064
2065 let pattern_owned = pattern.to_string();
2067
2068 let compiled_regex = if filter.use_regex {
2070 match Regex::new(&pattern_owned) {
2071 Ok(re) => Some(re),
2072 Err(e) => {
2073 log::error!("Invalid regex pattern '{}': {}", pattern_owned, e);
2074 anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e);
2075 }
2076 }
2077 } else {
2078 None
2079 };
2080
2081 use std::collections::HashMap;
2083 let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> =
2084 HashMap::new();
2085 for loc in candidates {
2086 candidates_by_file
2087 .entry(loc.file_id)
2088 .or_insert_with(Vec::new)
2089 .push(loc);
2090 }
2091
2092 log::debug!(
2093 "Scanning {} files with trigram matches",
2094 candidates_by_file.len()
2095 );
2096
2097 use rayon::prelude::*;
2099
2100 let results: Vec<SearchResult> = candidates_by_file
2101 .par_iter()
2102 .flat_map(|(file_id, locations)| {
2103 let file_path = match trigram_index.get_file(*file_id) {
2105 Some(p) => p,
2106 None => return Vec::new(),
2107 };
2108
2109 let content = match content_reader.get_file_content(*file_id) {
2110 Ok(c) => c,
2111 Err(_) => return Vec::new(),
2112 };
2113
2114 let file_path_str = file_path.to_string_lossy().to_string();
2115
2116 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2118 let lang = Language::from_extension(ext);
2119
2120 let lines: Vec<&str> = content.lines().collect();
2122
2123 let mut seen_lines: std::collections::HashSet<usize> =
2125 std::collections::HashSet::new();
2126 let mut file_results = Vec::new();
2127
2128 for loc in locations {
2130 let line_no = loc.line_no as usize;
2131
2132 if seen_lines.contains(&line_no) {
2134 continue;
2135 }
2136
2137 if line_no == 0 || line_no > lines.len() {
2139 log::debug!(
2140 "Line {} out of bounds (file has {} lines)",
2141 line_no,
2142 lines.len()
2143 );
2144 continue;
2145 }
2146
2147 let line = lines[line_no - 1];
2148
2149 let line_matches = if filter.use_regex {
2154 compiled_regex
2157 .as_ref()
2158 .map(|re| re.is_match(line))
2159 .unwrap_or(false)
2160 } else if filter.use_contains {
2161 line.contains(&pattern_owned)
2163 } else {
2164 Self::has_word_boundary_match(line, &pattern_owned)
2166 };
2167
2168 if !line_matches {
2169 continue;
2170 }
2171
2172 seen_lines.insert(line_no);
2173
2174 file_results.push(SearchResult {
2176 path: file_path_str.clone(),
2177 lang: lang.clone(),
2178 kind: SymbolKind::Unknown("text_match".to_string()),
2179 symbol: None, span: Span {
2181 start_line: line_no,
2182 end_line: line_no,
2183 },
2184 preview: line.to_string(),
2185 dependencies: None,
2186 });
2187 }
2188
2189 file_results
2190 })
2191 .collect();
2192
2193 Ok(results)
2194 }
2195
2196 fn linear_scan_candidates(
2203 &self,
2204 pattern: &str,
2205 filter: &QueryFilter,
2206 content_reader: &ContentReader,
2207 ) -> Result<Vec<SearchResult>> {
2208 use rayon::prelude::*;
2209
2210 let pattern_owned = pattern.to_string();
2211 let file_count = content_reader.file_count();
2212
2213 let compiled_regex = if filter.use_regex {
2214 match Regex::new(&pattern_owned) {
2215 Ok(re) => Some(re),
2216 Err(e) => anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e),
2217 }
2218 } else {
2219 None
2220 };
2221
2222 let results: Vec<SearchResult> = (0..file_count as u32)
2223 .collect::<Vec<_>>()
2224 .par_iter()
2225 .flat_map(|&file_id| {
2226 let file_path = match content_reader.get_file_path(file_id) {
2227 Some(p) => p.to_path_buf(),
2228 None => return Vec::new(),
2229 };
2230 let content = match content_reader.get_file_content(file_id) {
2231 Ok(c) => c,
2232 Err(_) => return Vec::new(),
2233 };
2234
2235 let file_path_str = file_path.to_string_lossy().to_string();
2236 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2237 let lang = Language::from_extension(ext);
2238
2239 let mut seen_lines = std::collections::HashSet::new();
2240 let mut file_results = Vec::new();
2241
2242 for (line_idx, line) in content.lines().enumerate() {
2243 let line_no = line_idx + 1;
2244 if seen_lines.contains(&line_no) {
2245 continue;
2246 }
2247
2248 let line_matches = if filter.use_regex {
2249 compiled_regex
2250 .as_ref()
2251 .map(|re| re.is_match(line))
2252 .unwrap_or(false)
2253 } else if filter.use_contains {
2254 line.contains(&pattern_owned)
2255 } else {
2256 Self::has_word_boundary_match(line, &pattern_owned)
2257 };
2258
2259 if !line_matches {
2260 continue;
2261 }
2262
2263 seen_lines.insert(line_no);
2264 file_results.push(SearchResult {
2265 path: file_path_str.clone(),
2266 lang: lang.clone(),
2267 kind: SymbolKind::Unknown("text_match".to_string()),
2268 symbol: None,
2269 span: Span {
2270 start_line: line_no,
2271 end_line: line_no,
2272 },
2273 preview: line.to_string(),
2274 dependencies: None,
2275 });
2276 }
2277
2278 file_results
2279 })
2280 .collect();
2281
2282 log::info!(
2283 "Linear scan (short pattern '{}') found {} results across {} files",
2284 pattern,
2285 results.len(),
2286 file_count
2287 );
2288 Ok(results)
2289 }
2290
2291 fn get_regex_candidates(
2315 &self,
2316 pattern: &str,
2317 timeout: Option<&std::time::Duration>,
2318 start_time: &std::time::Instant,
2319 suppress_output: bool,
2320 ) -> Result<Vec<SearchResult>> {
2321 let regex =
2323 Regex::new(pattern).with_context(|| format!("Invalid regex pattern: {}", pattern))?;
2324
2325 if let Some(timeout_duration) = timeout {
2327 if start_time.elapsed() > *timeout_duration {
2328 anyhow::bail!(
2329 "Query timeout exceeded ({} seconds) during regex compilation",
2330 timeout_duration.as_secs()
2331 );
2332 }
2333 }
2334
2335 let trigrams = extract_trigrams_from_regex(pattern);
2337
2338 let content_path = self.cache.path().join("content.bin");
2340 let content_reader =
2341 ContentReader::open(&content_path).context("Failed to open content store")?;
2342
2343 let mut results = Vec::new();
2344
2345 if trigrams.is_empty() {
2346 if !suppress_output {
2348 output::warn(&format!(
2349 "Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan. This may be slow on large codebases. Consider using patterns with literal text.",
2350 pattern
2351 ));
2352 }
2353
2354 for file_id in 0..content_reader.file_count() {
2356 let file_path = content_reader
2357 .get_file_path(file_id as u32)
2358 .context("Invalid file_id")?;
2359 let content = content_reader.get_file_content(file_id as u32)?;
2360
2361 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
2362 }
2363 } else {
2364 log::debug!(
2366 "Using {} trigrams to narrow regex search candidates",
2367 trigrams.len()
2368 );
2369
2370 let trigrams_path = self.cache.path().join("trigrams.bin");
2372 let trigram_index = if trigrams_path.exists() {
2373 TrigramIndex::load(&trigrams_path)?
2374 } else {
2375 Self::rebuild_trigram_index(&content_reader)?
2376 };
2377
2378 use crate::regex_trigrams::extract_literal_sequences;
2380 let literals = extract_literal_sequences(pattern);
2381
2382 if literals.is_empty() {
2383 log::warn!(
2384 "Regex extraction found trigrams but no literal sequences - this shouldn't happen"
2385 );
2386 for file_id in 0..content_reader.file_count() {
2388 let file_path = content_reader
2389 .get_file_path(file_id as u32)
2390 .context("Invalid file_id")?;
2391 let content = content_reader.get_file_content(file_id as u32)?;
2392 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
2393 }
2394 } else {
2395 use std::collections::HashSet;
2400 let mut candidate_files: HashSet<u32> = HashSet::new();
2401
2402 for literal in &literals {
2403 let candidates = trigram_index.search(literal);
2405 let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
2406
2407 log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
2408
2409 candidate_files.extend(file_ids);
2412 }
2413
2414 let final_candidates = candidate_files;
2415 log::debug!(
2416 "After union: searching {} files that contain any literal",
2417 final_candidates.len()
2418 );
2419
2420 for &file_id in &final_candidates {
2422 let file_path = trigram_index
2423 .get_file(file_id)
2424 .context("Invalid file_id from trigram search")?;
2425 let content = content_reader.get_file_content(file_id)?;
2426
2427 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
2428 }
2429 }
2430 }
2431
2432 log::info!(
2433 "Regex search found {} matches for pattern '{}'",
2434 results.len(),
2435 pattern
2436 );
2437 Ok(results)
2438 }
2439
2440 fn find_regex_matches_in_file(
2442 &self,
2443 regex: &Regex,
2444 file_path: &std::path::Path,
2445 content: &str,
2446 results: &mut Vec<SearchResult>,
2447 ) -> Result<()> {
2448 let file_path_str = file_path.to_string_lossy().to_string();
2449
2450 let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
2452 let lang = Language::from_extension(ext);
2453
2454 for (line_idx, line) in content.lines().enumerate() {
2456 if regex.is_match(line) {
2457 let line_no = line_idx + 1;
2458
2459 results.push(SearchResult {
2466 path: file_path_str.clone(),
2467 lang: lang.clone(),
2468 kind: SymbolKind::Unknown("regex_match".to_string()),
2469 symbol: None, span: Span {
2471 start_line: line_no,
2472 end_line: line_no,
2473 },
2474 preview: line.to_string(),
2475 dependencies: None,
2476 });
2477 }
2478 }
2479
2480 Ok(())
2481 }
2482
/// Forwards to `result::find_file_id` and returns its result unchanged.
///
/// NOTE(review): presumably resolves `target_path` to its id in the content
/// store; the lookup logic lives in the `result` module — confirm there.
fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
    result::find_file_id(content_reader, target_path)
}
2486
/// Forwards to `result::rebuild_trigram_index` and returns its result
/// unchanged.
///
/// Callers in this file use it as the fallback when `trigrams.bin` is missing
/// or cannot be loaded. How the index is rebuilt from `content_reader` is
/// defined in the `result` module.
fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
    result::rebuild_trigram_index(content_reader)
}
2490
/// Forwards to `result::normalize_glob_pattern` and returns its result
/// unchanged.
///
/// Callers in this file pass the returned string to `Glob::new`. The exact
/// normalization rules live in the `result` module.
fn normalize_glob_pattern(pattern: &str) -> String {
    result::normalize_glob_pattern(pattern)
}
2494
/// Forwards to `filter::has_word_boundary_match` and returns its result
/// unchanged.
///
/// Callers in this file use it as the default line-matching mode when neither
/// regex nor substring matching is requested.
/// NOTE(review): presumably true when `pattern` occurs in `line` delimited by
/// word boundaries; the definition lives in the `filter` module — confirm.
fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
    filter::has_word_boundary_match(line, pattern)
}
2498
2499 pub fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
2504 let root = self.cache.workspace_root();
2505
2506 if crate::git::is_git_repo(&root) {
2508 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2509 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2511 let warning = IndexWarning {
2512 reason: format!("Branch '{}' has not been indexed", current_branch),
2513 action_required: "rfx index".to_string(),
2514 files_modified: None,
2515 details: Some(IndexWarningDetails {
2516 current_branch: Some(current_branch),
2517 indexed_branch: None,
2518 current_commit: None,
2519 indexed_commit: None,
2520 }),
2521 };
2522 return Ok((IndexStatus::Stale, false, Some(warning)));
2523 }
2524
2525 if let (Ok(current_commit), Ok(branch_info)) = (
2527 crate::git::get_current_commit(&root),
2528 self.cache.get_branch_info(¤t_branch),
2529 ) {
2530 if branch_info.commit_sha != current_commit {
2531 let warning = IndexWarning {
2532 reason: format!(
2533 "Commit changed from {} to {}",
2534 &branch_info.commit_sha[..7],
2535 ¤t_commit[..7]
2536 ),
2537 action_required: "rfx index".to_string(),
2538 files_modified: None,
2539 details: Some(IndexWarningDetails {
2540 current_branch: Some(current_branch.clone()),
2541 indexed_branch: Some(current_branch.clone()),
2542 current_commit: Some(current_commit.clone()),
2543 indexed_commit: Some(branch_info.commit_sha.clone()),
2544 }),
2545 };
2546 return Ok((IndexStatus::Stale, false, Some(warning)));
2547 }
2548
2549 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2551 let mut checked = 0;
2552 let mut changed = 0;
2553 const SAMPLE_SIZE: usize = 10;
2554
2555 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2556 checked += 1;
2557 let file_path = std::path::Path::new(path);
2558
2559 if let Ok(metadata) = std::fs::metadata(file_path) {
2560 if let Ok(modified) = metadata.modified() {
2561 let indexed_time = branch_info.last_indexed;
2562 let file_time = modified
2563 .duration_since(std::time::UNIX_EPOCH)
2564 .unwrap_or_default()
2565 .as_secs()
2566 as i64;
2567
2568 if file_time > indexed_time {
2569 changed += 1;
2572 }
2573 }
2574 }
2575 }
2576
2577 if changed > 0 {
2578 let warning = IndexWarning {
2579 reason: format!(
2580 "{} of {} sampled files modified",
2581 changed, checked
2582 ),
2583 action_required: "rfx index".to_string(),
2584 files_modified: Some(changed as u32),
2585 details: Some(IndexWarningDetails {
2586 current_branch: Some(current_branch.clone()),
2587 indexed_branch: Some(branch_info.branch.clone()),
2588 current_commit: Some(current_commit.clone()),
2589 indexed_commit: Some(branch_info.commit_sha.clone()),
2590 }),
2591 };
2592 return Ok((IndexStatus::Stale, false, Some(warning)));
2593 }
2594 }
2595
2596 return Ok((IndexStatus::Fresh, true, None));
2598 }
2599 }
2600 }
2601
2602 Ok((IndexStatus::Fresh, true, None))
2604 }
2605
2606 fn check_index_freshness(&self, filter: &QueryFilter) -> Result<()> {
2613 let root = self.cache.workspace_root();
2614
2615 if crate::git::is_git_repo(&root) {
2617 if !crate::git::is_git_available() {
2618 static WARNED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
2619 if !filter.suppress_output {
2620 WARNED.get_or_init(|| {
2621 output::warn("⚠️ git binary not found in PATH; index freshness checks disabled for this session.");
2622 });
2623 }
2624 return Ok(());
2625 }
2626 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2627 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2629 if !filter.suppress_output {
2630 output::warn(&format!(
2631 "⚠️ WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.",
2632 current_branch
2633 ));
2634 }
2635 return Ok(());
2636 }
2637
2638 if let (Ok(current_commit), Ok(branch_info)) = (
2640 crate::git::get_current_commit(&root),
2641 self.cache.get_branch_info(¤t_branch),
2642 ) {
2643 if branch_info.commit_sha != current_commit {
2644 if !filter.suppress_output {
2645 output::warn(&format!(
2646 "⚠️ WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
2647 &branch_info.commit_sha[..7],
2648 ¤t_commit[..7]
2649 ));
2650 }
2651 return Ok(());
2652 }
2653
2654 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2657 let mut checked = 0;
2658 let mut changed = 0;
2659 const SAMPLE_SIZE: usize = 10;
2660
2661 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2662 checked += 1;
2663 let file_path = std::path::Path::new(path);
2664
2665 if let Ok(metadata) = std::fs::metadata(file_path) {
2667 if let Ok(modified) = metadata.modified() {
2668 let indexed_time = branch_info.last_indexed;
2669 let file_time = modified
2670 .duration_since(std::time::UNIX_EPOCH)
2671 .unwrap_or_default()
2672 .as_secs()
2673 as i64;
2674
2675 if file_time > indexed_time {
2677 changed += 1;
2682 }
2683 }
2684 }
2685 }
2686
2687 if changed > 0 && !filter.suppress_output {
2688 output::warn(&format!(
2689 "⚠️ WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.",
2690 changed, checked
2691 ));
2692 }
2693 }
2694 }
2695 }
2696 }
2697
2698 Ok(())
2699 }
2700}
2701
/// Picks the guidance message (if any) that should accompany a set of search
/// results handed to an AI agent.
///
/// The rules below are evaluated top to bottom and the first one that matches
/// decides the message:
///
/// 1. `result_count == 0`: suggest alternative searches.
/// 2. `total_count >= 500`: tell the agent to stop and refine the query.
/// 3. `has_more`: explain that the results are paginated.
/// 4. `symbols_mode` with exactly 1 result: ask for a concise answer.
/// 5. `symbols_mode` with 2 to 10 results: ask for a concise listing.
/// 6. `total_count` in `101..500`: suggest narrowing the search.
/// 7. Not `symbols_mode` with `result_count >= 100`: suggest symbols mode.
/// 8. `paths_only`: suggest reading the listed files.
/// 9. `use_ast`: note that matches are structure based.
/// 10. `use_regex` with `result_count >= 100`: suggest a more precise mode.
/// 11. `language_filter` with `result_count <= 5`: mention the active filter.
/// 12. `glob_filter` with `result_count <= 10`: mention the active filter.
/// 13. `exact_mode` with `result_count <= 5`: mention the active filter.
///
/// # Arguments
///
/// * `result_count` - number of results in the current response.
/// * `total_count` - total number of matches reported for the query.
/// * `has_more` - whether further pages of results exist.
/// * `symbols_mode`, `paths_only`, `use_ast`, `use_regex`, `language_filter`,
///   `glob_filter`, `exact_mode` - flags describing which query options were
///   active.
///
/// # Returns
///
/// `Some(message)` for the first matching rule, or `None` when no rule applies.
pub fn generate_ai_instruction(
    result_count: usize,
    total_count: usize,
    has_more: bool,
    symbols_mode: bool,
    paths_only: bool,
    use_ast: bool,
    use_regex: bool,
    language_filter: bool,
    glob_filter: bool,
    exact_mode: bool,
) -> Option<String> {
    // A single expression keeps the rule priority visible as one ordered chain.
    let instruction = if result_count == 0 {
        "No results found. Consider these alternatives: 1) Check pattern spelling, 2) Remove --kind or --lang filters to broaden search, 3) Try partial match or related term, 4) Use search_regex tool for pattern matching with special characters or complex patterns."
            .to_string()
    } else if total_count >= 500 {
        format!(
            "Query too broad: {} results found. STOP. Do not list results. Refine search automatically by adding filters: kind parameter (Function/Struct/Class), lang parameter (rust/python/etc), or glob parameter (['src/**/*.rs']). Call search_code again with appropriate filters.",
            total_count
        )
    } else if has_more {
        format!(
            "Showing {} of {} results. PAGINATED - there are more results available. Do not automatically fetch all results. Show current page, ask user if these results answer their question before fetching more with --offset parameter.",
            result_count, total_count
        )
    } else if symbols_mode && result_count == 1 {
        "Found 1 precise result. Respond concisely: '[symbol] at [path]:[line]'.".to_string()
    } else if symbols_mode && (2..=10).contains(&result_count) {
        format!(
            "Found {} precise results (definitions only, not usages). List locations concisely: '[symbol] at [path]:[line]' for each result.",
            result_count
        )
    } else if (101..500).contains(&total_count) {
        format!(
            "Found {} results - this is broad. Suggest refining search with: kind parameter (Function/Struct/Class/etc), lang parameter (rust/python/etc), or glob parameter to narrow file scope.",
            total_count
        )
    } else if !symbols_mode && result_count >= 100 {
        format!(
            "Found {} results in full-text search mode (includes definitions AND all usages). Consider using symbols=true parameter to filter to definitions only. This typically reduces results by 80-90%.",
            result_count
        )
    } else if paths_only {
        format!(
            "Found {} unique files (paths-only mode - no code content included). Next step: Use Read tool on specific files that look relevant based on their paths.",
            result_count
        )
    } else if use_ast {
        format!(
            "Found {} results using AST pattern matching. These are structure-based matches using Tree-sitter patterns, not text search.",
            result_count
        )
    } else if use_regex && result_count >= 100 {
        format!(
            "Found {} results using regex pattern matching. Regex matches are expansive. Consider using exact text search or symbols mode for more precise results.",
            result_count
        )
    } else if language_filter && result_count <= 5 {
        format!(
            "Found {} results with language filter active. Results are limited to this language only. Remove lang parameter if you want to search all languages.",
            result_count
        )
    } else if glob_filter && result_count <= 10 {
        format!(
            "Found {} results with glob filter active. Results are limited to matching paths. Remove glob parameter to search entire codebase.",
            result_count
        )
    } else if exact_mode && result_count <= 5 {
        format!(
            "Found {} results in exact match mode. Only exact symbol name matches are included. Remove exact parameter to allow substring matching.",
            result_count
        )
    } else {
        // No rule matched: the results need no extra guidance.
        return None;
    };

    Some(instruction)
}
2824
#[cfg(test)]
mod tests {
    use super::*;
    use crate::indexer::Indexer;
    use crate::models::IndexConfig;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a scratch project containing `files`, indexes it, and returns a
    /// query engine opened on that project.
    ///
    /// Each entry of `files` is a `(relative path, contents)` pair; missing
    /// parent directories are created. The returned `TempDir` owns the project
    /// directory on disk, so callers must keep it alive (bind it to `_temp`,
    /// not `_`) for as long as they use the engine.
    fn indexed_engine(files: &[(&str, &str)]) -> (TempDir, QueryEngine) {
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir_all(&project).unwrap();

        for &(relative_path, contents) in files {
            let target = project.join(relative_path);
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&target, contents).unwrap();
        }

        let indexer = Indexer::new(CacheManager::new(&project), IndexConfig::default());
        indexer.index(&project, false).unwrap();

        // The indexer took ownership of its cache handle, so open a second one
        // on the same project for querying.
        let engine = QueryEngine::new(CacheManager::new(&project));
        (temp, engine)
    }

    /// A freshly constructed engine exposes a cache path ending in `.reflex`.
    #[test]
    fn test_query_engine_creation() {
        let temp = TempDir::new().unwrap();
        let engine = QueryEngine::new(CacheManager::new(temp.path()));

        assert!(engine.cache.path().ends_with(".reflex"));
    }

    /// The default filter has symbols mode off; it can be switched on, alone or
    /// together with a kind.
    #[test]
    fn test_filter_modes() {
        let fulltext = QueryFilter::default();
        assert!(!fulltext.symbols_mode);

        let symbols = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        assert!(symbols.symbols_mode);

        let symbols_with_kind = QueryFilter {
            kind: Some(SymbolKind::Function),
            symbols_mode: true,
            ..Default::default()
        };
        assert!(symbols_with_kind.symbols_mode);
    }

    /// A default search for `hello` returns hits from both files that contain it.
    #[test]
    fn test_fulltext_search() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn main() {\n println!(\"hello\");\n}"),
            ("lib.rs", "pub fn hello() {}"),
        ]);

        let results = engine.search("hello", QueryFilter::default()).unwrap();

        assert!(results.len() >= 2);
        assert!(results.iter().any(|r| r.path.contains("main.rs")));
        assert!(results.iter().any(|r| r.path.contains("lib.rs")));
    }

    /// Symbols mode finds the `greet` function among the results.
    #[test]
    fn test_symbol_search() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn greet() {}\nfn main() {\n greet();\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
    }

    /// A regex query matches `test1` and `test2` but not `other`.
    #[test]
    fn test_regex_search() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn test1() {}\nfn test2() {}\nfn other() {}",
        )]);

        let filter = QueryFilter {
            use_regex: true,
            ..Default::default()
        };
        let results = engine.search(r"fn test\d", filter).unwrap();

        assert_eq!(results.len(), 2);
        assert!(results.iter().all(|r| r.preview.contains("test")));
    }

    /// With a Rust language filter, every result is Rust and lives in a `.rs` file.
    #[test]
    fn test_language_filter() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn main() {}"),
            ("main.js", "function main() {}"),
        ]);

        let filter = QueryFilter {
            language: Some(Language::Rust),
            ..Default::default()
        };
        let results = engine.search("main", filter).unwrap();

        assert!(results.iter().all(|r| r.lang == Language::Rust));
        assert!(results.iter().all(|r| r.path.ends_with(".rs")));
    }

    /// A function-kind symbol search for the fragment `mai` finds `main`.
    #[test]
    fn test_kind_filter() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            kind: Some(SymbolKind::Function),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("mai", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least one result");
        assert!(
            results.iter().any(|r| r.symbol.as_deref() == Some("main")),
            "Should find 'main' function"
        );
    }

    /// A `src/` file pattern keeps results under `src/` and excludes `tests/`.
    #[test]
    fn test_file_pattern_filter() {
        let (_temp, engine) = indexed_engine(&[
            ("src/lib.rs", "fn foo() {}"),
            ("tests/test.rs", "fn foo() {}"),
        ]);

        let filter = QueryFilter {
            file_pattern: Some("src/".to_string()),
            ..Default::default()
        };
        let results = engine.search("foo", filter).unwrap();

        assert!(results.iter().all(|r| r.path.contains("src/")));
        assert!(!results.iter().any(|r| r.path.contains("tests/")));
    }

    /// `limit: Some(5)` caps the result list at five entries even though the
    /// file defines twenty matching functions.
    #[test]
    fn test_limit_filter() {
        let content = (0..20)
            .map(|i| format!("fn test{}() {{}}", i))
            .collect::<Vec<_>>()
            .join("\n");
        let (_temp, engine) = indexed_engine(&[("main.rs", content.as_str())]);

        let filter = QueryFilter {
            limit: Some(5),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 5);
    }

    /// Exact symbol matching returns only `test`, not `test_helper` or `other_test`.
    #[test]
    fn test_exact_match_filter() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn test() {}\nfn test_helper() {}\nfn other_test() {}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            exact: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].symbol.as_deref(), Some("test"));
    }

    /// With `expand` set, the first result's preview includes the function body.
    #[test]
    fn test_expand_mode() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn greet() {\n println!(\"Hello\");\n println!(\"World\");\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            expand: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        let first = &results[0];
        assert!(first.preview.contains("println"));
    }

    /// Searching an indexed but empty project succeeds with no results.
    #[test]
    fn test_search_empty_index() {
        let (_temp, engine) = indexed_engine(&[]);

        let results = engine
            .search("nonexistent", QueryFilter::default())
            .unwrap();

        assert!(results.is_empty());
    }

    /// Searching a project that was never indexed returns an error.
    #[test]
    fn test_search_no_index() {
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir(&project).unwrap();

        let engine = QueryEngine::new(CacheManager::new(&project));

        assert!(engine.search("test", QueryFilter::default()).is_err());
    }

    /// A pattern containing `+` and spaces still finds the matching line.
    #[test]
    fn test_search_special_characters() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "let x = 42;\nlet y = x + 1;")]);

        let results = engine.search("x + ", QueryFilter::default()).unwrap();

        assert!(!results.is_empty());
    }

    /// A non-ASCII pattern is found when `use_contains` and `force` are set.
    #[test]
    fn test_search_unicode() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "// 你好世界\nfn main() {}")]);

        let filter = QueryFilter {
            use_contains: true,
            force: true,
            ..Default::default()
        };
        let results = engine.search("你好", filter).unwrap();

        assert!(!results.is_empty());
    }

    /// Searching for `Test` returns at least the capitalised definition.
    #[test]
    fn test_case_sensitive_search() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn Test() {}\nfn test() {}")]);

        let results = engine.search("Test", QueryFilter::default()).unwrap();

        assert!(results.iter().any(|r| r.preview.contains("Test()")));
    }

    /// Repeating the same query yields the same results in the same order, and
    /// that order is ascending by path, then by start line.
    #[test]
    fn test_results_sorted_deterministically() {
        let (_temp, engine) = indexed_engine(&[
            ("a.rs", "fn test() {}"),
            ("z.rs", "fn test() {}"),
            ("m.rs", "fn test() {}\nfn test2() {}"),
        ]);
        let filter = QueryFilter::default();

        let first = engine.search("test", filter.clone()).unwrap();
        let second = engine.search("test", filter.clone()).unwrap();
        let third = engine.search("test", filter).unwrap();

        // Lengths are checked first so the zip below cannot silently truncate.
        assert_eq!(first.len(), second.len());
        assert_eq!(first.len(), third.len());

        for ((a, b), c) in first.iter().zip(second.iter()).zip(third.iter()) {
            assert_eq!(a.path, b.path);
            assert_eq!(a.path, c.path);
            assert_eq!(a.span.start_line, b.span.start_line);
            assert_eq!(a.span.start_line, c.span.start_line);
        }

        for pair in first.windows(2) {
            let (curr, next) = (&pair[0], &pair[1]);
            assert!(
                curr.path < next.path
                    || (curr.path == next.path && curr.span.start_line <= next.span.start_line)
            );
        }
    }

    /// Language, kind, file-pattern and symbols-mode filters combine to leave
    /// exactly the `test` function in `src/main.rs`.
    #[test]
    fn test_multiple_filters_combined() {
        let (_temp, engine) = indexed_engine(&[
            ("src/main.rs", "fn test() {}\nstruct Test {}"),
            ("src/lib.rs", "fn test() {}"),
            ("test.js", "function test() {}"),
        ]);

        let filter = QueryFilter {
            language: Some(Language::Rust),
            kind: Some(SymbolKind::Function),
            file_pattern: Some("src/main".to_string()),
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 1);
        assert!(results[0].path.contains("src/main.rs"));
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    /// `find_symbol` returns the `greet` function as its first result.
    #[test]
    fn test_find_symbol_helper() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn greet() {}")]);

        let results = engine.find_symbol("greet").unwrap();

        assert!(!results.is_empty());
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    /// A struct-kind symbol search for the fragment `oin` returns only structs
    /// and includes `Point`.
    #[test]
    fn test_list_by_kind_helper() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "struct Point {}\nfn test() {}\nstruct Line {}",
        )]);

        let filter = QueryFilter {
            kind: Some(SymbolKind::Struct),
            symbols_mode: true,
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("oin", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least Point struct");
        assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
    }

    /// `search_with_metadata` returns a response carrying at least one result.
    #[test]
    fn test_search_with_metadata() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn test() {}")]);

        let response = engine
            .search_with_metadata("test", QueryFilter::default())
            .unwrap();

        assert!(!response.results.is_empty());
    }

    /// One default search returns hits from Rust, TypeScript and Python files.
    #[test]
    fn test_search_across_languages() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn greet() {}"),
            ("main.ts", "function greet() {}"),
            ("main.py", "def greet(): pass"),
        ]);

        let results = engine.search("greet", QueryFilter::default()).unwrap();

        assert!(results.len() >= 3);
        assert!(results.iter().any(|r| r.lang == Language::Rust));
        assert!(results.iter().any(|r| r.lang == Language::TypeScript));
        assert!(results.iter().any(|r| r.lang == Language::Python));
    }
}