1pub mod filter;
7pub mod result;
8
9pub use filter::QueryFilter;
10
11use anyhow::{Context, Result};
12use regex::Regex;
13
14use crate::cache::CacheManager;
15use crate::content_store::ContentReader;
16use crate::models::{
17 IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
18 SymbolKind,
19};
20use crate::output;
21use crate::parsers::ParserFactory;
22use crate::regex_trigrams::extract_trigrams_from_regex;
23use crate::trigram::TrigramIndex;
24
25pub struct QueryEngine {
27 cache: CacheManager,
28}
29
30impl QueryEngine {
31 pub fn new(cache: CacheManager) -> Self {
33 Self { cache }
34 }
35
36 fn load_dependencies(&self, results: &mut [SearchResult], include_deps: bool) -> Result<()> {
39 if !include_deps || results.is_empty() {
40 return Ok(());
41 }
42
43 log::debug!("Loading dependencies for {} results", results.len());
44
45 let workspace_root = self.cache.path().parent()
49 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
50 let cache_for_deps = CacheManager::new(workspace_root);
51 let dep_index = crate::dependency::DependencyIndex::new(cache_for_deps);
52
53 for result in results {
55 let normalized_path = result.path.strip_prefix("./").unwrap_or(&result.path);
57
58 match self.cache.get_file_id(normalized_path) {
60 Ok(Some(file_id)) => {
61 log::debug!("Found file_id={} for path={}", file_id, result.path);
62 match dep_index.get_dependencies_info(file_id) {
64 Ok(dep_infos) => {
65 log::debug!("Loaded {} dependencies for file_id={}", dep_infos.len(), file_id);
66 if !dep_infos.is_empty() {
67 result.dependencies = Some(dep_infos);
68 }
69 }
70 Err(e) => {
71 log::warn!("Failed to get dependencies for file_id={}: {}", file_id, e);
72 }
73 }
74 }
75 Ok(None) => {
76 log::warn!("No file_id found for path: {}", result.path);
77 }
78 Err(e) => {
79 log::warn!("Failed to get file_id for path {}: {}", result.path, e);
80 }
81 }
82 }
83
84 Ok(())
85 }
86
87 fn group_and_load_dependencies(
90 &self,
91 results: Vec<SearchResult>,
92 include_deps: bool,
93 ) -> Result<Vec<crate::models::FileGroupedResult>> {
94 use std::collections::HashMap;
95 use crate::models::{FileGroupedResult, MatchResult};
96
97 if results.is_empty() {
98 return Ok(Vec::new());
99 }
100
101 let mut grouped: HashMap<String, Vec<SearchResult>> = HashMap::new();
103 for result in results {
104 grouped
105 .entry(result.path.clone())
106 .or_default()
107 .push(result);
108 }
109
110 let dep_index = if include_deps {
112 let workspace_root = self.cache.path().parent()
113 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
114 let cache_for_deps = CacheManager::new(workspace_root);
115 Some(crate::dependency::DependencyIndex::new(cache_for_deps))
116 } else {
117 None
118 };
119
120 let content_path = self.cache.path().join("content.bin");
122 let content_reader_opt = ContentReader::open(&content_path).ok();
123
124 let mut file_results: Vec<FileGroupedResult> = grouped
126 .into_iter()
127 .map(|(path, file_matches)| {
128 let dependencies = if let Some(dep_idx) = &dep_index {
130 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
131 match self.cache.get_file_id(normalized_path) {
132 Ok(Some(file_id)) => {
133 match dep_idx.get_dependencies_info(file_id) {
134 Ok(dep_infos) if !dep_infos.is_empty() => {
135 log::debug!("Loaded {} dependencies for file: {}", dep_infos.len(), path);
136 Some(dep_infos)
137 }
138 Ok(_) => None,
139 Err(e) => {
140 log::warn!("Failed to get dependencies for {}: {}", path, e);
141 None
142 }
143 }
144 }
145 Ok(None) => {
146 log::warn!("No file_id found for path: {}", path);
147 None
148 }
149 Err(e) => {
150 log::warn!("Failed to get file_id for path {}: {}", path, e);
151 None
152 }
153 }
154 } else {
155 None
156 };
157
158 let normalized_path = path.strip_prefix("./").unwrap_or(&path);
162 let file_id_for_context = if let Some(reader) = &content_reader_opt {
163 reader.get_file_id_by_path(normalized_path)
164 } else {
165 None
166 };
167 log::debug!("Context extraction: file={}, file_id={:?}, content_reader={}",
168 path, file_id_for_context, content_reader_opt.is_some());
169
170 let matches: Vec<MatchResult> = file_matches
172 .into_iter()
173 .map(|r| {
174 let (context_before, context_after) = if let (Some(reader), Some(fid)) = (&content_reader_opt, file_id_for_context) {
176 let result = reader.get_context_by_line(fid as u32, r.span.start_line, 3)
177 .unwrap_or_else(|e| {
178 log::warn!("Failed to extract context for {}:{}: {}", path, r.span.start_line, e);
179 (vec![], vec![])
180 });
181 log::debug!("Extracted context for {}:{} - before: {}, after: {}",
182 path, r.span.start_line, result.0.len(), result.1.len());
183 result
184 } else {
185 if content_reader_opt.is_none() {
186 log::debug!("No ContentReader available for context extraction");
187 }
188 if file_id_for_context.is_none() {
189 log::debug!("No file_id found for {}", path);
190 }
191 (vec![], vec![])
192 };
193
194 MatchResult {
195 kind: r.kind,
196 symbol: r.symbol,
197 span: r.span,
198 preview: r.preview,
199 context_before,
200 context_after,
201 }
202 })
203 .collect();
204
205 FileGroupedResult {
206 path,
207 dependencies,
208 matches,
209 }
210 })
211 .collect();
212
213 file_results.sort_by(|a, b| a.path.cmp(&b.path));
215
216 Ok(file_results)
217 }
218
219 pub fn search_with_metadata(&self, pattern: &str, filter: QueryFilter) -> Result<QueryResponse> {
224 log::info!("Executing query with metadata: pattern='{}', filter={:?}", pattern, filter);
225
226 if !self.cache.exists() {
228 anyhow::bail!(
229 "Index not found. Run 'rfx index' to build the cache first."
230 );
231 }
232
233 if let Err(e) = self.cache.validate() {
235 anyhow::bail!(
236 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
237 e
238 );
239 }
240
241 let (status, can_trust_results, warning) = self.get_index_status()?;
243
244 let (results, total) = self.search_internal(pattern, filter.clone())?;
246
247 use crate::models::PaginationInfo;
249 let pagination = PaginationInfo {
250 total,
251 count: results.len(),
252 offset: filter.offset.unwrap_or(0),
253 limit: filter.limit,
254 has_more: total > filter.offset.unwrap_or(0) + results.len(),
255 };
256
257 let grouped_results = self.group_and_load_dependencies(results, filter.include_dependencies)?;
260
261 Ok(QueryResponse {
262 ai_instruction: None, status,
264 can_trust_results,
265 warning,
266 pagination,
267 results: grouped_results,
268 })
269 }
270
271 pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
276 log::info!("Executing query: pattern='{}', filter={:?}", pattern, filter);
277
278 if !self.cache.exists() {
280 anyhow::bail!(
281 "Index not found. Run 'rfx index' to build the cache first."
282 );
283 }
284
285 if let Err(e) = self.cache.validate() {
287 anyhow::bail!(
288 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
289 e
290 );
291 }
292
293 self.check_index_freshness(&filter)?;
295
296 let (mut results, _total_count) = self.search_internal(pattern, filter.clone())?;
298
299 self.load_dependencies(&mut results, filter.include_dependencies)?;
301
302 Ok(results)
303 }
304
305 fn search_internal(&self, pattern: &str, filter: QueryFilter) -> Result<(Vec<SearchResult>, usize)> {
308 use std::time::{Duration, Instant};
309
310 let start_time = Instant::now();
312 let timeout = if filter.timeout_secs > 0 {
313 Some(Duration::from_secs(filter.timeout_secs))
314 } else {
315 None
316 };
317
318 let is_keyword_query = if filter.symbols_mode || filter.kind.is_some() {
329 ParserFactory::get_all_keywords().contains(&pattern)
330 } else {
331 false
332 };
333
334 let mut filter = filter.clone(); if is_keyword_query && filter.kind.is_none() {
339 if let Some(inferred_kind) = Self::keyword_to_kind(pattern) {
340 log::info!("Keyword '{}' mapped to kind {:?} (auto-inferred)", pattern, inferred_kind);
341 filter.kind = Some(inferred_kind);
342 }
343 }
344
345 if !filter.force && !filter.use_regex && !is_keyword_query {
357 let stats = self.cache.stats()?;
358 let total_files = stats.total_files;
359 let pattern_len = pattern.chars().count();
360
361 let large_index_threshold = filter.test_large_index_threshold.unwrap_or(20_000);
366 let short_pattern_threshold = filter.test_short_pattern_threshold.unwrap_or(4);
367
368 if total_files > large_index_threshold && pattern_len < short_pattern_threshold {
369 anyhow::bail!(
370 "Query too broad - would be expensive to execute on this large index\n\
371 \n\
372 This index contains {} files, and pattern '{}' ({} characters) is too short for efficient searching.\n\
373 On large codebases, short patterns can take 10-30+ seconds to complete.\n\
374 \n\
375 This query could:\n\
376 • Hang for an extended period before returning results\n\
377 • Return thousands of results\n\
378 • Flood LLM context windows with excessive data\n\
379 • Fail entirely\n\
380 \n\
381 Suggestions to narrow the query:\n\
382 • Use a longer, more specific pattern (4+ characters recommended for large indexes)\n\
383 • Add a language filter: --lang <language>\n\
384 • Add a file filter: --glob <pattern> or --file <path>\n\
385 • Use --force to bypass this check if you really need all results\n\
386 \n\
387 To force execution anyway:\n\
388 rfx query \"{}\" --force",
389 total_files,
390 pattern,
391 pattern_len,
392 pattern
393 );
394 }
395 }
396
397 let mut results = if is_keyword_query {
399 if let Some(lang) = filter.language {
402 log::info!("Keyword query detected for '{}' - scanning all {:?} files (bypassing trigram search)",
403 pattern, lang);
404 } else {
405 log::info!("Keyword query detected for '{}' - scanning all files (bypassing trigram search)", pattern);
406 }
407 self.get_all_language_files(&filter)?
408 } else if filter.use_regex {
409 self.get_regex_candidates(pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
411 } else {
412 self.get_trigram_candidates(pattern, &filter)?
414 };
415
416 if !is_keyword_query {
422 if let Some(lang) = filter.language {
423 let before_count = results.len();
424 results.retain(|r| r.lang == lang);
425 log::debug!(
426 "Language filter ({:?}): reduced {} candidates to {} candidates",
427 lang,
428 before_count,
429 results.len()
430 );
431 }
432 }
433
434 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
438 use globset::{Glob, GlobSetBuilder};
439
440 let include_matcher = if !filter.glob_patterns.is_empty() {
442 let mut builder = GlobSetBuilder::new();
443 for pattern in &filter.glob_patterns {
444 let normalized = Self::normalize_glob_pattern(pattern);
446 match Glob::new(&normalized) {
447 Ok(glob) => {
448 builder.add(glob);
449 }
450 Err(e) => {
451 log::warn!("Invalid glob pattern '{}': {}", pattern, e);
452 }
453 }
454 }
455 match builder.build() {
456 Ok(matcher) => Some(matcher),
457 Err(e) => {
458 log::warn!("Failed to build glob matcher: {}", e);
459 None
460 }
461 }
462 } else {
463 None
464 };
465
466 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
468 let mut builder = GlobSetBuilder::new();
469 for pattern in &filter.exclude_patterns {
470 let normalized = Self::normalize_glob_pattern(pattern);
472 match Glob::new(&normalized) {
473 Ok(glob) => {
474 builder.add(glob);
475 }
476 Err(e) => {
477 log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
478 }
479 }
480 }
481 match builder.build() {
482 Ok(matcher) => Some(matcher),
483 Err(e) => {
484 log::warn!("Failed to build exclude matcher: {}", e);
485 None
486 }
487 }
488 } else {
489 None
490 };
491
492 let before_count = results.len();
494 results.retain(|r| {
495 let included = if let Some(ref matcher) = include_matcher {
497 matcher.is_match(&r.path)
498 } else {
499 true };
501
502 let excluded = if let Some(ref matcher) = exclude_matcher {
504 matcher.is_match(&r.path)
505 } else {
506 false };
508
509 included && !excluded
510 });
511 log::debug!(
512 "Glob filter: reduced {} candidates to {} candidates",
513 before_count,
514 results.len()
515 );
516 }
517
518 if let Some(timeout_duration) = timeout {
520 if start_time.elapsed() > timeout_duration {
521 anyhow::bail!(
522 "Query timeout exceeded ({} seconds).\n\
523 \n\
524 The query took too long to complete. Try one of these approaches:\n\
525 • Use a more specific search pattern (longer patterns = faster search)\n\
526 • Add a language filter with --lang to narrow the search space\n\
527 • Add a file filter with --file to search specific directories\n\
528 • Increase the timeout with --timeout <seconds>\n\
529 \n\
530 Example: rfx query \"{}\" --lang rust --timeout 60",
531 filter.timeout_secs,
532 pattern
533 );
534 }
535 }
536
537 if !filter.force {
540 let candidate_count = results.len();
541 let pattern_len = pattern.chars().count();
542
543 let is_short_pattern = pattern_len < 3 && !filter.use_regex && !is_keyword_query;
546
547 let is_broad_ast = filter.use_ast && filter.glob_patterns.is_empty() && candidate_count >= 100;
550
551 let threshold = if filter.use_ast && filter.glob_patterns.is_empty() {
558 100 } else if filter.use_ast {
560 10_000 } else if is_keyword_query {
562 20_000 } else {
564 50_000 };
566
567 let has_many_candidates = candidate_count > threshold &&
568 (filter.symbols_mode || filter.kind.is_some() || filter.use_ast);
569
570 if is_short_pattern || has_many_candidates || is_broad_ast {
571 let reason = if is_short_pattern {
572 format!("Pattern '{}' is too short ({} characters). Short patterns bypass trigram optimization and require scanning many files.", pattern, pattern_len)
573 } else if is_broad_ast {
574 format!("AST query without --glob restriction will scan the entire codebase ({} files). AST queries are SLOW (500ms-10s+).", candidate_count)
575 } else if is_keyword_query {
576 format!("Keyword query '{}' matched {} files. This query scans all files of the target language, which will take significant time and produce excessive results.", pattern, candidate_count)
577 } else {
578 format!("Query matched {} files. Parsing this many files with --symbols or --kind will take significant time and produce excessive results.", candidate_count)
579 };
580
581 let suggestions = if is_short_pattern {
582 vec![
583 "• Use a longer, more specific pattern (3+ characters recommended)",
584 "• Add a language filter: --lang <language>",
585 "• Add a file path filter: --file <path> or --glob <pattern>",
586 "• Use --force to bypass this check if you really need all results"
587 ]
588 } else if is_broad_ast {
589 vec![
590 "• Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'",
591 "• Use --symbols instead (10-100x faster in 95% of cases)",
592 "• Use --force to bypass this check if you need a full codebase scan"
593 ]
594 } else if is_keyword_query {
595 vec![
596 "• Add a language filter to reduce files scanned: --lang <language>",
597 "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
598 "• Add --kind to filter to specific symbol types: --kind function",
599 "• Use a more specific pattern instead of a keyword",
600 "• Use --force to bypass this check if you need all results"
601 ]
602 } else {
603 vec![
604 "• Add a language filter to reduce candidate set: --lang <language>",
605 "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
606 "• Use a more specific search pattern",
607 "• Use --force to bypass this check if you need all results"
608 ]
609 };
610
611 let mut cmd_flags = String::new();
613 if filter.symbols_mode {
614 cmd_flags.push_str("--symbols ");
615 }
616 if let Some(ref lang) = filter.language {
617 cmd_flags.push_str(&format!("--lang {:?} ", lang));
618 }
619 if let Some(ref kind) = filter.kind {
620 cmd_flags.push_str(&format!("--kind {:?} ", kind));
621 }
622 if filter.use_ast {
623 cmd_flags.push_str("--ast ");
624 }
625
626 anyhow::bail!(
627 "Query too broad - would be expensive to execute\n\
628 \n\
629 {}\n\
630 \n\
631 This query could:\n\
632 • Hang for an extended period before returning results\n\
633 • Return thousands of results\n\
634 • Flood LLM context windows with excessive data\n\
635 • Fail entirely\n\
636 \n\
637 Suggestions to narrow the query:\n\
638 {}\n\
639 \n\
640 To force execution anyway:\n\
641 rfx query \"{}\" --force {}",
642 reason,
643 suggestions.join("\n "),
644 pattern,
645 cmd_flags
646 );
647 }
648 }
649
650 if filter.symbols_mode || filter.kind.is_some() || filter.use_ast {
653 results.sort_by(|a, b| {
654 a.path.cmp(&b.path)
655 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
656 });
657
658 let candidate_count = results.len();
660 if candidate_count > 1000 && !filter.suppress_output {
661 output::warn(&format!(
662 "Pattern '{}' matched {} files - parsing may take some time. Consider using --file, --glob, or a more specific pattern to narrow the search.",
663 pattern,
664 candidate_count
665 ));
666 } else if candidate_count > 100 {
667 log::info!("Parsing {} candidate files for symbol extraction", candidate_count);
668 }
669 }
670
671 if filter.use_ast {
673 results = self.enrich_with_ast(results, pattern, filter.language)?;
675 } else if filter.symbols_mode || filter.kind.is_some() {
676 results = self.enrich_with_symbols(results, pattern, &filter)?;
678 }
679
680 if let Some(ref kind) = filter.kind {
687 results.retain(|r| {
688 if matches!(kind, SymbolKind::Function) {
689 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
691 } else {
692 r.kind == *kind
693 }
694 });
695 }
696
697 if let Some(ref file_pattern) = filter.file_pattern {
699 results.retain(|r| r.path.contains(file_pattern));
700 }
701
702 if filter.exact && filter.symbols_mode {
704 results.retain(|r| r.symbol.as_deref() == Some(pattern));
705 }
706
707 if filter.expand {
710 let content_path = self.cache.path().join("content.bin");
712 if let Ok(content_reader) = ContentReader::open(&content_path) {
713 for result in &mut results {
714 if result.span.start_line < result.span.end_line {
716 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
718 if let Ok(content) = content_reader.get_file_content(file_id) {
720 let lines: Vec<&str> = content.lines().collect();
721 let start_idx = (result.span.start_line as usize).saturating_sub(1);
722 let end_idx = (result.span.end_line as usize).min(lines.len());
723
724 if start_idx < end_idx {
725 let full_body = lines[start_idx..end_idx].join("\n");
726 result.preview = full_body;
727 }
728 }
729 }
730 }
731 }
732 }
733 }
734
735 if filter.paths_only {
737 use std::collections::HashSet;
738 let mut seen_paths = HashSet::new();
739 results.retain(|r| seen_paths.insert(r.path.clone()));
740 }
741
742 results.sort_by(|a, b| {
744 a.path.cmp(&b.path)
745 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
746 });
747
748 let total_count = results.len();
751
752 if let Some(offset) = filter.offset {
754 if offset < results.len() {
755 results = results.into_iter().skip(offset).collect();
756 } else {
757 results.clear();
759 }
760 }
761
762 if let Some(limit) = filter.limit {
764 results.truncate(limit);
765 }
766
767 log::info!("Query returned {} results (total before pagination: {})", results.len(), total_count);
768
769 Ok((results, total_count))
770 }
771
772 pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
774 let filter = QueryFilter {
775 symbols_mode: true,
776 ..Default::default()
777 };
778 self.search(name, filter)
779 }
780
781 pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
783 let filter = QueryFilter {
784 language: lang,
785 use_ast: true,
786 ..Default::default()
787 };
788
789 self.search(pattern, filter)
790 }
791
792 pub fn search_ast_all_files(&self, ast_pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
813 log::info!("Executing AST query on all files: pattern='{}', filter={:?}", ast_pattern, filter);
814
815 let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
817 "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
818 \n\
819 Example: rfx query \"(function_definition) @fn\" --ast --lang python"
820 ))?;
821
822 if !self.cache.exists() {
824 anyhow::bail!(
825 "Index not found. Run 'rfx index' to build the cache first."
826 );
827 }
828
829 self.check_index_freshness(&filter)?;
831
832 let content_path = self.cache.path().join("content.bin");
834 let content_reader = ContentReader::open(&content_path)
835 .context("Failed to open content store")?;
836
837 use globset::{Glob, GlobSetBuilder};
839
840 let include_matcher = if !filter.glob_patterns.is_empty() {
841 let mut builder = GlobSetBuilder::new();
842 for pattern in &filter.glob_patterns {
843 let normalized = Self::normalize_glob_pattern(pattern);
845 if let Ok(glob) = Glob::new(&normalized) {
846 builder.add(glob);
847 }
848 }
849 builder.build().ok()
850 } else {
851 None
852 };
853
854 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
855 let mut builder = GlobSetBuilder::new();
856 for pattern in &filter.exclude_patterns {
857 let normalized = Self::normalize_glob_pattern(pattern);
859 if let Ok(glob) = Glob::new(&normalized) {
860 builder.add(glob);
861 }
862 }
863 builder.build().ok()
864 } else {
865 None
866 };
867
868 let mut candidates: Vec<SearchResult> = Vec::new();
870
871 for file_id in 0..content_reader.file_count() {
872 let file_path = match content_reader.get_file_path(file_id as u32) {
873 Some(p) => p,
874 None => continue,
875 };
876
877 let ext = file_path.extension()
879 .and_then(|e| e.to_str())
880 .unwrap_or("");
881 let detected_lang = Language::from_extension(ext);
882
883 if detected_lang != lang {
885 continue;
886 }
887
888 let file_path_str = file_path.to_string_lossy().to_string();
889
890 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
892 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
893
894 if !included || excluded {
895 continue;
896 }
897
898 candidates.push(SearchResult {
900 path: file_path_str,
901 lang: detected_lang,
902 span: Span { start_line: 1, end_line: 1 },
903 symbol: None,
904 kind: SymbolKind::Unknown("ast_query".to_string()),
905 preview: String::new(),
906 dependencies: None,
907 });
908 }
909
910 log::info!("AST query scanning {} files for language {:?}", candidates.len(), lang);
911
912 if !filter.force && filter.glob_patterns.is_empty() && candidates.len() >= 100 {
915 anyhow::bail!(
916 "Query too broad - would be expensive to execute\n\
917 \n\
918 AST query without --glob restriction will scan the ENTIRE codebase ({} files). AST queries are SLOW (500ms-10s+).\n\
919 \n\
920 This query could:\n\
921 • Hang for an extended period before returning results\n\
922 • Return thousands of results\n\
923 • Flood LLM context windows with excessive data\n\
924 • Fail entirely\n\
925 \n\
926 Suggestions to narrow the query:\n\
927 • Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'\n\
928 • Use --symbols instead (10-100x faster in 95% of cases)\n\
929 • Use --force to bypass this check if you need a full codebase scan\n\
930 \n\
931 To force execution anyway:\n\
932 rfx query \"{}\" --force --ast --lang {:?}",
933 candidates.len(),
934 ast_pattern,
935 lang
936 );
937 }
938
939 if candidates.is_empty() {
940 if !filter.suppress_output {
941 output::warn(&format!("No files found for language {:?}. Check your language filter or glob patterns.", lang));
942 }
943 return Ok(Vec::new());
944 }
945
946 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
949
950 log::debug!("AST query found {} matches before filtering", results.len());
951
952 if let Some(ref kind) = filter.kind {
956 results.retain(|r| {
957 if matches!(kind, SymbolKind::Function) {
958 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
959 } else {
960 r.kind == *kind
961 }
962 });
963 }
964
965 if filter.expand {
969 let content_path = self.cache.path().join("content.bin");
970 if let Ok(content_reader) = ContentReader::open(&content_path) {
971 for result in &mut results {
972 if result.span.start_line < result.span.end_line {
973 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
974 if let Ok(content) = content_reader.get_file_content(file_id) {
975 let lines: Vec<&str> = content.lines().collect();
976 let start_idx = (result.span.start_line as usize).saturating_sub(1);
977 let end_idx = (result.span.end_line as usize).min(lines.len());
978
979 if start_idx < end_idx {
980 let full_body = lines[start_idx..end_idx].join("\n");
981 result.preview = full_body;
982 }
983 }
984 }
985 }
986 }
987 }
988 }
989
990 if filter.paths_only {
992 use std::collections::HashSet;
993 let mut seen_paths = HashSet::new();
994 results.retain(|r| seen_paths.insert(r.path.clone()));
995 }
996
997 results.sort_by(|a, b| {
999 a.path.cmp(&b.path)
1000 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1001 });
1002
1003 if let Some(offset) = filter.offset {
1005 if offset < results.len() {
1006 results = results.into_iter().skip(offset).collect();
1007 } else {
1008 results.clear();
1009 }
1010 }
1011
1012 if let Some(limit) = filter.limit {
1014 results.truncate(limit);
1015 }
1016
1017 log::info!("AST query returned {} results", results.len());
1018
1019 self.load_dependencies(&mut results, filter.include_dependencies)?;
1021
1022 Ok(results)
1023 }
1024
1025 pub fn search_ast_with_text_filter(
1037 &self,
1038 text_pattern: &str,
1039 ast_pattern: &str,
1040 filter: QueryFilter,
1041 ) -> Result<Vec<SearchResult>> {
1042 log::info!("Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
1043 text_pattern, ast_pattern, filter);
1044
1045 if !self.cache.exists() {
1047 anyhow::bail!(
1048 "Index not found. Run 'rfx index' to build the cache first."
1049 );
1050 }
1051
1052 self.check_index_freshness(&filter)?;
1054
1055 use std::time::{Duration, Instant};
1057 let start_time = Instant::now();
1058 let timeout = if filter.timeout_secs > 0 {
1059 Some(Duration::from_secs(filter.timeout_secs))
1060 } else {
1061 None
1062 };
1063
1064 let candidates = if filter.use_regex {
1066 self.get_regex_candidates(text_pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
1067 } else {
1068 self.get_trigram_candidates(text_pattern, &filter)?
1069 };
1070
1071 log::debug!("Phase 1 found {} candidate locations", candidates.len());
1072
1073 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
1075
1076 log::debug!("Phase 2 AST matching found {} results", results.len());
1077
1078 if let Some(lang) = filter.language {
1080 results.retain(|r| r.lang == lang);
1081 }
1082
1083 if let Some(ref kind) = filter.kind {
1084 results.retain(|r| {
1085 if matches!(kind, SymbolKind::Function) {
1086 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
1087 } else {
1088 r.kind == *kind
1089 }
1090 });
1091 }
1092
1093 if let Some(ref file_pattern) = filter.file_pattern {
1094 results.retain(|r| r.path.contains(file_pattern));
1095 }
1096
1097 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
1099 use globset::{Glob, GlobSetBuilder};
1100
1101 let include_matcher = if !filter.glob_patterns.is_empty() {
1102 let mut builder = GlobSetBuilder::new();
1103 for pattern in &filter.glob_patterns {
1104 let normalized = Self::normalize_glob_pattern(pattern);
1106 if let Ok(glob) = Glob::new(&normalized) {
1107 builder.add(glob);
1108 }
1109 }
1110 builder.build().ok()
1111 } else {
1112 None
1113 };
1114
1115 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1116 let mut builder = GlobSetBuilder::new();
1117 for pattern in &filter.exclude_patterns {
1118 let normalized = Self::normalize_glob_pattern(pattern);
1120 if let Ok(glob) = Glob::new(&normalized) {
1121 builder.add(glob);
1122 }
1123 }
1124 builder.build().ok()
1125 } else {
1126 None
1127 };
1128
1129 results.retain(|r| {
1130 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&r.path));
1131 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&r.path));
1132 included && !excluded
1133 });
1134 }
1135
1136 if filter.exact && filter.symbols_mode {
1137 results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
1138 }
1139
1140 if filter.expand {
1142 let content_path = self.cache.path().join("content.bin");
1143 if let Ok(content_reader) = ContentReader::open(&content_path) {
1144 for result in &mut results {
1145 if result.span.start_line < result.span.end_line {
1146 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
1147 if let Ok(content) = content_reader.get_file_content(file_id) {
1148 let lines: Vec<&str> = content.lines().collect();
1149 let start_idx = (result.span.start_line as usize).saturating_sub(1);
1150 let end_idx = (result.span.end_line as usize).min(lines.len());
1151
1152 if start_idx < end_idx {
1153 let full_body = lines[start_idx..end_idx].join("\n");
1154 result.preview = full_body;
1155 }
1156 }
1157 }
1158 }
1159 }
1160 }
1161 }
1162
1163 results.sort_by(|a, b| {
1165 a.path.cmp(&b.path)
1166 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
1167 });
1168
1169 if let Some(offset) = filter.offset {
1171 if offset < results.len() {
1172 results = results.into_iter().skip(offset).collect();
1173 } else {
1174 results.clear();
1175 }
1176 }
1177
1178 if let Some(limit) = filter.limit {
1180 results.truncate(limit);
1181 }
1182
1183 log::info!("AST query returned {} results", results.len());
1184
1185 Ok(results)
1186 }
1187
1188 pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
1190 let filter = QueryFilter {
1191 kind: Some(kind),
1192 symbols_mode: true,
1193 ..Default::default()
1194 };
1195
1196 self.search("*", filter)
1197 }
1198
1199 fn enrich_with_symbols(&self, candidates: Vec<SearchResult>, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1220 let content_path = self.cache.path().join("content.bin");
1222 let content_reader = ContentReader::open(&content_path)
1223 .context("Failed to open content store")?;
1224
1225 let trigrams_path = self.cache.path().join("trigrams.bin");
1227 let trigram_index = if trigrams_path.exists() {
1228 TrigramIndex::load(&trigrams_path)?
1229 } else {
1230 Self::rebuild_trigram_index(&content_reader)?
1231 };
1232
1233 let symbol_cache = crate::symbol_cache::SymbolCache::open(self.cache.path())
1235 .context("Failed to open symbol cache")?;
1236
1237 let root = self.cache.workspace_root();
1239 let branch = crate::git::get_current_branch(&root)
1240 .unwrap_or_else(|_| "_default".to_string());
1241 let file_hashes = self.cache.load_hashes_for_branch(&branch)
1242 .context("Failed to load file hashes")?;
1243 log::debug!("Loaded {} file hashes for branch '{}' for symbol cache lookups", file_hashes.len(), branch);
1244
1245 use std::collections::HashMap;
1247 let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
1248 let mut skipped_unsupported = 0;
1249
1250 for candidate in candidates {
1251 if !candidate.lang.is_supported() {
1253 skipped_unsupported += 1;
1254 continue;
1255 }
1256
1257 files_by_path
1258 .entry(candidate.path.clone())
1259 .or_insert_with(Vec::new)
1260 .push(candidate);
1261 }
1262
1263 let total_files = files_by_path.len();
1264 log::debug!("Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
1265 total_files, skipped_unsupported);
1266
1267 if total_files > 1000 && !filter.suppress_output {
1269 output::warn(&format!(
1270 "Pattern '{}' matched {} files. This may take some time to parse. Consider using a more specific pattern or adding --lang/--file filters to narrow the search.",
1271 pattern,
1272 total_files
1273 ));
1274 }
1275
1276 let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
1278
1279 let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
1282
1283 for file_path in &files_to_process {
1284 let ext = std::path::Path::new(file_path)
1286 .extension()
1287 .and_then(|e| e.to_str())
1288 .unwrap_or("");
1289 let lang = Language::from_extension(ext);
1290
1291 if let Some(line_filter) = crate::line_filter::get_filter(lang) {
1293 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1295 Some(id) => id,
1296 None => continue,
1297 };
1298
1299 let content = match content_reader.get_file_content(file_id) {
1301 Ok(c) => c,
1302 Err(_) => continue,
1303 };
1304
1305 let mut all_in_non_code = true;
1307 for line in content.lines() {
1308 let mut search_start = 0;
1310 while let Some(pos) = line[search_start..].find(pattern) {
1311 let absolute_pos = search_start + pos;
1312
1313 let in_comment = line_filter.is_in_comment(line, absolute_pos);
1315 let in_string = line_filter.is_in_string(line, absolute_pos);
1316
1317 if !in_comment && !in_string {
1318 all_in_non_code = false;
1320 break;
1321 }
1322
1323 search_start = absolute_pos + pattern.len();
1324 }
1325
1326 if !all_in_non_code {
1327 break;
1328 }
1329 }
1330
1331 if all_in_non_code {
1333 if content.contains(pattern) {
1335 files_to_skip.insert(file_path.clone());
1336 log::debug!("Pre-filter: Skipping {} (all matches in comments/strings)", file_path);
1337 }
1338 }
1339 }
1340 }
1341
1342 files_to_process.retain(|path| !files_to_skip.contains(path));
1344
1345 log::debug!("Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
1346 files_to_skip.len(), files_to_process.len());
1347
1348 let num_threads = {
1350 let available_cores = std::thread::available_parallelism()
1351 .map(|n| n.get())
1352 .unwrap_or(4);
1353 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
1356 };
1357
1358 log::debug!("Using {} threads for parallel symbol extraction (out of {} available cores)",
1359 num_threads,
1360 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
1361
1362 let pool = rayon::ThreadPoolBuilder::new()
1364 .num_threads(num_threads)
1365 .build()
1366 .context("Failed to create thread pool for symbol extraction")?;
1367
1368 let files_with_hashes: Vec<String> = files_to_process
1373 .iter()
1374 .filter(|path| file_hashes.contains_key(path.as_str()))
1375 .cloned()
1376 .collect();
1377
1378 let file_id_map = self.cache.batch_get_file_ids(&files_with_hashes)
1380 .context("Failed to batch lookup file IDs")?;
1381
1382 let file_lookup_tuples: Vec<(i64, String, String)> = files_with_hashes
1384 .iter()
1385 .filter_map(|path| {
1386 let file_id = file_id_map.get(path)?;
1387 let hash = file_hashes.get(path.as_str())?;
1388 Some((*file_id, hash.clone(), path.clone()))
1389 })
1390 .collect();
1391
1392 let batch_results = symbol_cache.batch_get_with_kind(&file_lookup_tuples, filter.kind.clone())
1394 .context("Failed to batch read symbol cache")?;
1395
1396 let mut cached_symbols: HashMap<String, Vec<SearchResult>> = HashMap::new();
1398 let mut files_needing_parse: Vec<String> = Vec::new();
1399
1400 let id_to_path: HashMap<i64, String> = file_id_map
1402 .iter()
1403 .map(|(path, id)| (*id, path.clone()))
1404 .collect();
1405
1406 for (file_id, symbols) in batch_results {
1408 if let Some(file_path) = id_to_path.get(&file_id) {
1409 cached_symbols.insert(file_path.clone(), symbols);
1410 }
1411 }
1412
1413 for path in &files_with_hashes {
1415 if file_id_map.contains_key(path) && !cached_symbols.contains_key(path) {
1416 files_needing_parse.push(path.clone());
1417 }
1418 }
1419
1420 for file_path in &files_to_process {
1422 if !file_hashes.contains_key(file_path.as_str()) {
1423 files_needing_parse.push(file_path.clone());
1424 }
1425 }
1426
1427 log::debug!(
1428 "Symbol cache: {} hits, {} need parsing",
1429 cached_symbols.len(),
1430 files_needing_parse.len()
1431 );
1432
1433 use rayon::prelude::*;
1435
1436 let parsed_symbols: Vec<SearchResult> = pool.install(|| {
1437 files_needing_parse
1438 .par_iter()
1439 .flat_map(|file_path| {
1440 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
1442 Some(id) => id,
1443 None => {
1444 log::warn!("Could not find file_id for path: {}", file_path);
1445 return Vec::new();
1446 }
1447 };
1448
1449 let content = match content_reader.get_file_content(file_id) {
1450 Ok(c) => c,
1451 Err(e) => {
1452 log::warn!("Failed to read file {}: {}", file_path, e);
1453 return Vec::new();
1454 }
1455 };
1456
1457 let ext = std::path::Path::new(file_path)
1459 .extension()
1460 .and_then(|e| e.to_str())
1461 .unwrap_or("");
1462 let lang = Language::from_extension(ext);
1463
1464 let symbols = match ParserFactory::parse(file_path, content, lang) {
1466 Ok(symbols) => {
1467 log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
1468 symbols
1469 }
1470 Err(e) => {
1471 log::debug!("Failed to parse {}: {}", file_path, e);
1472 Vec::new()
1473 }
1474 };
1475
1476 if let Some(file_hash) = file_hashes.get(file_path.as_str()) {
1478 if let Err(e) = symbol_cache.set(file_path, file_hash, &symbols) {
1479 log::debug!("Failed to cache symbols for {}: {}", file_path, e);
1480 }
1481 }
1482
1483 symbols
1484 })
1485 .collect()
1486 });
1487
1488 let mut all_symbols: Vec<SearchResult> = Vec::new();
1490
1491 for symbols in cached_symbols.values() {
1493 all_symbols.extend_from_slice(symbols);
1494 }
1495
1496 all_symbols.extend(parsed_symbols);
1498
1499 let is_keyword_query = {
1507 let lang_to_check = if let Some(lang) = filter.language {
1509 vec![lang]
1512 } else {
1513 let mut langs: Vec<Language> = all_symbols.iter()
1517 .map(|s| s.lang)
1518 .collect::<Vec<_>>();
1519 langs.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); langs.dedup(); langs
1522 };
1523
1524 lang_to_check.iter().any(|lang| {
1526 ParserFactory::get_keywords(*lang).contains(&pattern)
1527 })
1528 };
1529
1530 let filtered: Vec<SearchResult> = if is_keyword_query {
1533 log::info!("Pattern '{}' is a language keyword - listing all symbols (kind filtering will be applied in Phase 3)", pattern);
1534 all_symbols
1535 } else if filter.use_regex {
1536 use std::collections::{HashMap, HashSet};
1542 let mut candidate_lines: HashMap<String, HashSet<usize>> = HashMap::new();
1543 for candidate in &files_by_path {
1544 for cand in candidate.1 {
1545 candidate_lines
1546 .entry(candidate.0.clone())
1547 .or_insert_with(HashSet::new)
1548 .insert(cand.span.start_line);
1549 }
1550 }
1551
1552 all_symbols
1554 .into_iter()
1555 .filter(|sym| {
1556 if let Some(lines) = candidate_lines.get(&sym.path) {
1557 for line in sym.span.start_line..=sym.span.end_line {
1559 if lines.contains(&line) {
1560 return true;
1561 }
1562 }
1563 }
1564 false
1565 })
1566 .collect()
1567 } else if filter.use_contains {
1568 all_symbols
1570 .into_iter()
1571 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
1572 .collect()
1573 } else {
1574 all_symbols
1576 .into_iter()
1577 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
1578 .collect()
1579 };
1580
1581 log::info!("Symbol enrichment found {} matches for pattern '{}'", filtered.len(), pattern);
1582
1583 Ok(filtered)
1584 }
1585
1586 fn enrich_with_ast(&self, candidates: Vec<SearchResult>, ast_pattern: &str, language: Option<Language>) -> Result<Vec<SearchResult>> {
1605 let lang = language.ok_or_else(|| anyhow::anyhow!(
1607 "Language must be specified for AST pattern matching. Use --lang to specify the language."
1608 ))?;
1609
1610 let content_path = self.cache.path().join("content.bin");
1612 let content_reader = ContentReader::open(&content_path)
1613 .context("Failed to open content store")?;
1614
1615 let trigrams_path = self.cache.path().join("trigrams.bin");
1617 let trigram_index = if trigrams_path.exists() {
1618 TrigramIndex::load(&trigrams_path)?
1619 } else {
1620 Self::rebuild_trigram_index(&content_reader)?
1621 };
1622
1623 use std::collections::HashMap;
1625 let mut file_contents: HashMap<String, String> = HashMap::new();
1626
1627 for candidate in &candidates {
1628 if file_contents.contains_key(&candidate.path) {
1629 continue;
1630 }
1631
1632 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, &candidate.path) {
1634 Some(id) => id,
1635 None => {
1636 log::warn!("Could not find file_id for path: {}", candidate.path);
1637 continue;
1638 }
1639 };
1640
1641 let content = match content_reader.get_file_content(file_id) {
1643 Ok(c) => c,
1644 Err(e) => {
1645 log::warn!("Failed to read file {}: {}", candidate.path, e);
1646 continue;
1647 }
1648 };
1649
1650 file_contents.insert(candidate.path.clone(), content.to_string());
1651 }
1652
1653 log::debug!("Executing AST query on {} candidate files with language {:?}", file_contents.len(), lang);
1654
1655 let results = crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1657
1658 log::info!("AST query found {} matches for pattern '{}'", results.len(), ast_pattern);
1659
1660 Ok(results)
1661 }
1662
1663 fn find_file_id_by_path(
1665 content_reader: &ContentReader,
1666 trigram_index: &TrigramIndex,
1667 target_path: &str,
1668 ) -> Option<u32> {
1669 for file_id in 0..trigram_index.file_count() {
1671 if let Some(path) = trigram_index.get_file(file_id as u32) {
1672 if path.to_string_lossy() == target_path {
1673 return Some(file_id as u32);
1674 }
1675 }
1676 }
1677
1678 for file_id in 0..content_reader.file_count() {
1680 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1681 if path.to_string_lossy() == target_path {
1682 return Some(file_id as u32);
1683 }
1684 }
1685 }
1686
1687 None
1688 }
1689
1690 fn keyword_to_kind(keyword: &str) -> Option<SymbolKind> {
1698 filter::keyword_to_kind(keyword)
1699 }
1700
1701 fn get_all_language_files(&self, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1709 let content_path = self.cache.path().join("content.bin");
1714 let content_reader = ContentReader::open(&content_path)
1715 .context("Failed to open content store")?;
1716
1717 use globset::{Glob, GlobSetBuilder};
1719
1720 let include_matcher = if !filter.glob_patterns.is_empty() {
1721 let mut builder = GlobSetBuilder::new();
1722 for pattern in &filter.glob_patterns {
1723 let normalized = Self::normalize_glob_pattern(pattern);
1724 if let Ok(glob) = Glob::new(&normalized) {
1725 builder.add(glob);
1726 }
1727 }
1728 builder.build().ok()
1729 } else {
1730 None
1731 };
1732
1733 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1734 let mut builder = GlobSetBuilder::new();
1735 for pattern in &filter.exclude_patterns {
1736 let normalized = Self::normalize_glob_pattern(pattern);
1737 if let Ok(glob) = Glob::new(&normalized) {
1738 builder.add(glob);
1739 }
1740 }
1741 builder.build().ok()
1742 } else {
1743 None
1744 };
1745
1746 let mut candidates: Vec<SearchResult> = Vec::new();
1748
1749 for file_id in 0..content_reader.file_count() {
1750 let file_path = match content_reader.get_file_path(file_id as u32) {
1751 Some(p) => p,
1752 None => continue,
1753 };
1754
1755 let ext = file_path.extension()
1757 .and_then(|e| e.to_str())
1758 .unwrap_or("");
1759 let detected_lang = Language::from_extension(ext);
1760
1761 if let Some(lang) = filter.language {
1763 if detected_lang != lang {
1764 continue;
1765 }
1766 }
1767
1768 let file_path_str = file_path.to_string_lossy().to_string();
1769
1770 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
1772 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
1773
1774 if !included || excluded {
1775 continue;
1776 }
1777
1778 if let Some(ref file_pattern) = filter.file_pattern {
1780 if !file_path_str.contains(file_pattern) {
1781 continue;
1782 }
1783 }
1784
1785 candidates.push(SearchResult {
1788 path: file_path_str,
1789 lang: detected_lang,
1790 span: Span { start_line: 1, end_line: 1 },
1791 symbol: None,
1792 kind: SymbolKind::Unknown("keyword_query".to_string()),
1793 preview: String::new(),
1794 dependencies: None,
1795 });
1796 }
1797
1798 if let Some(lang) = filter.language {
1799 log::info!("Keyword query will scan {} {:?} files for symbol extraction", candidates.len(), lang);
1800 } else {
1801 log::info!("Keyword query will scan {} files (all languages) for symbol extraction", candidates.len());
1802 }
1803
1804 Ok(candidates)
1805 }
1806
1807 fn get_trigram_candidates(&self, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1809 let content_path = self.cache.path().join("content.bin");
1811 let content_reader = ContentReader::open(&content_path)
1812 .context("Failed to open content store")?;
1813
1814 let trigrams_path = self.cache.path().join("trigrams.bin");
1816 let trigram_index = if trigrams_path.exists() {
1817 match TrigramIndex::load(&trigrams_path) {
1818 Ok(index) => {
1819 log::debug!("Loaded trigram index from disk: {} trigrams, {} files",
1820 index.trigram_count(), index.file_count());
1821 index
1822 }
1823 Err(e) => {
1824 log::warn!("Failed to load trigram index from disk: {}", e);
1825 log::warn!("Rebuilding trigram index from content store...");
1826 Self::rebuild_trigram_index(&content_reader)?
1827 }
1828 }
1829 } else {
1830 log::debug!("trigrams.bin not found, rebuilding from content store");
1831 Self::rebuild_trigram_index(&content_reader)?
1832 };
1833
1834 let candidates = trigram_index.search(pattern);
1836 log::debug!("Found {} candidate locations from trigram search", candidates.len());
1837
1838 let pattern_owned = pattern.to_string();
1840
1841 let compiled_regex = if filter.use_regex {
1843 match Regex::new(&pattern_owned) {
1844 Ok(re) => Some(re),
1845 Err(e) => {
1846 log::error!("Invalid regex pattern '{}': {}", pattern_owned, e);
1847 anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e);
1848 }
1849 }
1850 } else {
1851 None
1852 };
1853
1854 use std::collections::HashMap;
1856 let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> = HashMap::new();
1857 for loc in candidates {
1858 candidates_by_file
1859 .entry(loc.file_id)
1860 .or_insert_with(Vec::new)
1861 .push(loc);
1862 }
1863
1864 log::debug!("Scanning {} files with trigram matches", candidates_by_file.len());
1865
1866 use rayon::prelude::*;
1868
1869 let results: Vec<SearchResult> = candidates_by_file
1870 .par_iter()
1871 .flat_map(|(file_id, locations)| {
1872 let file_path = match trigram_index.get_file(*file_id) {
1874 Some(p) => p,
1875 None => return Vec::new(),
1876 };
1877
1878 let content = match content_reader.get_file_content(*file_id) {
1879 Ok(c) => c,
1880 Err(_) => return Vec::new(),
1881 };
1882
1883 let file_path_str = file_path.to_string_lossy().to_string();
1884
1885 let ext = file_path.extension()
1887 .and_then(|e| e.to_str())
1888 .unwrap_or("");
1889 let lang = Language::from_extension(ext);
1890
1891 let lines: Vec<&str> = content.lines().collect();
1893
1894 let mut seen_lines: std::collections::HashSet<usize> = std::collections::HashSet::new();
1896 let mut file_results = Vec::new();
1897
1898 for loc in locations {
1900 let line_no = loc.line_no as usize;
1901
1902 if seen_lines.contains(&line_no) {
1904 continue;
1905 }
1906
1907 if line_no == 0 || line_no > lines.len() {
1909 log::debug!("Line {} out of bounds (file has {} lines)", line_no, lines.len());
1910 continue;
1911 }
1912
1913 let line = lines[line_no - 1];
1914
1915 let line_matches = if filter.use_regex {
1920 compiled_regex.as_ref()
1923 .map(|re| re.is_match(line))
1924 .unwrap_or(false)
1925 } else if filter.use_contains {
1926 line.contains(&pattern_owned)
1928 } else {
1929 Self::has_word_boundary_match(line, &pattern_owned)
1931 };
1932
1933 if !line_matches {
1934 continue;
1935 }
1936
1937 seen_lines.insert(line_no);
1938
1939 file_results.push(SearchResult {
1941 path: file_path_str.clone(),
1942 lang: lang.clone(),
1943 kind: SymbolKind::Unknown("text_match".to_string()),
1944 symbol: None, span: Span {
1946 start_line: line_no,
1947 end_line: line_no,
1948 },
1949 preview: line.to_string(),
1950 dependencies: None,
1951 });
1952 }
1953
1954 file_results
1955 })
1956 .collect();
1957
1958 Ok(results)
1959 }
1960
1961 fn get_regex_candidates(&self, pattern: &str, timeout: Option<&std::time::Duration>, start_time: &std::time::Instant, suppress_output: bool) -> Result<Vec<SearchResult>> {
1985 let regex = Regex::new(pattern)
1987 .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
1988
1989 if let Some(timeout_duration) = timeout {
1991 if start_time.elapsed() > *timeout_duration {
1992 anyhow::bail!(
1993 "Query timeout exceeded ({} seconds) during regex compilation",
1994 timeout_duration.as_secs()
1995 );
1996 }
1997 }
1998
1999 let trigrams = extract_trigrams_from_regex(pattern);
2001
2002 let content_path = self.cache.path().join("content.bin");
2004 let content_reader = ContentReader::open(&content_path)
2005 .context("Failed to open content store")?;
2006
2007 let mut results = Vec::new();
2008
2009 if trigrams.is_empty() {
2010 if !suppress_output {
2012 output::warn(&format!(
2013 "Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan. This may be slow on large codebases. Consider using patterns with literal text.",
2014 pattern
2015 ));
2016 }
2017
2018 for file_id in 0..content_reader.file_count() {
2020 let file_path = content_reader.get_file_path(file_id as u32)
2021 .context("Invalid file_id")?;
2022 let content = content_reader.get_file_content(file_id as u32)?;
2023
2024 self.find_regex_matches_in_file(
2025 ®ex,
2026 file_path,
2027 content,
2028 &mut results,
2029 )?;
2030 }
2031 } else {
2032 log::debug!("Using {} trigrams to narrow regex search candidates", trigrams.len());
2034
2035 let trigrams_path = self.cache.path().join("trigrams.bin");
2037 let trigram_index = if trigrams_path.exists() {
2038 TrigramIndex::load(&trigrams_path)?
2039 } else {
2040 Self::rebuild_trigram_index(&content_reader)?
2041 };
2042
2043 use crate::regex_trigrams::extract_literal_sequences;
2045 let literals = extract_literal_sequences(pattern);
2046
2047 if literals.is_empty() {
2048 log::warn!("Regex extraction found trigrams but no literal sequences - this shouldn't happen");
2049 for file_id in 0..content_reader.file_count() {
2051 let file_path = content_reader.get_file_path(file_id as u32)
2052 .context("Invalid file_id")?;
2053 let content = content_reader.get_file_content(file_id as u32)?;
2054 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
2055 }
2056 } else {
2057 use std::collections::HashSet;
2062 let mut candidate_files: HashSet<u32> = HashSet::new();
2063
2064 for literal in &literals {
2065 let candidates = trigram_index.search(literal);
2067 let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
2068
2069 log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
2070
2071 candidate_files.extend(file_ids);
2074 }
2075
2076 let final_candidates = candidate_files;
2077 log::debug!("After union: searching {} files that contain any literal", final_candidates.len());
2078
2079 for &file_id in &final_candidates {
2081 let file_path = trigram_index.get_file(file_id)
2082 .context("Invalid file_id from trigram search")?;
2083 let content = content_reader.get_file_content(file_id)?;
2084
2085 self.find_regex_matches_in_file(
2086 ®ex,
2087 file_path,
2088 content,
2089 &mut results,
2090 )?;
2091 }
2092 }
2093 }
2094
2095 log::info!("Regex search found {} matches for pattern '{}'", results.len(), pattern);
2096 Ok(results)
2097 }
2098
2099 fn find_regex_matches_in_file(
2101 &self,
2102 regex: &Regex,
2103 file_path: &std::path::Path,
2104 content: &str,
2105 results: &mut Vec<SearchResult>,
2106 ) -> Result<()> {
2107 let file_path_str = file_path.to_string_lossy().to_string();
2108
2109 let ext = file_path.extension()
2111 .and_then(|e| e.to_str())
2112 .unwrap_or("");
2113 let lang = Language::from_extension(ext);
2114
2115 for (line_idx, line) in content.lines().enumerate() {
2117 if regex.is_match(line) {
2118 let line_no = line_idx + 1;
2119
2120 results.push(SearchResult {
2127 path: file_path_str.clone(),
2128 lang: lang.clone(),
2129 kind: SymbolKind::Unknown("regex_match".to_string()),
2130 symbol: None, span: Span {
2132 start_line: line_no,
2133 end_line: line_no,
2134 },
2135 preview: line.to_string(),
2136 dependencies: None,
2137 });
2138 }
2139 }
2140
2141 Ok(())
2142 }
2143
2144 fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
2145 result::find_file_id(content_reader, target_path)
2146 }
2147
2148 fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
2149 result::rebuild_trigram_index(content_reader)
2150 }
2151
2152 fn normalize_glob_pattern(pattern: &str) -> String {
2153 result::normalize_glob_pattern(pattern)
2154 }
2155
2156 fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
2157 filter::has_word_boundary_match(line, pattern)
2158 }
2159
2160 fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
2165 let root = self.cache.workspace_root();
2166
2167 if crate::git::is_git_repo(&root) {
2169 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2170 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2172 let warning = IndexWarning {
2173 reason: format!("Branch '{}' has not been indexed", current_branch),
2174 action_required: "rfx index".to_string(),
2175 details: Some(IndexWarningDetails {
2176 current_branch: Some(current_branch),
2177 indexed_branch: None,
2178 current_commit: None,
2179 indexed_commit: None,
2180 }),
2181 };
2182 return Ok((IndexStatus::Stale, false, Some(warning)));
2183 }
2184
2185 if let (Ok(current_commit), Ok(branch_info)) =
2187 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
2188
2189 if branch_info.commit_sha != current_commit {
2190 let warning = IndexWarning {
2191 reason: format!(
2192 "Commit changed from {} to {}",
2193 &branch_info.commit_sha[..7],
2194 ¤t_commit[..7]
2195 ),
2196 action_required: "rfx index".to_string(),
2197 details: Some(IndexWarningDetails {
2198 current_branch: Some(current_branch.clone()),
2199 indexed_branch: Some(current_branch.clone()),
2200 current_commit: Some(current_commit.clone()),
2201 indexed_commit: Some(branch_info.commit_sha.clone()),
2202 }),
2203 };
2204 return Ok((IndexStatus::Stale, false, Some(warning)));
2205 }
2206
2207 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2209 let mut checked = 0;
2210 let mut changed = 0;
2211 const SAMPLE_SIZE: usize = 10;
2212
2213 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2214 checked += 1;
2215 let file_path = std::path::Path::new(path);
2216
2217 if let Ok(metadata) = std::fs::metadata(file_path) {
2218 if let Ok(modified) = metadata.modified() {
2219 let indexed_time = branch_info.last_indexed;
2220 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
2221 .unwrap_or_default()
2222 .as_secs() as i64;
2223
2224 if file_time > indexed_time {
2225 changed += 1;
2228 }
2229 }
2230 }
2231 }
2232
2233 if changed > 0 {
2234 let warning = IndexWarning {
2235 reason: format!("{} of {} sampled files modified", changed, checked),
2236 action_required: "rfx index".to_string(),
2237 details: Some(IndexWarningDetails {
2238 current_branch: Some(current_branch.clone()),
2239 indexed_branch: Some(branch_info.branch.clone()),
2240 current_commit: Some(current_commit.clone()),
2241 indexed_commit: Some(branch_info.commit_sha.clone()),
2242 }),
2243 };
2244 return Ok((IndexStatus::Stale, false, Some(warning)));
2245 }
2246 }
2247
2248 return Ok((IndexStatus::Fresh, true, None));
2250 }
2251 }
2252 }
2253
2254 Ok((IndexStatus::Fresh, true, None))
2256 }
2257
2258 fn check_index_freshness(&self, filter: &QueryFilter) -> Result<()> {
2265 let root = self.cache.workspace_root();
2266
2267 if crate::git::is_git_repo(&root) {
2269 if !crate::git::is_git_available() {
2270 static WARNED: std::sync::OnceLock<()> = std::sync::OnceLock::new();
2271 if !filter.suppress_output {
2272 WARNED.get_or_init(|| {
2273 output::warn("⚠️ git binary not found in PATH; index freshness checks disabled for this session.");
2274 });
2275 }
2276 return Ok(());
2277 }
2278 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
2279 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
2281 if !filter.suppress_output {
2282 output::warn(&format!("⚠️ WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.", current_branch));
2283 }
2284 return Ok(());
2285 }
2286
2287 if let (Ok(current_commit), Ok(branch_info)) =
2289 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
2290
2291 if branch_info.commit_sha != current_commit {
2292 if !filter.suppress_output {
2293 output::warn(&format!("⚠️ WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
2294 &branch_info.commit_sha[..7], ¤t_commit[..7]));
2295 }
2296 return Ok(());
2297 }
2298
2299 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
2302 let mut checked = 0;
2303 let mut changed = 0;
2304 const SAMPLE_SIZE: usize = 10;
2305
2306 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
2307 checked += 1;
2308 let file_path = std::path::Path::new(path);
2309
2310 if let Ok(metadata) = std::fs::metadata(file_path) {
2312 if let Ok(modified) = metadata.modified() {
2313 let indexed_time = branch_info.last_indexed;
2314 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
2315 .unwrap_or_default()
2316 .as_secs() as i64;
2317
2318 if file_time > indexed_time {
2320 changed += 1;
2325 }
2326 }
2327 }
2328 }
2329
2330 if changed > 0 && !filter.suppress_output {
2331 output::warn(&format!("⚠️ WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.", changed, checked));
2332 }
2333 }
2334 }
2335 }
2336 }
2337
2338 Ok(())
2339 }
2340}
2341
/// Produces an optional guidance message for an AI consumer of the search
/// results, based on result counts and the active query options.
///
/// Rules are evaluated top to bottom and the first one that applies wins:
/// empty result, too many total results (>= 500), pagination, small precise
/// symbol results, broad results (101..500 total), large full-text results,
/// paths-only mode, AST mode, large regex results, and finally small result
/// sets under a language / glob / exact-match filter.
///
/// Returns `None` when no rule applies.
pub fn generate_ai_instruction(
    result_count: usize,
    total_count: usize,
    has_more: bool,
    symbols_mode: bool,
    paths_only: bool,
    use_ast: bool,
    use_regex: bool,
    language_filter: bool,
    glob_filter: bool,
    exact_mode: bool,
) -> Option<String> {
    let message = if result_count == 0 {
        "No results found. Consider these alternatives: 1) Check pattern spelling, 2) Remove --kind or --lang filters to broaden search, 3) Try partial match or related term, 4) Use search_regex tool for pattern matching with special characters or complex patterns."
            .to_string()
    } else if total_count >= 500 {
        format!("Query too broad: {} results found. STOP. Do not list results. Refine search automatically by adding filters: kind parameter (Function/Struct/Class), lang parameter (rust/python/etc), or glob parameter (['src/**/*.rs']). Call search_code again with appropriate filters.", total_count)
    } else if has_more {
        format!("Showing {} of {} results. PAGINATED - there are more results available. Do not automatically fetch all results. Show current page, ask user if these results answer their question before fetching more with --offset parameter.", result_count, total_count)
    } else if symbols_mode && result_count == 1 {
        "Found 1 precise result. Respond concisely: '[symbol] at [path]:[line]'.".to_string()
    } else if symbols_mode && (2..=10).contains(&result_count) {
        format!("Found {} precise results (definitions only, not usages). List locations concisely: '[symbol] at [path]:[line]' for each result.", result_count)
    } else if (101..500).contains(&total_count) {
        format!("Found {} results - this is broad. Suggest refining search with: kind parameter (Function/Struct/Class/etc), lang parameter (rust/python/etc), or glob parameter to narrow file scope.", total_count)
    } else if !symbols_mode && result_count >= 100 {
        format!("Found {} results in full-text search mode (includes definitions AND all usages). Consider using symbols=true parameter to filter to definitions only. This typically reduces results by 80-90%.", result_count)
    } else if paths_only {
        format!("Found {} unique files (paths-only mode - no code content included). Next step: Use Read tool on specific files that look relevant based on their paths.", result_count)
    } else if use_ast {
        format!("Found {} results using AST pattern matching. These are structure-based matches using Tree-sitter patterns, not text search.", result_count)
    } else if use_regex && result_count >= 100 {
        format!("Found {} results using regex pattern matching. Regex matches are expansive. Consider using exact text search or symbols mode for more precise results.", result_count)
    } else if language_filter && result_count <= 5 {
        format!("Found {} results with language filter active. Results are limited to this language only. Remove lang parameter if you want to search all languages.", result_count)
    } else if glob_filter && result_count <= 10 {
        format!("Found {} results with glob filter active. Results are limited to matching paths. Remove glob parameter to search entire codebase.", result_count)
    } else if exact_mode && result_count <= 5 {
        format!("Found {} results in exact match mode. Only exact symbol name matches are included. Remove exact parameter to allow substring matching.", result_count)
    } else {
        // No rule applies: the caller gets no extra guidance.
        return None;
    };

    Some(message)
}
2453
2454#[cfg(test)]
2455mod tests {
2456 use super::*;
2457 use crate::indexer::Indexer;
2458 use crate::models::IndexConfig;
2459 use std::fs;
2460 use tempfile::TempDir;
2461
/// A freshly constructed engine keeps the cache it was given; the cache
/// directory path ends in `.reflex`.
#[test]
fn test_query_engine_creation() {
    let workspace = TempDir::new().unwrap();
    let engine = QueryEngine::new(CacheManager::new(workspace.path()));

    assert!(engine.cache.path().ends_with(".reflex"));
}
2472
/// `QueryFilter` defaults to full-text mode and honours explicitly set
/// `symbols_mode` and `kind` fields.
#[test]
fn test_filter_modes() {
    // Default filter: full-text search, no symbol filtering.
    let filter_fulltext = QueryFilter::default();
    assert!(!filter_fulltext.symbols_mode);

    let filter_symbols = QueryFilter {
        symbols_mode: true,
        ..Default::default()
    };
    assert!(filter_symbols.symbols_mode);

    let filter_with_kind = QueryFilter {
        kind: Some(SymbolKind::Function),
        symbols_mode: true,
        ..Default::default()
    };
    assert!(filter_with_kind.symbols_mode);
    // The kind that was set must be retained, not just the mode flag.
    assert!(matches!(filter_with_kind.kind, Some(SymbolKind::Function)));
}
2493
/// Full-text search for "hello" finds occurrences in both indexed files.
#[test]
fn test_fulltext_search() {
    let workspace = TempDir::new().unwrap();
    let project = workspace.path().join("project");
    fs::create_dir(&project).unwrap();

    let fixtures = [
        ("main.rs", "fn main() {\n println!(\"hello\");\n}"),
        ("lib.rs", "pub fn hello() {}"),
    ];
    for (name, body) in fixtures {
        fs::write(project.join(name), body).unwrap();
    }

    // Index the project, then query through a fresh cache handle.
    Indexer::new(CacheManager::new(&project), IndexConfig::default())
        .index(&project, false)
        .unwrap();

    let engine = QueryEngine::new(CacheManager::new(&project));
    let results = engine.search("hello", QueryFilter::default()).unwrap();

    assert!(results.len() >= 2);
    for expected in ["main.rs", "lib.rs"] {
        assert!(results.iter().any(|r| r.path.contains(expected)));
    }
}
2522
/// Symbol-mode search for "greet" returns at least one function definition.
#[test]
fn test_symbol_search() {
    let workspace = TempDir::new().unwrap();
    let project = workspace.path().join("project");
    fs::create_dir(&project).unwrap();

    let source = "fn greet() {}\nfn main() {\n greet();\n}";
    fs::write(project.join("main.rs"), source).unwrap();

    Indexer::new(CacheManager::new(&project), IndexConfig::default())
        .index(&project, false)
        .unwrap();

    let engine = QueryEngine::new(CacheManager::new(&project));
    let symbols_only = QueryFilter {
        symbols_mode: true,
        ..Default::default()
    };
    let results = engine.search("greet", symbols_only).unwrap();

    assert!(!results.is_empty());
    assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
}
2554
/// Searches with `use_regex: true` and the pattern `fn test\d` over a file
/// defining `test1`, `test2` and `other`. Expects exactly two results, each
/// with a preview containing `test`.
#[test]
fn test_regex_search() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let source = "fn test1() {}\nfn test2() {}\nfn other() {}";
    fs::write(root.join("main.rs"), source).unwrap();

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));
    let regex_filter = QueryFilter {
        use_regex: true,
        ..Default::default()
    };
    let hits = engine.search(r"fn test\d", regex_filter).unwrap();

    assert_eq!(hits.len(), 2);
    let every_preview_mentions_test = hits.iter().all(|hit| hit.preview.contains("test"));
    assert!(every_preview_mentions_test);
}
2583
/// Indexes one Rust and one JavaScript file that both mention `main`, then
/// searches with `language: Some(Language::Rust)`. Every returned result must
/// be tagged as Rust and have a path ending in `.rs`.
#[test]
fn test_language_filter() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let sources = [
        ("main.rs", "fn main() {}"),
        ("main.js", "function main() {}"),
    ];
    for (name, body) in sources {
        fs::write(root.join(name), body).unwrap();
    }

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let rust_only = QueryFilter {
        language: Some(Language::Rust),
        ..Default::default()
    };
    let hits = engine.search("main", rust_only).unwrap();

    let all_tagged_rust = hits.iter().all(|hit| hit.lang == Language::Rust);
    let all_rs_paths = hits.iter().all(|hit| hit.path.ends_with(".rs"));
    assert!(all_tagged_rust);
    assert!(all_rs_paths);
}
2613
/// Searches for the substring `mai` in symbol mode, restricted to
/// `SymbolKind::Function` and with `use_contains` enabled, over a file that
/// defines a struct, a free function `main` and an `impl` block. Expects a
/// non-empty result set that includes the symbol `main`.
#[test]
fn test_kind_filter() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(
        project.join("main.rs"),
        "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }",
    )
    .unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);

    let filter = QueryFilter {
        symbols_mode: true,
        kind: Some(SymbolKind::Function),
        use_contains: true,
        ..Default::default()
    };
    let results = engine.search("mai", filter).unwrap();

    assert!(!results.is_empty(), "Should find at least one result");
    assert!(
        results.iter().any(|r| r.symbol.as_deref() == Some("main")),
        "Should find 'main' function"
    );
}
2647
/// Places identical `fn foo() {}` files under `src/` and `tests/`, then
/// searches for `foo` with `file_pattern: Some("src/")`. All result paths
/// must contain `src/` and none may contain `tests/`.
#[test]
fn test_file_pattern_filter() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    for dir in ["src", "tests"] {
        fs::create_dir_all(root.join(dir)).unwrap();
    }

    for file in ["src/lib.rs", "tests/test.rs"] {
        fs::write(root.join(file), "fn foo() {}").unwrap();
    }

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let src_only = QueryFilter {
        file_pattern: Some("src/".to_string()),
        ..Default::default()
    };
    let hits = engine.search("foo", src_only).unwrap();

    let all_under_src = hits.iter().all(|hit| hit.path.contains("src/"));
    let none_under_tests = hits.iter().all(|hit| !hit.path.contains("tests/"));
    assert!(all_under_src);
    assert!(none_under_tests);
}
2676
/// Writes a file with twenty functions named `test0` .. `test19` and searches
/// for `test` with `limit: Some(5)` and `use_contains` enabled. Exactly five
/// results are expected.
#[test]
fn test_limit_filter() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    // One function definition per line, twenty lines in total.
    let definitions: Vec<String> = (0..20)
        .map(|i| format!("fn test{}() {{}}", i))
        .collect();
    fs::write(root.join("main.rs"), definitions.join("\n")).unwrap();

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let capped = QueryFilter {
        limit: Some(5),
        use_contains: true,
        ..Default::default()
    };
    let hits = engine.search("test", capped).unwrap();

    assert_eq!(hits.len(), 5);
}
2705
/// Searches for `test` in symbol mode with `exact: true` over a file defining
/// `test`, `test_helper` and `other_test`. Exactly one result is expected,
/// and its symbol must be `test`.
#[test]
fn test_exact_match_filter() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let source = "fn test() {}\nfn test_helper() {}\nfn other_test() {}";
    fs::write(root.join("main.rs"), source).unwrap();

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let exact_symbols = QueryFilter {
        symbols_mode: true,
        exact: true,
        ..Default::default()
    };
    let hits = engine.search("test", exact_symbols).unwrap();

    assert_eq!(hits.len(), 1);
    let only_symbol = hits[0].symbol.as_deref();
    assert_eq!(only_symbol, Some("test"));
}
2737
/// Searches for the symbol `greet` with `symbols_mode` and `expand` enabled.
/// The first result's preview is expected to contain `println`, which only
/// appears inside the body of `greet` in the indexed file.
#[test]
fn test_expand_mode() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(
        project.join("main.rs"),
        "fn greet() {\n println!(\"Hello\");\n println!(\"World\");\n}",
    )
    .unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);

    let filter = QueryFilter {
        symbols_mode: true,
        expand: true,
        ..Default::default()
    };
    let results = engine.search("greet", filter).unwrap();

    // Guard the index below with a readable failure instead of an out-of-bounds panic.
    assert!(!results.is_empty());
    let result = &results[0];
    assert!(result.preview.contains("println"));
}
2772
/// Indexes a project directory that contains no files and checks that a
/// default-filter search for `nonexistent` succeeds with zero results.
#[test]
fn test_search_empty_index() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));
    let hits = engine
        .search("nonexistent", QueryFilter::default())
        .unwrap();

    assert_eq!(hits.len(), 0);
}
2793
/// Runs a search against a project directory on which the indexer was never
/// run and expects `search` to return an error.
#[test]
fn test_search_no_index() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let outcome = engine.search("test", QueryFilter::default());
    assert!(outcome.is_err());
}
2807
/// Searches for the literal text `x + ` (containing a `+` and spaces) with
/// the default filter and expects at least one result from a file that
/// contains `let y = x + 1;`.
#[test]
fn test_search_special_characters() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(project.join("main.rs"), "let x = 42;\nlet y = x + 1;").unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);
    let filter = QueryFilter::default();

    let results = engine.search("x + ", filter).unwrap();
    assert!(!results.is_empty());
}
2829
/// Searches for the non-ASCII text `你好` in a file whose first line is a
/// comment containing `你好世界`. The filter enables both `use_contains` and
/// `force`; at least one result is expected.
#[test]
fn test_search_unicode() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(project.join("main.rs"), "// 你好世界\nfn main() {}").unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);
    let filter = QueryFilter {
        use_contains: true,
        force: true,
        ..Default::default()
    };

    let results = engine.search("你好", filter).unwrap();
    assert!(!results.is_empty());
}
2855
/// Indexes a file defining both `Test` and `test`, searches for `Test` with
/// the default filter, and checks that some result preview contains
/// `Test()`.
///
/// NOTE(review): this only asserts that the capitalised form is found; it
/// does not assert that the lowercase `test` line is excluded.
#[test]
fn test_case_sensitive_search() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let source = "fn Test() {}\nfn test() {}";
    fs::write(root.join("main.rs"), source).unwrap();

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let hits = engine.search("Test", QueryFilter::default()).unwrap();
    let found_capitalised = hits.iter().any(|hit| hit.preview.contains("Test()"));
    assert!(found_capitalised);
}
2877
/// Runs the same default-filter search three times and checks two things:
///
/// 1. all three runs return the same number of results, with identical
///    `path` and `span.start_line` at every position;
/// 2. within a run, results are ordered by `path`, and by non-decreasing
///    `span.start_line` for equal paths.
#[test]
fn test_results_sorted_deterministically() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(project.join("a.rs"), "fn test() {}").unwrap();
    fs::write(project.join("z.rs"), "fn test() {}").unwrap();
    fs::write(project.join("m.rs"), "fn test() {}\nfn test2() {}").unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);
    let filter = QueryFilter::default();

    let results1 = engine.search("test", filter.clone()).unwrap();
    let results2 = engine.search("test", filter.clone()).unwrap();
    let results3 = engine.search("test", filter).unwrap();

    // Lengths are compared first because `zip` below stops at the shortest
    // input and would otherwise hide a length mismatch.
    assert_eq!(results1.len(), results2.len());
    assert_eq!(results1.len(), results3.len());

    for ((first, second), third) in results1.iter().zip(&results2).zip(&results3) {
        assert_eq!(first.path, second.path);
        assert_eq!(first.path, third.path);
        assert_eq!(first.span.start_line, second.span.start_line);
        assert_eq!(first.span.start_line, third.span.start_line);
    }

    // Each adjacent pair must be ordered by path, then by start line.
    for pair in results1.windows(2) {
        let (curr, next) = (&pair[0], &pair[1]);
        assert!(
            curr.path < next.path
                || (curr.path == next.path && curr.span.start_line <= next.span.start_line)
        );
    }
}
2925
/// Combines a language filter (Rust), a kind filter (`Function`), a file
/// pattern (`src/main`) and symbol mode in one query for `test`. Across the
/// three indexed files exactly one result is expected: a function located in
/// `src/main.rs`.
#[test]
fn test_multiple_filters_combined() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir_all(root.join("src")).unwrap();

    let sources = [
        ("src/main.rs", "fn test() {}\nstruct Test {}"),
        ("src/lib.rs", "fn test() {}"),
        ("test.js", "function test() {}"),
    ];
    for (name, body) in sources {
        fs::write(root.join(name), body).unwrap();
    }

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));

    let combined = QueryFilter {
        language: Some(Language::Rust),
        kind: Some(SymbolKind::Function),
        file_pattern: Some("src/main".to_string()),
        symbols_mode: true,
        ..Default::default()
    };
    let hits = engine.search("test", combined).unwrap();

    assert_eq!(hits.len(), 1);
    let only = &hits[0];
    assert!(only.path.contains("src/main.rs"));
    assert_eq!(only.kind, SymbolKind::Function);
}
2961
/// Calls `QueryEngine::find_symbol` for `greet` on an index of a single file
/// defining that function. Expects a non-empty result whose first entry is a
/// `SymbolKind::Function`.
#[test]
fn test_find_symbol_helper() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(project.join("main.rs"), "fn greet() {}").unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);
    let results = engine.find_symbol("greet").unwrap();

    // Guard the index below with a readable failure instead of an out-of-bounds panic.
    assert!(!results.is_empty());
    assert_eq!(results[0].kind, SymbolKind::Function);
}
2984
/// Searches for the substring `oin` in symbol mode, restricted to
/// `SymbolKind::Struct` with `use_contains` enabled, over a file defining
/// `Point`, `test` and `Line`. Every result must be a struct and `Point`
/// must be among them.
///
/// NOTE(review): despite the name, this test goes through `search` with a
/// `kind` filter rather than calling a dedicated list-by-kind helper.
#[test]
fn test_list_by_kind_helper() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(
        project.join("main.rs"),
        "struct Point {}\nfn test() {}\nstruct Line {}",
    )
    .unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);

    let filter = QueryFilter {
        kind: Some(SymbolKind::Struct),
        symbols_mode: true,
        use_contains: true,
        ..Default::default()
    };
    let results = engine.search("oin", filter).unwrap();

    // Checked explicitly because the `all` assertion below passes on an empty set.
    assert!(!results.is_empty(), "Should find at least Point struct");
    assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
    assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
}
3018
/// Calls `QueryEngine::search_with_metadata` for `test` with the default
/// filter and checks that the response's `results` field is non-empty.
#[test]
fn test_search_with_metadata() {
    let temp = TempDir::new().unwrap();
    let project = temp.path().join("project");
    fs::create_dir(&project).unwrap();

    fs::write(project.join("main.rs"), "fn test() {}").unwrap();

    let cache = CacheManager::new(&project);
    let indexer = Indexer::new(cache, IndexConfig::default());
    indexer.index(&project, false).unwrap();

    // Fresh cache handle for querying; the previous one was moved into the indexer.
    let cache = CacheManager::new(&project);
    let engine = QueryEngine::new(cache);
    let filter = QueryFilter::default();
    let response = engine.search_with_metadata("test", filter).unwrap();

    assert!(!response.results.is_empty());
}
3043
/// Indexes a Rust, a TypeScript and a Python file that each define `greet`,
/// then runs a default-filter search for `greet`. Expects at least three
/// results, with at least one result tagged with each of the three languages.
#[test]
fn test_search_across_languages() {
    let scratch = TempDir::new().unwrap();
    let root = scratch.path().join("project");
    fs::create_dir(&root).unwrap();

    let sources = [
        ("main.rs", "fn greet() {}"),
        ("main.ts", "function greet() {}"),
        ("main.py", "def greet(): pass"),
    ];
    for (name, body) in sources {
        fs::write(root.join(name), body).unwrap();
    }

    let indexer = Indexer::new(CacheManager::new(&root), IndexConfig::default());
    indexer.index(&root, false).unwrap();

    let engine = QueryEngine::new(CacheManager::new(&root));
    let hits = engine.search("greet", QueryFilter::default()).unwrap();

    assert!(hits.len() >= 3);
    for expected in [Language::Rust, Language::TypeScript, Language::Python] {
        assert!(hits.iter().any(|hit| hit.lang == expected));
    }
}
3072}