1use anyhow::{Context, Result};
7use regex::Regex;
8
9use crate::cache::CacheManager;
10use crate::content_store::ContentReader;
11use crate::models::{
12 IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
13 SymbolKind,
14};
15use crate::output;
16use crate::parsers::ParserFactory;
17use crate::regex_trigrams::extract_trigrams_from_regex;
18use crate::trigram::TrigramIndex;
19
/// Options controlling how a query executes and how its results are
/// filtered, enriched, and paginated.
#[derive(Debug, Clone)]
pub struct QueryFilter {
    /// Keep only results in this language (None = all languages).
    pub language: Option<Language>,
    /// Keep only symbols of this kind; `Function` also matches `Method`.
    pub kind: Option<SymbolKind>,
    /// Interpret the pattern as a tree-sitter AST query.
    pub use_ast: bool,
    /// Interpret the pattern as a regular expression.
    pub use_regex: bool,
    /// Maximum number of results returned (None = unlimited).
    pub limit: Option<usize>,
    /// Search symbol definitions instead of raw text matches.
    pub symbols_mode: bool,
    /// Replace each result's preview with the full multi-line span body.
    pub expand: bool,
    /// Keep only results whose path contains this substring.
    pub file_pattern: Option<String>,
    /// With `symbols_mode`, keep only symbols whose name equals the pattern.
    pub exact: bool,
    // NOTE(review): not referenced in this part of the file — presumably
    // toggles substring matching; confirm against the rest of the module.
    pub use_contains: bool,
    /// Query timeout in seconds; 0 disables the timeout entirely.
    pub timeout_secs: u64,
    /// Include-glob patterns applied to result file paths.
    pub glob_patterns: Vec<String>,
    /// Exclude-glob patterns applied to result file paths.
    pub exclude_patterns: Vec<String>,
    /// Collapse results to one entry per distinct file path.
    pub paths_only: bool,
    /// Number of results to skip before returning (pagination offset).
    pub offset: Option<usize>,
    /// Bypass the "query too broad" safety checks.
    pub force: bool,
    /// Suppress user-facing warnings (e.g. for machine-readable output).
    pub suppress_output: bool,
    /// Attach per-file dependency information to results.
    pub include_dependencies: bool,
    /// Test-only override for the large-index file-count threshold (default 20_000).
    #[doc(hidden)]
    pub test_large_index_threshold: Option<usize>,
    /// Test-only override for the short-pattern length threshold (default 4).
    #[doc(hidden)]
    pub test_short_pattern_threshold: Option<usize>,
}
66
67impl Default for QueryFilter {
68 fn default() -> Self {
69 Self {
70 language: None,
71 kind: None,
72 use_ast: false,
73 use_regex: false,
74 limit: Some(100), symbols_mode: false,
76 expand: false,
77 file_pattern: None,
78 exact: false,
79 use_contains: false, timeout_secs: 30, glob_patterns: Vec::new(),
82 exclude_patterns: Vec::new(),
83 paths_only: false,
84 offset: None,
85 force: false, suppress_output: false, include_dependencies: false, test_large_index_threshold: None, test_short_pattern_threshold: None, }
91 }
92}
93
/// Runs text, symbol, and AST searches against a previously built index.
pub struct QueryEngine {
    // Handle to the on-disk index cache (existence/validation checks,
    // file-id lookups, stats, and the content store live under its path).
    cache: CacheManager,
}
98
99impl QueryEngine {
100 pub fn new(cache: CacheManager) -> Self {
102 Self { cache }
103 }
104
105 fn load_dependencies(&self, results: &mut [SearchResult], include_deps: bool) -> Result<()> {
108 if !include_deps || results.is_empty() {
109 return Ok(());
110 }
111
112 log::debug!("Loading dependencies for {} results", results.len());
113
114 let workspace_root = self.cache.path().parent()
118 .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
119 let cache_for_deps = CacheManager::new(workspace_root);
120 let dep_index = crate::dependency::DependencyIndex::new(cache_for_deps);
121
122 for result in results {
124 let normalized_path = result.path.strip_prefix("./").unwrap_or(&result.path);
126
127 match self.cache.get_file_id(normalized_path) {
129 Ok(Some(file_id)) => {
130 log::debug!("Found file_id={} for path={}", file_id, result.path);
131 match dep_index.get_dependencies_info(file_id) {
133 Ok(dep_infos) => {
134 log::debug!("Loaded {} dependencies for file_id={}", dep_infos.len(), file_id);
135 if !dep_infos.is_empty() {
136 result.dependencies = Some(dep_infos);
137 }
138 }
139 Err(e) => {
140 log::warn!("Failed to get dependencies for file_id={}: {}", file_id, e);
141 }
142 }
143 }
144 Ok(None) => {
145 log::warn!("No file_id found for path: {}", result.path);
146 }
147 Err(e) => {
148 log::warn!("Failed to get file_id for path {}: {}", result.path, e);
149 }
150 }
151 }
152
153 Ok(())
154 }
155
    /// Groups flat search results by file, producing one `FileGroupedResult`
    /// per file with its matches, surrounding context lines, and (optionally)
    /// the file's dependency list.
    ///
    /// Dependency and context lookups are best-effort: any failure is logged
    /// and the corresponding field is left empty rather than failing the query.
    /// Output is sorted by file path for deterministic ordering.
    fn group_and_load_dependencies(
        &self,
        results: Vec<SearchResult>,
        include_deps: bool,
    ) -> Result<Vec<crate::models::FileGroupedResult>> {
        use std::collections::HashMap;
        use crate::models::{FileGroupedResult, MatchResult};

        if results.is_empty() {
            return Ok(Vec::new());
        }

        // Bucket results by file path.
        let mut grouped: HashMap<String, Vec<SearchResult>> = HashMap::new();
        for result in results {
            grouped
                .entry(result.path.clone())
                .or_default()
                .push(result);
        }

        // Build the dependency index only if dependencies were requested.
        // NOTE(review): assumes the workspace root is the parent of the cache
        // directory — confirm this holds for all cache layouts.
        let dep_index = if include_deps {
            let workspace_root = self.cache.path().parent()
                .ok_or_else(|| anyhow::anyhow!("Cache path has no parent"))?;
            let cache_for_deps = CacheManager::new(workspace_root);
            Some(crate::dependency::DependencyIndex::new(cache_for_deps))
        } else {
            None
        };

        // Content store is optional here: without it we simply emit no
        // context lines around matches.
        let content_path = self.cache.path().join("content.bin");
        let content_reader_opt = ContentReader::open(&content_path).ok();

        let mut file_results: Vec<FileGroupedResult> = grouped
            .into_iter()
            .map(|(path, file_matches)| {
                // Per-file dependency lookup (best-effort).
                let dependencies = if let Some(dep_idx) = &dep_index {
                    // The index stores paths without a leading "./".
                    let normalized_path = path.strip_prefix("./").unwrap_or(&path);
                    match self.cache.get_file_id(normalized_path) {
                        Ok(Some(file_id)) => {
                            match dep_idx.get_dependencies_info(file_id) {
                                Ok(dep_infos) if !dep_infos.is_empty() => {
                                    log::debug!("Loaded {} dependencies for file: {}", dep_infos.len(), path);
                                    Some(dep_infos)
                                }
                                // Empty dependency list → omit the field.
                                Ok(_) => None,
                                Err(e) => {
                                    log::warn!("Failed to get dependencies for {}: {}", path, e);
                                    None
                                }
                            }
                        }
                        Ok(None) => {
                            log::warn!("No file_id found for path: {}", path);
                            None
                        }
                        Err(e) => {
                            log::warn!("Failed to get file_id for path {}: {}", path, e);
                            None
                        }
                    }
                } else {
                    None
                };

                // Resolve the content-store file id once per file so each
                // match can pull its surrounding context lines.
                let normalized_path = path.strip_prefix("./").unwrap_or(&path);
                let file_id_for_context = if let Some(reader) = &content_reader_opt {
                    reader.get_file_id_by_path(normalized_path)
                } else {
                    None
                };
                log::debug!("Context extraction: file={}, file_id={:?}, content_reader={}",
                    path, file_id_for_context, content_reader_opt.is_some());

                let matches: Vec<MatchResult> = file_matches
                    .into_iter()
                    .map(|r| {
                        // 3 = context line count passed to the reader
                        // (presumably per side — confirm in ContentReader).
                        let (context_before, context_after) = if let (Some(reader), Some(fid)) = (&content_reader_opt, file_id_for_context) {
                            let result = reader.get_context_by_line(fid as u32, r.span.start_line, 3)
                                .unwrap_or_else(|e| {
                                    log::warn!("Failed to extract context for {}:{}: {}", path, r.span.start_line, e);
                                    (vec![], vec![])
                                });
                            log::debug!("Extracted context for {}:{} - before: {}, after: {}",
                                path, r.span.start_line, result.0.len(), result.1.len());
                            result
                        } else {
                            if content_reader_opt.is_none() {
                                log::debug!("No ContentReader available for context extraction");
                            }
                            if file_id_for_context.is_none() {
                                log::debug!("No file_id found for {}", path);
                            }
                            (vec![], vec![])
                        };

                        // Flatten the SearchResult into the per-match shape.
                        MatchResult {
                            kind: r.kind,
                            symbol: r.symbol,
                            span: r.span,
                            preview: r.preview,
                            context_before,
                            context_after,
                        }
                    })
                    .collect();

                FileGroupedResult {
                    path,
                    dependencies,
                    matches,
                }
            })
            .collect();

        // HashMap iteration order is nondeterministic; sort for stable output.
        file_results.sort_by(|a, b| a.path.cmp(&b.path));

        Ok(file_results)
    }
287
288 pub fn search_with_metadata(&self, pattern: &str, filter: QueryFilter) -> Result<QueryResponse> {
293 log::info!("Executing query with metadata: pattern='{}', filter={:?}", pattern, filter);
294
295 if !self.cache.exists() {
297 anyhow::bail!(
298 "Index not found. Run 'rfx index' to build the cache first."
299 );
300 }
301
302 if let Err(e) = self.cache.validate() {
304 anyhow::bail!(
305 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
306 e
307 );
308 }
309
310 let (status, can_trust_results, warning) = self.get_index_status()?;
312
313 let (results, total) = self.search_internal(pattern, filter.clone())?;
315
316 use crate::models::PaginationInfo;
318 let pagination = PaginationInfo {
319 total,
320 count: results.len(),
321 offset: filter.offset.unwrap_or(0),
322 limit: filter.limit,
323 has_more: total > filter.offset.unwrap_or(0) + results.len(),
324 };
325
326 let grouped_results = self.group_and_load_dependencies(results, filter.include_dependencies)?;
329
330 Ok(QueryResponse {
331 ai_instruction: None, status,
333 can_trust_results,
334 warning,
335 pagination,
336 results: grouped_results,
337 })
338 }
339
340 pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
345 log::info!("Executing query: pattern='{}', filter={:?}", pattern, filter);
346
347 if !self.cache.exists() {
349 anyhow::bail!(
350 "Index not found. Run 'rfx index' to build the cache first."
351 );
352 }
353
354 if let Err(e) = self.cache.validate() {
356 anyhow::bail!(
357 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
358 e
359 );
360 }
361
362 self.check_index_freshness(&filter)?;
364
365 let (mut results, _total_count) = self.search_internal(pattern, filter.clone())?;
367
368 self.load_dependencies(&mut results, filter.include_dependencies)?;
370
371 Ok(results)
372 }
373
    /// Core query pipeline shared by `search` and `search_with_metadata`.
    ///
    /// Stages: timeout setup → keyword detection and kind inference →
    /// broad-query guard for large indexes → candidate collection (keyword
    /// scan / regex / trigram) → language and glob filtering → timeout check →
    /// second broad-query guard → symbol/AST enrichment → post-filters
    /// (kind, file, exact, expand, paths-only) → sort → pagination.
    ///
    /// Returns the paginated results plus the total match count before
    /// offset/limit were applied.
    fn search_internal(&self, pattern: &str, filter: QueryFilter) -> Result<(Vec<SearchResult>, usize)> {
        use std::time::{Duration, Instant};

        let start_time = Instant::now();
        // timeout_secs == 0 means "no timeout".
        let timeout = if filter.timeout_secs > 0 {
            Some(Duration::from_secs(filter.timeout_secs))
        } else {
            None
        };

        // A "keyword query" is a symbol-oriented search whose pattern is a
        // language keyword (e.g. "fn", "class"); these bypass trigram search.
        let is_keyword_query = if filter.symbols_mode || filter.kind.is_some() {
            ParserFactory::get_all_keywords().contains(&pattern)
        } else {
            false
        };

        // NOTE(review): `filter` is already owned here, so this clone is
        // redundant — `let mut filter = filter;` would suffice.
        let mut filter = filter.clone();
        // Infer a symbol kind from the keyword when the caller gave none.
        if is_keyword_query && filter.kind.is_none() {
            if let Some(inferred_kind) = Self::keyword_to_kind(pattern) {
                log::info!("Keyword '{}' mapped to kind {:?} (auto-inferred)", pattern, inferred_kind);
                filter.kind = Some(inferred_kind);
            }
        }

        // Guard 1: reject short patterns on very large indexes up front,
        // before any candidate collection. Thresholds are overridable for tests.
        if !filter.force && !filter.use_regex && !is_keyword_query {
            let stats = self.cache.stats()?;
            let total_files = stats.total_files;
            let pattern_len = pattern.chars().count();

            let large_index_threshold = filter.test_large_index_threshold.unwrap_or(20_000);
            let short_pattern_threshold = filter.test_short_pattern_threshold.unwrap_or(4);

            if total_files > large_index_threshold && pattern_len < short_pattern_threshold {
                anyhow::bail!(
                    "Query too broad - would be expensive to execute on this large index\n\
                    \n\
                    This index contains {} files, and pattern '{}' ({} characters) is too short for efficient searching.\n\
                    On large codebases, short patterns can take 10-30+ seconds to complete.\n\
                    \n\
                    This query could:\n\
                    • Hang for an extended period before returning results\n\
                    • Return thousands of results\n\
                    • Flood LLM context windows with excessive data\n\
                    • Fail entirely\n\
                    \n\
                    Suggestions to narrow the query:\n\
                    • Use a longer, more specific pattern (4+ characters recommended for large indexes)\n\
                    • Add a language filter: --lang <language>\n\
                    • Add a file filter: --glob <pattern> or --file <path>\n\
                    • Use --force to bypass this check if you really need all results\n\
                    \n\
                    To force execution anyway:\n\
                    rfx query \"{}\" --force",
                    total_files,
                    pattern,
                    pattern_len,
                    pattern
                );
            }
        }

        // Candidate collection: keyword queries scan all files of the target
        // language; otherwise regex or trigram search produces candidates.
        let mut results = if is_keyword_query {
            if let Some(lang) = filter.language {
                log::info!("Keyword query detected for '{}' - scanning all {:?} files (bypassing trigram search)",
                    pattern, lang);
            } else {
                log::info!("Keyword query detected for '{}' - scanning all files (bypassing trigram search)", pattern);
            }
            self.get_all_language_files(&filter)?
        } else if filter.use_regex {
            self.get_regex_candidates(pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
        } else {
            self.get_trigram_candidates(pattern, &filter)?
        };

        // Language filter; keyword queries already restricted the scan.
        if !is_keyword_query {
            if let Some(lang) = filter.language {
                let before_count = results.len();
                results.retain(|r| r.lang == lang);
                log::debug!(
                    "Language filter ({:?}): reduced {} candidates to {} candidates",
                    lang,
                    before_count,
                    results.len()
                );
            }
        }

        // Glob include/exclude filtering on candidate paths. Invalid
        // individual patterns are logged and skipped, not fatal.
        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
            use globset::{Glob, GlobSetBuilder};

            let include_matcher = if !filter.glob_patterns.is_empty() {
                let mut builder = GlobSetBuilder::new();
                for pattern in &filter.glob_patterns {
                    let normalized = Self::normalize_glob_pattern(pattern);
                    match Glob::new(&normalized) {
                        Ok(glob) => {
                            builder.add(glob);
                        }
                        Err(e) => {
                            log::warn!("Invalid glob pattern '{}': {}", pattern, e);
                        }
                    }
                }
                match builder.build() {
                    Ok(matcher) => Some(matcher),
                    Err(e) => {
                        log::warn!("Failed to build glob matcher: {}", e);
                        None
                    }
                }
            } else {
                None
            };

            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
                let mut builder = GlobSetBuilder::new();
                for pattern in &filter.exclude_patterns {
                    let normalized = Self::normalize_glob_pattern(pattern);
                    match Glob::new(&normalized) {
                        Ok(glob) => {
                            builder.add(glob);
                        }
                        Err(e) => {
                            log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
                        }
                    }
                }
                match builder.build() {
                    Ok(matcher) => Some(matcher),
                    Err(e) => {
                        log::warn!("Failed to build exclude matcher: {}", e);
                        None
                    }
                }
            } else {
                None
            };

            let before_count = results.len();
            results.retain(|r| {
                // No include matcher → everything is included by default.
                let included = if let Some(ref matcher) = include_matcher {
                    matcher.is_match(&r.path)
                } else {
                    true
                };

                let excluded = if let Some(ref matcher) = exclude_matcher {
                    matcher.is_match(&r.path)
                } else {
                    false
                };

                included && !excluded
            });
            log::debug!(
                "Glob filter: reduced {} candidates to {} candidates",
                before_count,
                results.len()
            );
        }

        // Timeout check after the (possibly slow) candidate phase.
        if let Some(timeout_duration) = timeout {
            if start_time.elapsed() > timeout_duration {
                anyhow::bail!(
                    "Query timeout exceeded ({} seconds).\n\
                    \n\
                    The query took too long to complete. Try one of these approaches:\n\
                    • Use a more specific search pattern (longer patterns = faster search)\n\
                    • Add a language filter with --lang to narrow the search space\n\
                    • Add a file filter with --file to search specific directories\n\
                    • Increase the timeout with --timeout <seconds>\n\
                    \n\
                    Example: rfx query \"{}\" --lang rust --timeout 60",
                    filter.timeout_secs,
                    pattern
                );
            }
        }

        // Guard 2: now that the candidate count is known, reject queries that
        // would require parsing too many files (unless --force was given).
        if !filter.force {
            let candidate_count = results.len();
            let pattern_len = pattern.chars().count();

            let is_short_pattern = pattern_len < 3 && !filter.use_regex && !is_keyword_query;

            let is_broad_ast = filter.use_ast && filter.glob_patterns.is_empty() && candidate_count >= 100;

            // Thresholds scale with how expensive per-candidate work is:
            // unrestricted AST is the most expensive, plain text the least.
            let threshold = if filter.use_ast && filter.glob_patterns.is_empty() {
                100
            } else if filter.use_ast {
                10_000
            } else if is_keyword_query {
                20_000
            } else {
                50_000
            };

            let has_many_candidates = candidate_count > threshold &&
                (filter.symbols_mode || filter.kind.is_some() || filter.use_ast);

            if is_short_pattern || has_many_candidates || is_broad_ast {
                // Pick the most specific explanation for the rejection.
                let reason = if is_short_pattern {
                    format!("Pattern '{}' is too short ({} characters). Short patterns bypass trigram optimization and require scanning many files.", pattern, pattern_len)
                } else if is_broad_ast {
                    format!("AST query without --glob restriction will scan the entire codebase ({} files). AST queries are SLOW (500ms-10s+).", candidate_count)
                } else if is_keyword_query {
                    format!("Keyword query '{}' matched {} files. This query scans all files of the target language, which will take significant time and produce excessive results.", pattern, candidate_count)
                } else {
                    format!("Query matched {} files. Parsing this many files with --symbols or --kind will take significant time and produce excessive results.", candidate_count)
                };

                let suggestions = if is_short_pattern {
                    vec![
                        "• Use a longer, more specific pattern (3+ characters recommended)",
                        "• Add a language filter: --lang <language>",
                        "• Add a file path filter: --file <path> or --glob <pattern>",
                        "• Use --force to bypass this check if you really need all results"
                    ]
                } else if is_broad_ast {
                    vec![
                        "• Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'",
                        "• Use --symbols instead (10-100x faster in 95% of cases)",
                        "• Use --force to bypass this check if you need a full codebase scan"
                    ]
                } else if is_keyword_query {
                    vec![
                        "• Add a language filter to reduce files scanned: --lang <language>",
                        "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
                        "• Add --kind to filter to specific symbol types: --kind function",
                        "• Use a more specific pattern instead of a keyword",
                        "• Use --force to bypass this check if you need all results"
                    ]
                } else {
                    vec![
                        "• Add a language filter to reduce candidate set: --lang <language>",
                        "• Add glob patterns to search specific directories: --glob 'src/**/*.rs'",
                        "• Use a more specific search pattern",
                        "• Use --force to bypass this check if you need all results"
                    ]
                };

                // Reconstruct the caller's flags for the suggested command line.
                let mut cmd_flags = String::new();
                if filter.symbols_mode {
                    cmd_flags.push_str("--symbols ");
                }
                if let Some(ref lang) = filter.language {
                    cmd_flags.push_str(&format!("--lang {:?} ", lang));
                }
                if let Some(ref kind) = filter.kind {
                    cmd_flags.push_str(&format!("--kind {:?} ", kind));
                }
                if filter.use_ast {
                    cmd_flags.push_str("--ast ");
                }

                anyhow::bail!(
                    "Query too broad - would be expensive to execute\n\
                    \n\
                    {}\n\
                    \n\
                    This query could:\n\
                    • Hang for an extended period before returning results\n\
                    • Return thousands of results\n\
                    • Flood LLM context windows with excessive data\n\
                    • Fail entirely\n\
                    \n\
                    Suggestions to narrow the query:\n\
                    {}\n\
                    \n\
                    To force execution anyway:\n\
                    rfx query \"{}\" --force {}",
                    reason,
                    suggestions.join("\n "),
                    pattern,
                    cmd_flags
                );
            }
        }

        // Symbol/AST enrichment parses files; sort first so progress is
        // deterministic, and warn when the parse set is large.
        if filter.symbols_mode || filter.kind.is_some() || filter.use_ast {
            results.sort_by(|a, b| {
                a.path.cmp(&b.path)
                    .then_with(|| a.span.start_line.cmp(&b.span.start_line))
            });

            let candidate_count = results.len();
            if candidate_count > 1000 && !filter.suppress_output {
                output::warn(&format!(
                    "Pattern '{}' matched {} files - parsing may take some time. Consider using --file, --glob, or a more specific pattern to narrow the search.",
                    pattern,
                    candidate_count
                ));
            } else if candidate_count > 100 {
                log::info!("Parsing {} candidate files for symbol extraction", candidate_count);
            }
        }

        // AST enrichment takes precedence over symbol enrichment.
        if filter.use_ast {
            results = self.enrich_with_ast(results, pattern, filter.language)?;
        } else if filter.symbols_mode || filter.kind.is_some() {
            results = self.enrich_with_symbols(results, pattern, &filter)?;
        }

        // Kind filter: `Function` deliberately also accepts `Method`.
        if let Some(ref kind) = filter.kind {
            results.retain(|r| {
                if matches!(kind, SymbolKind::Function) {
                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
                } else {
                    r.kind == *kind
                }
            });
        }

        // Simple substring filter on file paths.
        if let Some(ref file_pattern) = filter.file_pattern {
            results.retain(|r| r.path.contains(file_pattern));
        }

        // Exact symbol-name match only makes sense in symbols mode.
        if filter.exact && filter.symbols_mode {
            results.retain(|r| r.symbol.as_deref() == Some(pattern));
        }

        // --expand: replace previews with the full multi-line span body.
        // Failures at any step silently leave the original preview in place.
        if filter.expand {
            let content_path = self.cache.path().join("content.bin");
            if let Ok(content_reader) = ContentReader::open(&content_path) {
                for result in &mut results {
                    if result.span.start_line < result.span.end_line {
                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
                            if let Ok(content) = content_reader.get_file_content(file_id) {
                                let lines: Vec<&str> = content.lines().collect();
                                // Span lines are 1-based; slice indices are 0-based.
                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
                                let end_idx = (result.span.end_line as usize).min(lines.len());

                                if start_idx < end_idx {
                                    let full_body = lines[start_idx..end_idx].join("\n");
                                    result.preview = full_body;
                                }
                            }
                        }
                    }
                }
            }
        }

        // --paths-only: keep the first result per distinct path.
        if filter.paths_only {
            use std::collections::HashSet;
            let mut seen_paths = HashSet::new();
            results.retain(|r| seen_paths.insert(r.path.clone()));
        }

        // Final deterministic ordering before pagination.
        results.sort_by(|a, b| {
            a.path.cmp(&b.path)
                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
        });

        // Total is captured before offset/limit for pagination metadata.
        let total_count = results.len();

        if let Some(offset) = filter.offset {
            if offset < results.len() {
                results = results.into_iter().skip(offset).collect();
            } else {
                // Offset past the end yields an empty page.
                results.clear();
            }
        }

        if let Some(limit) = filter.limit {
            results.truncate(limit);
        }

        log::info!("Query returned {} results (total before pagination: {})", results.len(), total_count);

        Ok((results, total_count))
    }
840
841 pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
843 let filter = QueryFilter {
844 symbols_mode: true,
845 ..Default::default()
846 };
847 self.search(name, filter)
848 }
849
850 pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
852 let filter = QueryFilter {
853 language: lang,
854 use_ast: true,
855 ..Default::default()
856 };
857
858 self.search(pattern, filter)
859 }
860
    /// Runs a tree-sitter AST query over every indexed file of one language,
    /// without a text pre-filter.
    ///
    /// Requires `filter.language` to be set. Candidates are collected by
    /// scanning the content store, filtered by language and glob patterns,
    /// guarded against unrestricted full-codebase scans, then enriched via
    /// AST matching and post-processed (kind filter, expand, paths-only,
    /// sort, pagination, dependency loading).
    pub fn search_ast_all_files(&self, ast_pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
        log::info!("Executing AST query on all files: pattern='{}', filter={:?}", ast_pattern, filter);

        // AST grammars are per-language, so a language is mandatory here.
        let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
            "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
            \n\
            Example: rfx query \"(function_definition) @fn\" --ast --lang python"
        ))?;

        if !self.cache.exists() {
            anyhow::bail!(
                "Index not found. Run 'rfx index' to build the cache first."
            );
        }

        self.check_index_freshness(&filter)?;

        let content_path = self.cache.path().join("content.bin");
        let content_reader = ContentReader::open(&content_path)
            .context("Failed to open content store")?;

        use globset::{Glob, GlobSetBuilder};

        // Unlike search_internal, invalid glob patterns are silently dropped here.
        let include_matcher = if !filter.glob_patterns.is_empty() {
            let mut builder = GlobSetBuilder::new();
            for pattern in &filter.glob_patterns {
                let normalized = Self::normalize_glob_pattern(pattern);
                if let Ok(glob) = Glob::new(&normalized) {
                    builder.add(glob);
                }
            }
            builder.build().ok()
        } else {
            None
        };

        let exclude_matcher = if !filter.exclude_patterns.is_empty() {
            let mut builder = GlobSetBuilder::new();
            for pattern in &filter.exclude_patterns {
                let normalized = Self::normalize_glob_pattern(pattern);
                if let Ok(glob) = Glob::new(&normalized) {
                    builder.add(glob);
                }
            }
            builder.build().ok()
        } else {
            None
        };

        // Build one placeholder candidate per matching file; the real spans
        // and kinds are filled in by enrich_with_ast below.
        let mut candidates: Vec<SearchResult> = Vec::new();

        for file_id in 0..content_reader.file_count() {
            let file_path = match content_reader.get_file_path(file_id as u32) {
                Some(p) => p,
                None => continue,
            };

            // Language detection is by file extension only.
            let ext = file_path.extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            let detected_lang = Language::from_extension(ext);

            if detected_lang != lang {
                continue;
            }

            let file_path_str = file_path.to_string_lossy().to_string();

            let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
            let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));

            if !included || excluded {
                continue;
            }

            // Placeholder span/kind; overwritten during AST enrichment.
            candidates.push(SearchResult {
                path: file_path_str,
                lang: detected_lang,
                span: Span { start_line: 1, end_line: 1 },
                symbol: None,
                kind: SymbolKind::Unknown("ast_query".to_string()),
                preview: String::new(),
                dependencies: None,
            });
        }

        log::info!("AST query scanning {} files for language {:?}", candidates.len(), lang);

        // Guard: unrestricted AST scans over 100+ files are rejected unless forced.
        if !filter.force && filter.glob_patterns.is_empty() && candidates.len() >= 100 {
            anyhow::bail!(
                "Query too broad - would be expensive to execute\n\
                \n\
                AST query without --glob restriction will scan the ENTIRE codebase ({} files). AST queries are SLOW (500ms-10s+).\n\
                \n\
                This query could:\n\
                • Hang for an extended period before returning results\n\
                • Return thousands of results\n\
                • Flood LLM context windows with excessive data\n\
                • Fail entirely\n\
                \n\
                Suggestions to narrow the query:\n\
                • Add --glob to restrict AST query to specific files: --glob 'src/**/*.rs'\n\
                • Use --symbols instead (10-100x faster in 95% of cases)\n\
                • Use --force to bypass this check if you need a full codebase scan\n\
                \n\
                To force execution anyway:\n\
                rfx query \"{}\" --force --ast --lang {:?}",
                candidates.len(),
                ast_pattern,
                lang
            );
        }

        if candidates.is_empty() {
            if !filter.suppress_output {
                output::warn(&format!("No files found for language {:?}. Check your language filter or glob patterns.", lang));
            }
            return Ok(Vec::new());
        }

        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;

        log::debug!("AST query found {} matches before filtering", results.len());

        // Kind filter: `Function` deliberately also accepts `Method`.
        if let Some(ref kind) = filter.kind {
            results.retain(|r| {
                if matches!(kind, SymbolKind::Function) {
                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
                } else {
                    r.kind == *kind
                }
            });
        }

        // --expand: replace previews with the full multi-line span body.
        if filter.expand {
            let content_path = self.cache.path().join("content.bin");
            if let Ok(content_reader) = ContentReader::open(&content_path) {
                for result in &mut results {
                    if result.span.start_line < result.span.end_line {
                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
                            if let Ok(content) = content_reader.get_file_content(file_id) {
                                let lines: Vec<&str> = content.lines().collect();
                                // Span lines are 1-based; slice indices are 0-based.
                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
                                let end_idx = (result.span.end_line as usize).min(lines.len());

                                if start_idx < end_idx {
                                    let full_body = lines[start_idx..end_idx].join("\n");
                                    result.preview = full_body;
                                }
                            }
                        }
                    }
                }
            }
        }

        // --paths-only: keep the first result per distinct path.
        if filter.paths_only {
            use std::collections::HashSet;
            let mut seen_paths = HashSet::new();
            results.retain(|r| seen_paths.insert(r.path.clone()));
        }

        // Deterministic ordering before pagination.
        results.sort_by(|a, b| {
            a.path.cmp(&b.path)
                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
        });

        if let Some(offset) = filter.offset {
            if offset < results.len() {
                results = results.into_iter().skip(offset).collect();
            } else {
                results.clear();
            }
        }

        if let Some(limit) = filter.limit {
            results.truncate(limit);
        }

        log::info!("AST query returned {} results", results.len());

        self.load_dependencies(&mut results, filter.include_dependencies)?;

        Ok(results)
    }
1093
    /// Two-phase query: a fast text search narrows the candidate set
    /// (phase 1), then a tree-sitter AST pattern is matched only against
    /// those candidates (phase 2).
    ///
    /// Post-processing mirrors `search_internal`: language/kind/file/glob
    /// filters, exact symbol match, expand, sort, and pagination.
    ///
    /// NOTE(review): unlike `search` and `search_ast_all_files`, this path
    /// never calls `load_dependencies`, so `include_dependencies` is
    /// effectively ignored here — confirm whether that is intentional.
    pub fn search_ast_with_text_filter(
        &self,
        text_pattern: &str,
        ast_pattern: &str,
        filter: QueryFilter,
    ) -> Result<Vec<SearchResult>> {
        log::info!("Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
            text_pattern, ast_pattern, filter);

        if !self.cache.exists() {
            anyhow::bail!(
                "Index not found. Run 'rfx index' to build the cache first."
            );
        }

        self.check_index_freshness(&filter)?;

        use std::time::{Duration, Instant};
        let start_time = Instant::now();
        // timeout_secs == 0 means "no timeout".
        let timeout = if filter.timeout_secs > 0 {
            Some(Duration::from_secs(filter.timeout_secs))
        } else {
            None
        };

        // Phase 1: cheap text search (regex or trigram) to find candidates.
        let candidates = if filter.use_regex {
            self.get_regex_candidates(text_pattern, timeout.as_ref(), &start_time, filter.suppress_output)?
        } else {
            self.get_trigram_candidates(text_pattern, &filter)?
        };

        log::debug!("Phase 1 found {} candidate locations", candidates.len());

        // Phase 2: expensive AST matching, restricted to the candidates.
        let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;

        log::debug!("Phase 2 AST matching found {} results", results.len());

        if let Some(lang) = filter.language {
            results.retain(|r| r.lang == lang);
        }

        // Kind filter: `Function` deliberately also accepts `Method`.
        if let Some(ref kind) = filter.kind {
            results.retain(|r| {
                if matches!(kind, SymbolKind::Function) {
                    matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
                } else {
                    r.kind == *kind
                }
            });
        }

        // Simple substring filter on file paths.
        if let Some(ref file_pattern) = filter.file_pattern {
            results.retain(|r| r.path.contains(file_pattern));
        }

        // Glob include/exclude filtering; invalid patterns silently dropped.
        if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
            use globset::{Glob, GlobSetBuilder};

            let include_matcher = if !filter.glob_patterns.is_empty() {
                let mut builder = GlobSetBuilder::new();
                for pattern in &filter.glob_patterns {
                    let normalized = Self::normalize_glob_pattern(pattern);
                    if let Ok(glob) = Glob::new(&normalized) {
                        builder.add(glob);
                    }
                }
                builder.build().ok()
            } else {
                None
            };

            let exclude_matcher = if !filter.exclude_patterns.is_empty() {
                let mut builder = GlobSetBuilder::new();
                for pattern in &filter.exclude_patterns {
                    let normalized = Self::normalize_glob_pattern(pattern);
                    if let Ok(glob) = Glob::new(&normalized) {
                        builder.add(glob);
                    }
                }
                builder.build().ok()
            } else {
                None
            };

            results.retain(|r| {
                let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&r.path));
                let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&r.path));
                included && !excluded
            });
        }

        // Exact symbol-name match compares against the TEXT pattern.
        if filter.exact && filter.symbols_mode {
            results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
        }

        // --expand: replace previews with the full multi-line span body.
        if filter.expand {
            let content_path = self.cache.path().join("content.bin");
            if let Ok(content_reader) = ContentReader::open(&content_path) {
                for result in &mut results {
                    if result.span.start_line < result.span.end_line {
                        if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
                            if let Ok(content) = content_reader.get_file_content(file_id) {
                                let lines: Vec<&str> = content.lines().collect();
                                // Span lines are 1-based; slice indices are 0-based.
                                let start_idx = (result.span.start_line as usize).saturating_sub(1);
                                let end_idx = (result.span.end_line as usize).min(lines.len());

                                if start_idx < end_idx {
                                    let full_body = lines[start_idx..end_idx].join("\n");
                                    result.preview = full_body;
                                }
                            }
                        }
                    }
                }
            }
        }

        // Deterministic ordering before pagination.
        results.sort_by(|a, b| {
            a.path.cmp(&b.path)
                .then_with(|| a.span.start_line.cmp(&b.span.start_line))
        });

        if let Some(offset) = filter.offset {
            if offset < results.len() {
                results = results.into_iter().skip(offset).collect();
            } else {
                results.clear();
            }
        }

        if let Some(limit) = filter.limit {
            results.truncate(limit);
        }

        log::info!("AST query returned {} results", results.len());

        Ok(results)
    }
1256
1257 pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
1259 let filter = QueryFilter {
1260 kind: Some(kind),
1261 symbols_mode: true,
1262 ..Default::default()
1263 };
1264
1265 self.search("*", filter)
1266 }
1267
    /// Phase 2 of a symbols-mode query: convert line-level trigram candidates
    /// into symbol definitions by parsing candidate files, using a persistent
    /// symbol cache to avoid reparsing files whose hash is unchanged.
    ///
    /// `candidates` are raw text-match hits (grouped by file below);
    /// `pattern` is the original query string; `filter` supplies the
    /// kind/regex/contains flags that select the final matching mode.
    fn enrich_with_symbols(&self, candidates: Vec<SearchResult>, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
        // Open the content store holding indexed file contents.
        let content_path = self.cache.path().join("content.bin");
        let content_reader = ContentReader::open(&content_path)
            .context("Failed to open content store")?;

        // Load the on-disk trigram index, or rebuild it in memory from the
        // content store when the file is missing.
        let trigrams_path = self.cache.path().join("trigrams.bin");
        let trigram_index = if trigrams_path.exists() {
            TrigramIndex::load(&trigrams_path)?
        } else {
            Self::rebuild_trigram_index(&content_reader)?
        };

        // Persistent cache of previously parsed symbols, keyed by file hash.
        let symbol_cache = crate::symbol_cache::SymbolCache::open(self.cache.path())
            .context("Failed to open symbol cache")?;

        // File hashes for the current branch are required for cache lookups:
        // a cache entry is only usable when the stored hash still matches.
        let root = self.cache.workspace_root();
        let branch = crate::git::get_current_branch(&root)
            .unwrap_or_else(|_| "_default".to_string());
        let file_hashes = self.cache.load_hashes_for_branch(&branch)
            .context("Failed to load file hashes")?;
        log::debug!("Loaded {} file hashes for branch '{}' for symbol cache lookups", file_hashes.len(), branch);

        use std::collections::HashMap;
        // Group candidates by path; unsupported languages cannot be parsed
        // and are dropped up front.
        let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
        let mut skipped_unsupported = 0;

        for candidate in candidates {
            if !candidate.lang.is_supported() {
                skipped_unsupported += 1;
                continue;
            }

            files_by_path
                .entry(candidate.path.clone())
                .or_insert_with(Vec::new)
                .push(candidate);
        }

        let total_files = files_by_path.len();
        log::debug!("Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
            total_files, skipped_unsupported);

        // Parsing thousands of files is slow; warn interactively unless the
        // caller asked for quiet operation.
        if total_files > 1000 && !filter.suppress_output {
            output::warn(&format!(
                "Pattern '{}' matched {} files. This may take some time to parse. Consider using a more specific pattern or adding --lang/--file filters to narrow the search.",
                pattern,
                total_files
            ));
        }

        let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();

        // Pre-filter: drop files where every occurrence of the pattern sits
        // inside a comment or string literal, since those can never be
        // symbol definitions. Only languages with a registered line filter
        // get this treatment.
        let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();

        for file_path in &files_to_process {
            let ext = std::path::Path::new(file_path)
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            let lang = Language::from_extension(ext);

            if let Some(line_filter) = crate::line_filter::get_filter(lang) {
                let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
                    Some(id) => id,
                    None => continue,
                };

                let content = match content_reader.get_file_content(file_id) {
                    Ok(c) => c,
                    Err(_) => continue,
                };

                // Check every occurrence of the pattern on every line; one
                // occurrence outside comments/strings keeps the file.
                let mut all_in_non_code = true;
                for line in content.lines() {
                    let mut search_start = 0;
                    while let Some(pos) = line[search_start..].find(pattern) {
                        let absolute_pos = search_start + pos;

                        // NOTE(review): these look like per-line heuristics;
                        // multi-line strings/comments may not be detected —
                        // confirm against line_filter's contract.
                        let in_comment = line_filter.is_in_comment(line, absolute_pos);
                        let in_string = line_filter.is_in_string(line, absolute_pos);

                        if !in_comment && !in_string {
                            all_in_non_code = false;
                            break;
                        }

                        search_start = absolute_pos + pattern.len();
                    }

                    if !all_in_non_code {
                        break;
                    }
                }

                if all_in_non_code {
                    // `all_in_non_code` is vacuously true when the pattern
                    // never occurs at all, so require at least one occurrence
                    // before skipping the file.
                    if content.contains(pattern) {
                        files_to_skip.insert(file_path.clone());
                        log::debug!("Pre-filter: Skipping {} (all matches in comments/strings)", file_path);
                    }
                }
            }
        }

        files_to_process.retain(|path| !files_to_skip.contains(path));

        log::debug!("Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
            files_to_skip.len(), files_to_process.len());

        // Use roughly 80% of available cores, clamped to [1, 8], so parsing
        // does not monopolize the machine.
        let num_threads = {
            let available_cores = std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4);
            ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
        };

        log::debug!("Using {} threads for parallel symbol extraction (out of {} available cores)",
            num_threads,
            std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));

        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .build()
            .context("Failed to create thread pool for symbol extraction")?;

        // Symbol-cache lookups need (file_id, hash, path) triples; only
        // files with a known hash can be served from the cache at all.
        let files_with_hashes: Vec<String> = files_to_process
            .iter()
            .filter(|path| file_hashes.contains_key(path.as_str()))
            .cloned()
            .collect();

        let file_id_map = self.cache.batch_get_file_ids(&files_with_hashes)
            .context("Failed to batch lookup file IDs")?;

        let file_lookup_tuples: Vec<(i64, String, String)> = files_with_hashes
            .iter()
            .filter_map(|path| {
                let file_id = file_id_map.get(path)?;
                let hash = file_hashes.get(path.as_str())?;
                Some((*file_id, hash.clone(), path.clone()))
            })
            .collect();

        // Single batched cache read, pre-filtered by the requested kind.
        let batch_results = symbol_cache.batch_get_with_kind(&file_lookup_tuples, filter.kind.clone())
            .context("Failed to batch read symbol cache")?;

        let mut cached_symbols: HashMap<String, Vec<SearchResult>> = HashMap::new();
        let mut files_needing_parse: Vec<String> = Vec::new();

        // Invert the path->id map so cache hits can be keyed back by path.
        let id_to_path: HashMap<i64, String> = file_id_map
            .iter()
            .map(|(path, id)| (*id, path.clone()))
            .collect();

        for (file_id, symbols) in batch_results {
            if let Some(file_path) = id_to_path.get(&file_id) {
                cached_symbols.insert(file_path.clone(), symbols);
            }
        }

        // Hashed files that missed the cache must be parsed...
        for path in &files_with_hashes {
            if file_id_map.contains_key(path) && !cached_symbols.contains_key(path) {
                files_needing_parse.push(path.clone());
            }
        }

        // ...as must files with no recorded hash (uncacheable).
        for file_path in &files_to_process {
            if !file_hashes.contains_key(file_path.as_str()) {
                files_needing_parse.push(file_path.clone());
            }
        }

        log::debug!(
            "Symbol cache: {} hits, {} need parsing",
            cached_symbols.len(),
            files_needing_parse.len()
        );

        use rayon::prelude::*;

        // Parse cache misses in parallel on the bounded pool; freshly parsed
        // symbols are written back to the cache for next time.
        let parsed_symbols: Vec<SearchResult> = pool.install(|| {
            files_needing_parse
                .par_iter()
                .flat_map(|file_path| {
                    let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
                        Some(id) => id,
                        None => {
                            log::warn!("Could not find file_id for path: {}", file_path);
                            return Vec::new();
                        }
                    };

                    let content = match content_reader.get_file_content(file_id) {
                        Ok(c) => c,
                        Err(e) => {
                            log::warn!("Failed to read file {}: {}", file_path, e);
                            return Vec::new();
                        }
                    };

                    let ext = std::path::Path::new(file_path)
                        .extension()
                        .and_then(|e| e.to_str())
                        .unwrap_or("");
                    let lang = Language::from_extension(ext);

                    // Parse failures are non-fatal: the file simply yields
                    // no symbols.
                    let symbols = match ParserFactory::parse(file_path, content, lang) {
                        Ok(symbols) => {
                            log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
                            symbols
                        }
                        Err(e) => {
                            log::debug!("Failed to parse {}: {}", file_path, e);
                            Vec::new()
                        }
                    };

                    // Best-effort cache write; only possible when the file
                    // has a known hash.
                    if let Some(file_hash) = file_hashes.get(file_path.as_str()) {
                        if let Err(e) = symbol_cache.set(file_path, file_hash, &symbols) {
                            log::debug!("Failed to cache symbols for {}: {}", file_path, e);
                        }
                    }

                    symbols
                })
                .collect()
        });

        // Merge cached and freshly parsed symbols into one pool.
        let mut all_symbols: Vec<SearchResult> = Vec::new();

        for symbols in cached_symbols.values() {
            all_symbols.extend_from_slice(symbols);
        }

        all_symbols.extend(parsed_symbols);

        // Detect keyword queries (e.g. searching for `struct` itself): those
        // list all symbols instead of matching on symbol names.
        let is_keyword_query = {
            let lang_to_check = if let Some(lang) = filter.language {
                vec![lang]
            } else {
                let mut langs: Vec<Language> = all_symbols.iter()
                    .map(|s| s.lang)
                    .collect::<Vec<_>>();
                langs.sort_by(|a, b| format!("{:?}", a).cmp(&format!("{:?}", b))); langs.dedup(); langs
            };

            lang_to_check.iter().any(|lang| {
                ParserFactory::get_keywords(*lang).contains(&pattern)
            })
        };

        // Final filtering; the match mode mirrors the query flags.
        let filtered: Vec<SearchResult> = if is_keyword_query {
            log::info!("Pattern '{}' is a language keyword - listing all symbols (kind filtering will be applied in Phase 3)", pattern);
            all_symbols
        } else if filter.use_regex {
            use std::collections::{HashMap, HashSet};
            // For regex queries, keep symbols whose span overlaps a line on
            // which the text search actually matched.
            let mut candidate_lines: HashMap<String, HashSet<usize>> = HashMap::new();
            for candidate in &files_by_path {
                for cand in candidate.1 {
                    candidate_lines
                        .entry(candidate.0.clone())
                        .or_insert_with(HashSet::new)
                        .insert(cand.span.start_line);
                }
            }

            all_symbols
                .into_iter()
                .filter(|sym| {
                    if let Some(lines) = candidate_lines.get(&sym.path) {
                        for line in sym.span.start_line..=sym.span.end_line {
                            if lines.contains(&line) {
                                return true;
                            }
                        }
                    }
                    false
                })
                .collect()
        } else if filter.use_contains {
            // Substring match on the symbol name.
            all_symbols
                .into_iter()
                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
                .collect()
        } else {
            // Default: exact symbol-name match.
            all_symbols
                .into_iter()
                .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
                .collect()
        };

        log::info!("Symbol enrichment found {} matches for pattern '{}'", filtered.len(), pattern);

        Ok(filtered)
    }
1654
    /// Phase 2 of an AST query: run the Tree-sitter pattern over every
    /// candidate file's content. Requires an explicit language because AST
    /// patterns are language-specific.
    fn enrich_with_ast(&self, candidates: Vec<SearchResult>, ast_pattern: &str, language: Option<Language>) -> Result<Vec<SearchResult>> {
        let lang = language.ok_or_else(|| anyhow::anyhow!(
            "Language must be specified for AST pattern matching. Use --lang to specify the language."
        ))?;

        let content_path = self.cache.path().join("content.bin");
        let content_reader = ContentReader::open(&content_path)
            .context("Failed to open content store")?;

        // The trigram index is needed here only for path -> file_id lookups.
        let trigrams_path = self.cache.path().join("trigrams.bin");
        let trigram_index = if trigrams_path.exists() {
            TrigramIndex::load(&trigrams_path)?
        } else {
            Self::rebuild_trigram_index(&content_reader)?
        };

        use std::collections::HashMap;
        // Load each candidate file's content exactly once, even when several
        // candidates point at the same file.
        let mut file_contents: HashMap<String, String> = HashMap::new();

        for candidate in &candidates {
            if file_contents.contains_key(&candidate.path) {
                continue;
            }

            let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, &candidate.path) {
                Some(id) => id,
                None => {
                    log::warn!("Could not find file_id for path: {}", candidate.path);
                    continue;
                }
            };

            // Unreadable files are skipped rather than failing the query.
            let content = match content_reader.get_file_content(file_id) {
                Ok(c) => c,
                Err(e) => {
                    log::warn!("Failed to read file {}: {}", candidate.path, e);
                    continue;
                }
            };

            file_contents.insert(candidate.path.clone(), content.to_string());
        }

        log::debug!("Executing AST query on {} candidate files with language {:?}", file_contents.len(), lang);

        // The actual Tree-sitter matching is delegated to the ast_query module.
        let results = crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;

        log::info!("AST query found {} matches for pattern '{}'", results.len(), ast_pattern);

        Ok(results)
    }
1731
1732 fn find_file_id_by_path(
1734 content_reader: &ContentReader,
1735 trigram_index: &TrigramIndex,
1736 target_path: &str,
1737 ) -> Option<u32> {
1738 for file_id in 0..trigram_index.file_count() {
1740 if let Some(path) = trigram_index.get_file(file_id as u32) {
1741 if path.to_string_lossy() == target_path {
1742 return Some(file_id as u32);
1743 }
1744 }
1745 }
1746
1747 for file_id in 0..content_reader.file_count() {
1749 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1750 if path.to_string_lossy() == target_path {
1751 return Some(file_id as u32);
1752 }
1753 }
1754 }
1755
1756 None
1757 }
1758
1759 fn keyword_to_kind(keyword: &str) -> Option<SymbolKind> {
1767 match keyword {
1768 "class" => Some(SymbolKind::Class),
1770 "struct" => Some(SymbolKind::Struct),
1771 "enum" => Some(SymbolKind::Enum),
1772 "interface" => Some(SymbolKind::Interface),
1773 "trait" => Some(SymbolKind::Trait),
1774 "type" => Some(SymbolKind::Type),
1775 "record" => Some(SymbolKind::Struct), "function" | "fn" | "def" | "func" => Some(SymbolKind::Function),
1779
1780 "const" | "static" => Some(SymbolKind::Constant),
1782 "var" | "let" => Some(SymbolKind::Variable),
1783
1784 "mod" | "module" | "namespace" => Some(SymbolKind::Module),
1786
1787 "impl" => None, "async" => None, _ => None,
1793 }
1794 }
1795
1796 fn get_all_language_files(&self, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1804 let content_path = self.cache.path().join("content.bin");
1809 let content_reader = ContentReader::open(&content_path)
1810 .context("Failed to open content store")?;
1811
1812 use globset::{Glob, GlobSetBuilder};
1814
1815 let include_matcher = if !filter.glob_patterns.is_empty() {
1816 let mut builder = GlobSetBuilder::new();
1817 for pattern in &filter.glob_patterns {
1818 let normalized = Self::normalize_glob_pattern(pattern);
1819 if let Ok(glob) = Glob::new(&normalized) {
1820 builder.add(glob);
1821 }
1822 }
1823 builder.build().ok()
1824 } else {
1825 None
1826 };
1827
1828 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
1829 let mut builder = GlobSetBuilder::new();
1830 for pattern in &filter.exclude_patterns {
1831 let normalized = Self::normalize_glob_pattern(pattern);
1832 if let Ok(glob) = Glob::new(&normalized) {
1833 builder.add(glob);
1834 }
1835 }
1836 builder.build().ok()
1837 } else {
1838 None
1839 };
1840
1841 let mut candidates: Vec<SearchResult> = Vec::new();
1843
1844 for file_id in 0..content_reader.file_count() {
1845 let file_path = match content_reader.get_file_path(file_id as u32) {
1846 Some(p) => p,
1847 None => continue,
1848 };
1849
1850 let ext = file_path.extension()
1852 .and_then(|e| e.to_str())
1853 .unwrap_or("");
1854 let detected_lang = Language::from_extension(ext);
1855
1856 if let Some(lang) = filter.language {
1858 if detected_lang != lang {
1859 continue;
1860 }
1861 }
1862
1863 let file_path_str = file_path.to_string_lossy().to_string();
1864
1865 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
1867 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
1868
1869 if !included || excluded {
1870 continue;
1871 }
1872
1873 if let Some(ref file_pattern) = filter.file_pattern {
1875 if !file_path_str.contains(file_pattern) {
1876 continue;
1877 }
1878 }
1879
1880 candidates.push(SearchResult {
1883 path: file_path_str,
1884 lang: detected_lang,
1885 span: Span { start_line: 1, end_line: 1 },
1886 symbol: None,
1887 kind: SymbolKind::Unknown("keyword_query".to_string()),
1888 preview: String::new(),
1889 dependencies: None,
1890 });
1891 }
1892
1893 if let Some(lang) = filter.language {
1894 log::info!("Keyword query will scan {} {:?} files for symbol extraction", candidates.len(), lang);
1895 } else {
1896 log::info!("Keyword query will scan {} files (all languages) for symbol extraction", candidates.len());
1897 }
1898
1899 Ok(candidates)
1900 }
1901
    /// Phase 1 text search: use the trigram index to find candidate lines
    /// for `pattern`, then verify each candidate line under the requested
    /// match mode (regex / substring / whole-word) to discard trigram
    /// false positives.
    fn get_trigram_candidates(&self, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
        let content_path = self.cache.path().join("content.bin");
        let content_reader = ContentReader::open(&content_path)
            .context("Failed to open content store")?;

        // Prefer the on-disk index; rebuild in memory if loading fails or
        // the file is missing.
        let trigrams_path = self.cache.path().join("trigrams.bin");
        let trigram_index = if trigrams_path.exists() {
            match TrigramIndex::load(&trigrams_path) {
                Ok(index) => {
                    log::debug!("Loaded trigram index from disk: {} trigrams, {} files",
                        index.trigram_count(), index.file_count());
                    index
                }
                Err(e) => {
                    log::warn!("Failed to load trigram index from disk: {}", e);
                    log::warn!("Rebuilding trigram index from content store...");
                    Self::rebuild_trigram_index(&content_reader)?
                }
            }
        } else {
            log::debug!("trigrams.bin not found, rebuilding from content store");
            Self::rebuild_trigram_index(&content_reader)?
        };

        // Candidate locations may include false positives; each is verified
        // against the actual line text below.
        let candidates = trigram_index.search(pattern);
        log::debug!("Found {} candidate locations from trigram search", candidates.len());

        let pattern_owned = pattern.to_string();

        // Compile the regex once up front; an invalid pattern aborts the
        // whole query with a clear error instead of failing per line.
        let compiled_regex = if filter.use_regex {
            match Regex::new(&pattern_owned) {
                Ok(re) => Some(re),
                Err(e) => {
                    log::error!("Invalid regex pattern '{}': {}", pattern_owned, e);
                    anyhow::bail!("Invalid regex pattern '{}': {}", pattern_owned, e);
                }
            }
        } else {
            None
        };

        use std::collections::HashMap;
        // Group candidate locations by file so each file's content is read
        // and split into lines only once.
        let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> = HashMap::new();
        for loc in candidates {
            candidates_by_file
                .entry(loc.file_id)
                .or_insert_with(Vec::new)
                .push(loc);
        }

        log::debug!("Scanning {} files with trigram matches", candidates_by_file.len());

        use rayon::prelude::*;

        // Verify candidates in parallel, one rayon task per file.
        let results: Vec<SearchResult> = candidates_by_file
            .par_iter()
            .flat_map(|(file_id, locations)| {
                let file_path = match trigram_index.get_file(*file_id) {
                    Some(p) => p,
                    None => return Vec::new(),
                };

                let content = match content_reader.get_file_content(*file_id) {
                    Ok(c) => c,
                    Err(_) => return Vec::new(),
                };

                let file_path_str = file_path.to_string_lossy().to_string();

                let ext = file_path.extension()
                    .and_then(|e| e.to_str())
                    .unwrap_or("");
                let lang = Language::from_extension(ext);

                let lines: Vec<&str> = content.lines().collect();

                // Deduplicate multiple trigram hits on the same line.
                let mut seen_lines: std::collections::HashSet<usize> = std::collections::HashSet::new();
                let mut file_results = Vec::new();

                for loc in locations {
                    let line_no = loc.line_no as usize;

                    if seen_lines.contains(&line_no) {
                        continue;
                    }

                    // Line numbers are 1-based; guard against a stale index
                    // pointing past the end of the current file contents.
                    if line_no == 0 || line_no > lines.len() {
                        log::debug!("Line {} out of bounds (file has {} lines)", line_no, lines.len());
                        continue;
                    }

                    let line = lines[line_no - 1];

                    // Verify under the requested mode: regex, plain
                    // substring, or whole-word (the default).
                    let line_matches = if filter.use_regex {
                        compiled_regex.as_ref()
                            .map(|re| re.is_match(line))
                            .unwrap_or(false)
                    } else if filter.use_contains {
                        line.contains(&pattern_owned)
                    } else {
                        Self::has_word_boundary_match(line, &pattern_owned)
                    };

                    if !line_matches {
                        continue;
                    }

                    seen_lines.insert(line_no);

                    file_results.push(SearchResult {
                        path: file_path_str.clone(),
                        lang: lang.clone(),
                        kind: SymbolKind::Unknown("text_match".to_string()),
                        symbol: None, span: Span {
                            start_line: line_no,
                            end_line: line_no,
                        },
                        preview: line.to_string(),
                        dependencies: None,
                    });
                }

                file_results
            })
            .collect();

        Ok(results)
    }
2055
    /// Regex search over the whole index. Literal substrings are extracted
    /// from the pattern and looked up in the trigram index to narrow the set
    /// of files that must be regex-scanned; patterns with no usable literal
    /// (≥3 chars) fall back to scanning every indexed file.
    fn get_regex_candidates(&self, pattern: &str, timeout: Option<&std::time::Duration>, start_time: &std::time::Instant, suppress_output: bool) -> Result<Vec<SearchResult>> {
        let regex = Regex::new(pattern)
            .with_context(|| format!("Invalid regex pattern: {}", pattern))?;

        // Bail out early if the caller's deadline has already passed.
        if let Some(timeout_duration) = timeout {
            if start_time.elapsed() > *timeout_duration {
                anyhow::bail!(
                    "Query timeout exceeded ({} seconds) during regex compilation",
                    timeout_duration.as_secs()
                );
            }
        }

        let trigrams = extract_trigrams_from_regex(pattern);

        let content_path = self.cache.path().join("content.bin");
        let content_reader = ContentReader::open(&content_path)
            .context("Failed to open content store")?;

        let mut results = Vec::new();

        if trigrams.is_empty() {
            // No literal to index on: every file must be scanned. Warn,
            // since this can be slow on large codebases.
            if !suppress_output {
                output::warn(&format!(
                    "Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan. This may be slow on large codebases. Consider using patterns with literal text.",
                    pattern
                ));
            }

            for file_id in 0..content_reader.file_count() {
                let file_path = content_reader.get_file_path(file_id as u32)
                    .context("Invalid file_id")?;
                let content = content_reader.get_file_content(file_id as u32)?;

                self.find_regex_matches_in_file(
                    &regex,
                    file_path,
                    content,
                    &mut results,
                )?;
            }
        } else {
            log::debug!("Using {} trigrams to narrow regex search candidates", trigrams.len());

            let trigrams_path = self.cache.path().join("trigrams.bin");
            let trigram_index = if trigrams_path.exists() {
                TrigramIndex::load(&trigrams_path)?
            } else {
                Self::rebuild_trigram_index(&content_reader)?
            };

            use crate::regex_trigrams::extract_literal_sequences;
            let literals = extract_literal_sequences(pattern);

            if literals.is_empty() {
                // Defensive: trigrams existed but no literal sequences were
                // recovered — treat like the no-literal case and scan all.
                log::warn!("Regex extraction found trigrams but no literal sequences - this shouldn't happen");
                for file_id in 0..content_reader.file_count() {
                    let file_path = content_reader.get_file_path(file_id as u32)
                        .context("Invalid file_id")?;
                    let content = content_reader.get_file_content(file_id as u32)?;
                    self.find_regex_matches_in_file(&regex, file_path, content, &mut results)?;
                }
            } else {
                use std::collections::HashSet;
                // Union of files containing ANY literal. Union (rather than
                // intersection) is the conservative choice: literals may come
                // from regex alternations, so requiring all of them could
                // drop genuine matches.
                let mut candidate_files: HashSet<u32> = HashSet::new();

                for literal in &literals {
                    let candidates = trigram_index.search(literal);
                    let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();

                    log::debug!("Literal '{}' found in {} files", literal, file_ids.len());

                    candidate_files.extend(file_ids);
                }

                let final_candidates = candidate_files;
                log::debug!("After union: searching {} files that contain any literal", final_candidates.len());

                // Full regex scan, restricted to the candidate files.
                for &file_id in &final_candidates {
                    let file_path = trigram_index.get_file(file_id)
                        .context("Invalid file_id from trigram search")?;
                    let content = content_reader.get_file_content(file_id)?;

                    self.find_regex_matches_in_file(
                        &regex,
                        file_path,
                        content,
                        &mut results,
                    )?;
                }
            }
        }

        log::info!("Regex search found {} matches for pattern '{}'", results.len(), pattern);
        Ok(results)
    }
2193
2194 fn find_regex_matches_in_file(
2196 &self,
2197 regex: &Regex,
2198 file_path: &std::path::Path,
2199 content: &str,
2200 results: &mut Vec<SearchResult>,
2201 ) -> Result<()> {
2202 let file_path_str = file_path.to_string_lossy().to_string();
2203
2204 let ext = file_path.extension()
2206 .and_then(|e| e.to_str())
2207 .unwrap_or("");
2208 let lang = Language::from_extension(ext);
2209
2210 for (line_idx, line) in content.lines().enumerate() {
2212 if regex.is_match(line) {
2213 let line_no = line_idx + 1;
2214
2215 results.push(SearchResult {
2222 path: file_path_str.clone(),
2223 lang: lang.clone(),
2224 kind: SymbolKind::Unknown("regex_match".to_string()),
2225 symbol: None, span: Span {
2227 start_line: line_no,
2228 end_line: line_no,
2229 },
2230 preview: line.to_string(),
2231 dependencies: None,
2232 });
2233 }
2234 }
2235
2236 Ok(())
2237 }
2238
2239 fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
2241 for file_id in 0..content_reader.file_count() {
2242 if let Some(path) = content_reader.get_file_path(file_id as u32) {
2243 if path.to_string_lossy() == target_path {
2244 return Some(file_id as u32);
2245 }
2246 }
2247 }
2248 None
2249 }
2250
2251 fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
2253 log::debug!("Rebuilding trigram index from {} files", content_reader.file_count());
2254 let mut trigram_index = TrigramIndex::new();
2255
2256 for file_id in 0..content_reader.file_count() {
2257 let file_path = content_reader.get_file_path(file_id as u32)
2258 .context("Invalid file_id")?
2259 .to_path_buf();
2260 let content = content_reader.get_file_content(file_id as u32)?;
2261
2262 let idx = trigram_index.add_file(file_path);
2263 trigram_index.index_file(idx, content);
2264 }
2265
2266 trigram_index.finalize();
2267 log::debug!("Trigram index rebuilt with {} trigrams", trigram_index.trigram_count());
2268
2269 Ok(trigram_index)
2270 }
2271
2272 fn normalize_glob_pattern(pattern: &str) -> String {
2284 if pattern.starts_with('.') || pattern.starts_with('/') || pattern.starts_with('*') {
2285 pattern.to_string()
2287 } else {
2288 format!("./{}", pattern)
2290 }
2291 }
2292
2293 fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
2304 let escaped_pattern = regex::escape(pattern);
2306 let pattern_with_boundaries = format!(r"\b{}\b", escaped_pattern);
2307
2308 if let Ok(re) = Regex::new(&pattern_with_boundaries) {
2309 re.is_match(line)
2310 } else {
2311 log::debug!("Word boundary regex failed for pattern '{}', falling back to substring", pattern);
2313 line.contains(pattern)
2314 }
2315 }
2316
    /// Determine whether the index is fresh for the current git state.
    ///
    /// Returns `(status, is_fresh, warning)`. Checks in order: the current
    /// branch is indexed at all, the indexed commit matches HEAD, and a
    /// small sample of indexed files has not been modified since indexing.
    /// Non-git workspaces (or any git lookup failure) report fresh.
    fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
        let root = std::env::current_dir()?;

        if crate::git::is_git_repo(&root) {
            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
                // Case 1: the current branch was never indexed.
                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
                    let warning = IndexWarning {
                        reason: format!("Branch '{}' has not been indexed", current_branch),
                        action_required: "rfx index".to_string(),
                        details: Some(IndexWarningDetails {
                            current_branch: Some(current_branch),
                            indexed_branch: None,
                            current_commit: None,
                            indexed_commit: None,
                        }),
                    };
                    return Ok((IndexStatus::Stale, false, Some(warning)));
                }

                if let (Ok(current_commit), Ok(branch_info)) =
                    (crate::git::get_current_commit(&root), self.cache.get_branch_info(&current_branch)) {

                    // Case 2: the index was built at a different commit.
                    if branch_info.commit_sha != current_commit {
                        let warning = IndexWarning {
                            reason: format!(
                                "Commit changed from {} to {}",
                                // NOTE(review): byte-slicing assumes both SHAs
                                // are at least 7 bytes; a malformed/short SHA
                                // would panic here — confirm upstream
                                // guarantees full-length SHAs.
                                &branch_info.commit_sha[..7],
                                &current_commit[..7]
                            ),
                            action_required: "rfx index".to_string(),
                            details: Some(IndexWarningDetails {
                                current_branch: Some(current_branch.clone()),
                                indexed_branch: Some(current_branch.clone()),
                                current_commit: Some(current_commit.clone()),
                                indexed_commit: Some(branch_info.commit_sha.clone()),
                            }),
                        };
                        return Ok((IndexStatus::Stale, false, Some(warning)));
                    }

                    // Case 3: heuristic mtime check over a small sample of
                    // indexed files (cheap, deliberately not exhaustive).
                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
                        let mut checked = 0;
                        let mut changed = 0;
                        const SAMPLE_SIZE: usize = 10;

                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
                            checked += 1;
                            let file_path = std::path::Path::new(path);

                            // Missing files / unreadable metadata are skipped
                            // and do not count as changed.
                            if let Ok(metadata) = std::fs::metadata(file_path) {
                                if let Ok(modified) = metadata.modified() {
                                    let indexed_time = branch_info.last_indexed;
                                    let file_time = modified.duration_since(std::time::UNIX_EPOCH)
                                        .unwrap_or_default()
                                        .as_secs() as i64;

                                    if file_time > indexed_time {
                                        changed += 1;
                                    }
                                }
                            }
                        }

                        if changed > 0 {
                            let warning = IndexWarning {
                                reason: format!("{} of {} sampled files modified", changed, checked),
                                action_required: "rfx index".to_string(),
                                details: Some(IndexWarningDetails {
                                    current_branch: Some(current_branch.clone()),
                                    indexed_branch: Some(branch_info.branch.clone()),
                                    current_commit: Some(current_commit.clone()),
                                    indexed_commit: Some(branch_info.commit_sha.clone()),
                                }),
                            };
                            return Ok((IndexStatus::Stale, false, Some(warning)));
                        }
                    }

                    // All git checks passed.
                    return Ok((IndexStatus::Fresh, true, None));
                }
            }
        }

        // Non-git workspace, or git information unavailable: assume fresh.
        Ok((IndexStatus::Fresh, true, None))
    }
2414
    /// Emit console warnings (unless suppressed) when the index looks stale
    /// for the current git state. Performs the same checks as
    /// `get_index_status` but reports via `output::warn` instead of
    /// returning a status. Always returns `Ok(())` — staleness never fails
    /// the query.
    fn check_index_freshness(&self, filter: &QueryFilter) -> Result<()> {
        let root = std::env::current_dir()?;

        if crate::git::is_git_repo(&root) {
            if let Ok(current_branch) = crate::git::get_current_branch(&root) {
                // Branch has no index at all.
                if !self.cache.branch_exists(&current_branch).unwrap_or(false) {
                    if !filter.suppress_output {
                        output::warn(&format!("⚠️ WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.", current_branch));
                    }
                    return Ok(());
                }

                if let (Ok(current_commit), Ok(branch_info)) =
                    (crate::git::get_current_commit(&root), self.cache.get_branch_info(&current_branch)) {

                    // Commit drift: index built at a different commit.
                    // NOTE(review): the [..7] byte-slices below panic on SHAs
                    // shorter than 7 bytes — confirm upstream guarantees.
                    if branch_info.commit_sha != current_commit {
                        if !filter.suppress_output {
                            output::warn(&format!("⚠️ WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
                                &branch_info.commit_sha[..7], &current_commit[..7]));
                        }
                        return Ok(());
                    }

                    // Cheap staleness heuristic: compare mtimes of up to
                    // SAMPLE_SIZE indexed files against the index timestamp.
                    if let Ok(branch_files) = self.cache.get_branch_files(&current_branch) {
                        let mut checked = 0;
                        let mut changed = 0;
                        const SAMPLE_SIZE: usize = 10;

                        for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
                            checked += 1;
                            let file_path = std::path::Path::new(path);

                            // Missing/unreadable files are skipped silently.
                            if let Ok(metadata) = std::fs::metadata(file_path) {
                                if let Ok(modified) = metadata.modified() {
                                    let indexed_time = branch_info.last_indexed;
                                    // Whole-second epoch comparison; edits in
                                    // the same second as indexing may be
                                    // missed.
                                    let file_time = modified.duration_since(std::time::UNIX_EPOCH)
                                        .unwrap_or_default()
                                        .as_secs() as i64;

                                    if file_time > indexed_time {
                                        changed += 1;
                                    }
                                }
                            }
                        }

                        if changed > 0 && !filter.suppress_output {
                            output::warn(&format!("⚠️ WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.", changed, checked));
                        }
                    }
                }
            }
        }

        Ok(())
    }
2488}
2489
/// Produce an optional guidance string for AI agents consuming query output.
///
/// The rules are evaluated in priority order — empty results, overly broad
/// result sets, pagination, then mode-specific hints — and the first rule
/// that applies wins. `None` means no guidance is warranted.
pub fn generate_ai_instruction(
    result_count: usize,
    total_count: usize,
    has_more: bool,
    symbols_mode: bool,
    paths_only: bool,
    use_ast: bool,
    use_regex: bool,
    language_filter: bool,
    glob_filter: bool,
    exact_mode: bool,
) -> Option<String> {
    // Nothing matched: suggest concrete ways to broaden the search.
    if result_count == 0 {
        return Some(
            "No results found. Consider these alternatives: 1) Check pattern spelling, 2) Remove --kind or --lang filters to broaden search, 3) Try partial match or related term, 4) Use search_regex tool for pattern matching with special characters or complex patterns."
                .to_string(),
        );
    }

    // Far too many hits: instruct the agent to refine, not enumerate.
    if total_count >= 500 {
        return Some(format!(
            "Query too broad: {} results found. STOP. Do not list results. Refine search automatically by adding filters: kind parameter (Function/Struct/Class), lang parameter (rust/python/etc), or glob parameter (['src/**/*.rs']). Call search_code again with appropriate filters.",
            total_count
        ));
    }

    // Results are paginated: discourage blind fetching of every page.
    if has_more {
        return Some(format!(
            "Showing {} of {} results. PAGINATED - there are more results available. Do not automatically fetch all results. Show current page, ask user if these results answer their question before fetching more with --offset parameter.",
            result_count, total_count
        ));
    }

    // Symbols mode with a single definition: answer in one line.
    if symbols_mode && result_count == 1 {
        return Some(
            "Found 1 precise result. Respond concisely: '[symbol] at [path]:[line]'.".to_string(),
        );
    }

    // Symbols mode with a handful of definitions: list them tersely.
    if symbols_mode && (2..=10).contains(&result_count) {
        return Some(format!(
            "Found {} precise results (definitions only, not usages). List locations concisely: '[symbol] at [path]:[line]' for each result.",
            result_count
        ));
    }

    // Broad-but-not-extreme totals: nudge toward filters.
    if (101..500).contains(&total_count) {
        return Some(format!(
            "Found {} results - this is broad. Suggest refining search with: kind parameter (Function/Struct/Class/etc), lang parameter (rust/python/etc), or glob parameter to narrow file scope.",
            total_count
        ));
    }

    // Many full-text hits: definitions-only mode usually trims them sharply.
    if !symbols_mode && result_count >= 100 {
        return Some(format!(
            "Found {} results in full-text search mode (includes definitions AND all usages). Consider using symbols=true parameter to filter to definitions only. This typically reduces results by 80-90%.",
            result_count
        ));
    }

    if paths_only {
        return Some(format!(
            "Found {} unique files (paths-only mode - no code content included). Next step: Use Read tool on specific files that look relevant based on their paths.",
            result_count
        ));
    }

    if use_ast {
        return Some(format!(
            "Found {} results using AST pattern matching. These are structure-based matches using Tree-sitter patterns, not text search.",
            result_count
        ));
    }

    if use_regex && result_count >= 100 {
        return Some(format!(
            "Found {} results using regex pattern matching. Regex matches are expansive. Consider using exact text search or symbols mode for more precise results.",
            result_count
        ));
    }

    // Very few hits under restrictive filters: remind the agent which
    // filter might be hiding results.
    if language_filter && result_count <= 5 {
        return Some(format!(
            "Found {} results with language filter active. Results are limited to this language only. Remove lang parameter if you want to search all languages.",
            result_count
        ));
    }

    if glob_filter && result_count <= 10 {
        return Some(format!(
            "Found {} results with glob filter active. Results are limited to matching paths. Remove glob parameter to search entire codebase.",
            result_count
        ));
    }

    if exact_mode && result_count <= 5 {
        return Some(format!(
            "Found {} results in exact match mode. Only exact symbol name matches are included. Remove exact parameter to allow substring matching.",
            result_count
        ));
    }

    None
}
2601
#[cfg(test)]
mod tests {
    use super::*;
    use crate::indexer::Indexer;
    use crate::models::IndexConfig;
    use std::fs;
    use tempfile::TempDir;

    /// Create a project directory under a fresh `TempDir`, write the given
    /// `(relative path, contents)` files, index the project, and return the
    /// `TempDir` (which must be kept alive so the index isn't deleted) plus a
    /// `QueryEngine` over the freshly built index.
    ///
    /// This mirrors the index-then-reopen pattern the tests previously
    /// repeated inline: the indexer consumes one `CacheManager`, and the
    /// engine is constructed from a second one pointing at the same project.
    fn setup_project(files: &[(&str, &str)]) -> (TempDir, QueryEngine) {
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir_all(&project).unwrap();

        for (rel_path, contents) in files {
            let path = project.join(rel_path);
            // Create intermediate directories for nested fixtures like "src/lib.rs".
            if let Some(parent) = path.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&path, contents).unwrap();
        }

        let indexer = Indexer::new(CacheManager::new(&project), IndexConfig::default());
        indexer.index(&project, false).unwrap();

        let engine = QueryEngine::new(CacheManager::new(&project));
        (temp, engine)
    }

    #[test]
    fn test_query_engine_creation() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        let engine = QueryEngine::new(cache);

        assert!(engine.cache.path().ends_with(".reflex"));
    }

    #[test]
    fn test_filter_modes() {
        // Default filter is full-text mode.
        let filter_fulltext = QueryFilter::default();
        assert!(!filter_fulltext.symbols_mode);

        let filter_symbols = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        assert!(filter_symbols.symbols_mode);

        // A kind filter can be combined with symbols mode.
        let filter_with_kind = QueryFilter {
            kind: Some(SymbolKind::Function),
            symbols_mode: true,
            ..Default::default()
        };
        assert!(filter_with_kind.symbols_mode);
    }

    #[test]
    fn test_fulltext_search() {
        let (_temp, engine) = setup_project(&[
            ("main.rs", "fn main() {\n println!(\"hello\");\n}"),
            ("lib.rs", "pub fn hello() {}"),
        ]);

        let results = engine.search("hello", QueryFilter::default()).unwrap();

        // Full-text mode matches both the definition and the usage.
        assert!(results.len() >= 2);
        assert!(results.iter().any(|r| r.path.contains("main.rs")));
        assert!(results.iter().any(|r| r.path.contains("lib.rs")));
    }

    #[test]
    fn test_symbol_search() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "fn greet() {}\nfn main() {\n greet();\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
    }

    #[test]
    fn test_regex_search() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "fn test1() {}\nfn test2() {}\nfn other() {}",
        )]);

        let filter = QueryFilter {
            use_regex: true,
            ..Default::default()
        };
        let results = engine.search(r"fn test\d", filter).unwrap();

        // Only test1 and test2 match the \d suffix; "other" does not.
        assert_eq!(results.len(), 2);
        assert!(results.iter().all(|r| r.preview.contains("test")));
    }

    #[test]
    fn test_language_filter() {
        let (_temp, engine) = setup_project(&[
            ("main.rs", "fn main() {}"),
            ("main.js", "function main() {}"),
        ]);

        let filter = QueryFilter {
            language: Some(Language::Rust),
            ..Default::default()
        };
        let results = engine.search("main", filter).unwrap();

        assert!(results.iter().all(|r| r.lang == Language::Rust));
        assert!(results.iter().all(|r| r.path.ends_with(".rs")));
    }

    #[test]
    fn test_kind_filter() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }",
        )]);

        // Substring match ("mai") with kind restricted to functions.
        let filter = QueryFilter {
            symbols_mode: true,
            kind: Some(SymbolKind::Function),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("mai", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least one result");
        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("main")), "Should find 'main' function");
    }

    #[test]
    fn test_file_pattern_filter() {
        let (_temp, engine) = setup_project(&[
            ("src/lib.rs", "fn foo() {}"),
            ("tests/test.rs", "fn foo() {}"),
        ]);

        let filter = QueryFilter {
            file_pattern: Some("src/".to_string()),
            ..Default::default()
        };
        let results = engine.search("foo", filter).unwrap();

        assert!(results.iter().all(|r| r.path.contains("src/")));
        assert!(!results.iter().any(|r| r.path.contains("tests/")));
    }

    #[test]
    fn test_limit_filter() {
        // 20 matching functions, but the limit caps results at 5.
        let content = (0..20).map(|i| format!("fn test{}() {{}}", i)).collect::<Vec<_>>().join("\n");
        let (_temp, engine) = setup_project(&[("main.rs", &content)]);

        let filter = QueryFilter {
            limit: Some(5),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_exact_match_filter() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "fn test() {}\nfn test_helper() {}\nfn other_test() {}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            exact: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        // Exact mode excludes test_helper and other_test.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].symbol.as_deref(), Some("test"));
    }

    #[test]
    fn test_expand_mode() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "fn greet() {\n println!(\"Hello\");\n println!(\"World\");\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            expand: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        // Expand mode includes the function body, not just the signature.
        let result = &results[0];
        assert!(result.preview.contains("println"));
    }

    #[test]
    fn test_search_empty_index() {
        let (_temp, engine) = setup_project(&[]);

        let results = engine.search("nonexistent", QueryFilter::default()).unwrap();

        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_search_no_index() {
        // No indexing step at all: searching must fail, not return empty.
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir(&project).unwrap();

        let cache = CacheManager::new(&project);
        let engine = QueryEngine::new(cache);
        let filter = QueryFilter::default();

        assert!(engine.search("test", filter).is_err());
    }

    #[test]
    fn test_search_special_characters() {
        let (_temp, engine) = setup_project(&[("main.rs", "let x = 42;\nlet y = x + 1;")]);

        // Query containing an operator and trailing space.
        let results = engine.search("x + ", QueryFilter::default()).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_search_unicode() {
        let (_temp, engine) = setup_project(&[("main.rs", "// 你好世界\nfn main() {}")]);

        let filter = QueryFilter {
            use_contains: true,
            force: true,
            ..Default::default()
        };

        let results = engine.search("你好", filter).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_case_sensitive_search() {
        let (_temp, engine) = setup_project(&[("main.rs", "fn Test() {}\nfn test() {}")]);

        let results = engine.search("Test", QueryFilter::default()).unwrap();
        assert!(results.iter().any(|r| r.preview.contains("Test()")));
    }

    #[test]
    fn test_results_sorted_deterministically() {
        let (_temp, engine) = setup_project(&[
            ("a.rs", "fn test() {}"),
            ("z.rs", "fn test() {}"),
            ("m.rs", "fn test() {}\nfn test2() {}"),
        ]);

        let filter = QueryFilter::default();

        // Run the identical query three times.
        let results1 = engine.search("test", filter.clone()).unwrap();
        let results2 = engine.search("test", filter.clone()).unwrap();
        let results3 = engine.search("test", filter).unwrap();

        assert_eq!(results1.len(), results2.len());
        assert_eq!(results1.len(), results3.len());

        // Every run must produce the same ordering.
        for i in 0..results1.len() {
            assert_eq!(results1[i].path, results2[i].path);
            assert_eq!(results1[i].path, results3[i].path);
            assert_eq!(results1[i].span.start_line, results2[i].span.start_line);
            assert_eq!(results1[i].span.start_line, results3[i].span.start_line);
        }

        // Ordering is (path, start_line) ascending.
        for i in 0..results1.len().saturating_sub(1) {
            let curr = &results1[i];
            let next = &results1[i + 1];
            assert!(
                curr.path < next.path ||
                (curr.path == next.path && curr.span.start_line <= next.span.start_line)
            );
        }
    }

    #[test]
    fn test_multiple_filters_combined() {
        let (_temp, engine) = setup_project(&[
            ("src/main.rs", "fn test() {}\nstruct Test {}"),
            ("src/lib.rs", "fn test() {}"),
            ("test.js", "function test() {}"),
        ]);

        // Language + kind + file pattern + symbols mode all at once.
        let filter = QueryFilter {
            language: Some(Language::Rust),
            kind: Some(SymbolKind::Function),
            file_pattern: Some("src/main".to_string()),
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 1);
        assert!(results[0].path.contains("src/main.rs"));
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_find_symbol_helper() {
        let (_temp, engine) = setup_project(&[("main.rs", "fn greet() {}")]);

        let results = engine.find_symbol("greet").unwrap();

        assert!(!results.is_empty());
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_list_by_kind_helper() {
        let (_temp, engine) = setup_project(&[(
            "main.rs",
            "struct Point {}\nfn test() {}\nstruct Line {}",
        )]);

        // Substring "oin" should match Point but only among structs.
        let filter = QueryFilter {
            kind: Some(SymbolKind::Struct),
            symbols_mode: true,
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("oin", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least Point struct");
        assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
    }

    #[test]
    fn test_search_with_metadata() {
        let (_temp, engine) = setup_project(&[("main.rs", "fn test() {}")]);

        let response = engine.search_with_metadata("test", QueryFilter::default()).unwrap();

        assert!(!response.results.is_empty());
    }

    #[test]
    fn test_search_across_languages() {
        let (_temp, engine) = setup_project(&[
            ("main.rs", "fn greet() {}"),
            ("main.ts", "function greet() {}"),
            ("main.py", "def greet(): pass"),
        ]);

        let results = engine.search("greet", QueryFilter::default()).unwrap();

        // One hit per language, at minimum.
        assert!(results.len() >= 3);
        assert!(results.iter().any(|r| r.lang == Language::Rust));
        assert!(results.iter().any(|r| r.lang == Language::TypeScript));
        assert!(results.iter().any(|r| r.lang == Language::Python));
    }
}