1use anyhow::{Context, Result};
7use regex::Regex;
8
9use crate::cache::CacheManager;
10use crate::content_store::ContentReader;
11use crate::models::{
12 IndexStatus, IndexWarning, IndexWarningDetails, Language, QueryResponse, SearchResult, Span,
13 SymbolKind,
14};
15use crate::parsers::ParserFactory;
16use crate::regex_trigrams::extract_trigrams_from_regex;
17use crate::trigram::TrigramIndex;
18
19#[derive(Debug, Clone)]
21pub struct QueryFilter {
22 pub language: Option<Language>,
24 pub kind: Option<SymbolKind>,
26 pub use_ast: bool,
28 pub use_regex: bool,
30 pub limit: Option<usize>,
32 pub symbols_mode: bool,
34 pub expand: bool,
36 pub file_pattern: Option<String>,
38 pub exact: bool,
40 pub use_contains: bool,
42 pub timeout_secs: u64,
44 pub glob_patterns: Vec<String>,
46 pub exclude_patterns: Vec<String>,
48 pub paths_only: bool,
50}
51
52impl Default for QueryFilter {
53 fn default() -> Self {
54 Self {
55 language: None,
56 kind: None,
57 use_ast: false,
58 use_regex: false,
59 limit: None,
60 symbols_mode: false,
61 expand: false,
62 file_pattern: None,
63 exact: false,
64 use_contains: false, timeout_secs: 30, glob_patterns: Vec::new(),
67 exclude_patterns: Vec::new(),
68 paths_only: false,
69 }
70 }
71}
72
73pub struct QueryEngine {
75 cache: CacheManager,
76}
77
78impl QueryEngine {
79 pub fn new(cache: CacheManager) -> Self {
81 Self { cache }
82 }
83
84 pub fn search_with_metadata(&self, pattern: &str, filter: QueryFilter) -> Result<QueryResponse> {
89 log::info!("Executing query with metadata: pattern='{}', filter={:?}", pattern, filter);
90
91 if !self.cache.exists() {
93 anyhow::bail!(
94 "Index not found. Run 'rfx index' to build the cache first."
95 );
96 }
97
98 if let Err(e) = self.cache.validate() {
100 anyhow::bail!(
101 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
102 e
103 );
104 }
105
106 let (status, can_trust_results, warning) = self.get_index_status()?;
108
109 let results = self.search_internal(pattern, filter)?;
111
112 Ok(QueryResponse {
113 status,
114 can_trust_results,
115 warning,
116 results,
117 })
118 }
119
120 pub fn search(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
125 log::info!("Executing query: pattern='{}', filter={:?}", pattern, filter);
126
127 if !self.cache.exists() {
129 anyhow::bail!(
130 "Index not found. Run 'rfx index' to build the cache first."
131 );
132 }
133
134 if let Err(e) = self.cache.validate() {
136 anyhow::bail!(
137 "Cache appears to be corrupted: {}. Run 'rfx clear' followed by 'rfx index' to rebuild.",
138 e
139 );
140 }
141
142 self.check_index_freshness()?;
144
145 self.search_internal(pattern, filter)
147 }
148
149 fn search_internal(&self, pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
151 use std::time::{Duration, Instant};
152
153 let start_time = Instant::now();
155 let timeout = if filter.timeout_secs > 0 {
156 Some(Duration::from_secs(filter.timeout_secs))
157 } else {
158 None
159 };
160
161 let mut results = if filter.use_regex {
163 self.get_regex_candidates(pattern, timeout.as_ref(), &start_time)?
165 } else {
166 self.get_trigram_candidates(pattern, &filter)?
168 };
169
170 if let Some(timeout_duration) = timeout {
172 if start_time.elapsed() > timeout_duration {
173 anyhow::bail!(
174 "Query timeout exceeded ({} seconds).\n\
175 \n\
176 The query took too long to complete. Try one of these approaches:\n\
177 • Use a more specific search pattern (longer patterns = faster search)\n\
178 • Add a language filter with --lang to narrow the search space\n\
179 • Add a file filter with --file to search specific directories\n\
180 • Increase the timeout with --timeout <seconds>\n\
181 \n\
182 Example: rfx query \"{}\" --lang rust --timeout 60",
183 filter.timeout_secs,
184 pattern
185 );
186 }
187 }
188
189 if filter.use_ast {
191 results = self.enrich_with_ast(results, pattern, filter.language)?;
193 } else if filter.symbols_mode || filter.kind.is_some() {
194 results = self.enrich_with_symbols(results, pattern, &filter)?;
196 }
197
198 if let Some(lang) = filter.language {
200 results.retain(|r| r.lang == lang);
201 }
202
203 if let Some(ref kind) = filter.kind {
206 results.retain(|r| {
207 if matches!(kind, SymbolKind::Function) {
208 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
210 } else {
211 r.kind == *kind
212 }
213 });
214 }
215
216 if let Some(ref file_pattern) = filter.file_pattern {
218 results.retain(|r| r.path.contains(file_pattern));
219 }
220
221 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
223 use globset::{Glob, GlobSetBuilder};
224
225 let include_matcher = if !filter.glob_patterns.is_empty() {
227 let mut builder = GlobSetBuilder::new();
228 for pattern in &filter.glob_patterns {
229 match Glob::new(pattern) {
230 Ok(glob) => {
231 builder.add(glob);
232 }
233 Err(e) => {
234 log::warn!("Invalid glob pattern '{}': {}", pattern, e);
235 }
236 }
237 }
238 match builder.build() {
239 Ok(matcher) => Some(matcher),
240 Err(e) => {
241 log::warn!("Failed to build glob matcher: {}", e);
242 None
243 }
244 }
245 } else {
246 None
247 };
248
249 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
251 let mut builder = GlobSetBuilder::new();
252 for pattern in &filter.exclude_patterns {
253 match Glob::new(pattern) {
254 Ok(glob) => {
255 builder.add(glob);
256 }
257 Err(e) => {
258 log::warn!("Invalid exclude pattern '{}': {}", pattern, e);
259 }
260 }
261 }
262 match builder.build() {
263 Ok(matcher) => Some(matcher),
264 Err(e) => {
265 log::warn!("Failed to build exclude matcher: {}", e);
266 None
267 }
268 }
269 } else {
270 None
271 };
272
273 results.retain(|r| {
275 let included = if let Some(ref matcher) = include_matcher {
277 matcher.is_match(&r.path)
278 } else {
279 true };
281
282 let excluded = if let Some(ref matcher) = exclude_matcher {
284 matcher.is_match(&r.path)
285 } else {
286 false };
288
289 included && !excluded
290 });
291 }
292
293 if filter.exact && filter.symbols_mode {
295 results.retain(|r| r.symbol.as_deref() == Some(pattern));
296 }
297
298 if filter.expand {
301 let content_path = self.cache.path().join("content.bin");
303 if let Ok(content_reader) = ContentReader::open(&content_path) {
304 for result in &mut results {
305 if result.span.start_line < result.span.end_line {
307 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
309 if let Ok(content) = content_reader.get_file_content(file_id) {
311 let lines: Vec<&str> = content.lines().collect();
312 let start_idx = (result.span.start_line as usize).saturating_sub(1);
313 let end_idx = (result.span.end_line as usize).min(lines.len());
314
315 if start_idx < end_idx {
316 let full_body = lines[start_idx..end_idx].join("\n");
317 result.preview = full_body;
318 }
319 }
320 }
321 }
322 }
323 }
324 }
325
326 if filter.paths_only {
328 use std::collections::HashSet;
329 let mut seen_paths = HashSet::new();
330 results.retain(|r| seen_paths.insert(r.path.clone()));
331 }
332
333 results.sort_by(|a, b| {
335 a.path.cmp(&b.path)
336 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
337 });
338
339 if let Some(limit) = filter.limit {
341 results.truncate(limit);
342 }
343
344 log::info!("Query returned {} results", results.len());
345
346 Ok(results)
347 }
348
349 pub fn find_symbol(&self, name: &str) -> Result<Vec<SearchResult>> {
351 let filter = QueryFilter {
352 symbols_mode: true,
353 ..Default::default()
354 };
355 self.search(name, filter)
356 }
357
358 pub fn search_ast(&self, pattern: &str, lang: Option<Language>) -> Result<Vec<SearchResult>> {
360 let filter = QueryFilter {
361 language: lang,
362 use_ast: true,
363 ..Default::default()
364 };
365
366 self.search(pattern, filter)
367 }
368
369 pub fn search_ast_all_files(&self, ast_pattern: &str, filter: QueryFilter) -> Result<Vec<SearchResult>> {
390 log::info!("Executing AST query on all files: pattern='{}', filter={:?}", ast_pattern, filter);
391
392 let lang = filter.language.ok_or_else(|| anyhow::anyhow!(
394 "Language must be specified for AST pattern matching. Use --lang to specify the language.\n\
395 \n\
396 Example: rfx query \"(function_definition) @fn\" --ast --lang python"
397 ))?;
398
399 if !self.cache.exists() {
401 anyhow::bail!(
402 "Index not found. Run 'rfx index' to build the cache first."
403 );
404 }
405
406 self.check_index_freshness()?;
408
409 let content_path = self.cache.path().join("content.bin");
411 let content_reader = ContentReader::open(&content_path)
412 .context("Failed to open content store")?;
413
414 use globset::{Glob, GlobSetBuilder};
416
417 let include_matcher = if !filter.glob_patterns.is_empty() {
418 let mut builder = GlobSetBuilder::new();
419 for pattern in &filter.glob_patterns {
420 if let Ok(glob) = Glob::new(pattern) {
421 builder.add(glob);
422 }
423 }
424 builder.build().ok()
425 } else {
426 None
427 };
428
429 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
430 let mut builder = GlobSetBuilder::new();
431 for pattern in &filter.exclude_patterns {
432 if let Ok(glob) = Glob::new(pattern) {
433 builder.add(glob);
434 }
435 }
436 builder.build().ok()
437 } else {
438 None
439 };
440
441 let mut candidates: Vec<SearchResult> = Vec::new();
443
444 for file_id in 0..content_reader.file_count() {
445 let file_path = match content_reader.get_file_path(file_id as u32) {
446 Some(p) => p,
447 None => continue,
448 };
449
450 let ext = file_path.extension()
452 .and_then(|e| e.to_str())
453 .unwrap_or("");
454 let detected_lang = Language::from_extension(ext);
455
456 if detected_lang != lang {
458 continue;
459 }
460
461 let file_path_str = file_path.to_string_lossy().to_string();
462
463 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&file_path_str));
465 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&file_path_str));
466
467 if !included || excluded {
468 continue;
469 }
470
471 candidates.push(SearchResult {
473 path: file_path_str,
474 lang: detected_lang,
475 span: Span { start_line: 1, start_col: 1, end_line: 1, end_col: 1 },
476 symbol: None,
477 kind: SymbolKind::Unknown("ast_query".to_string()),
478 scope: None,
479 preview: String::new(),
480 });
481 }
482
483 log::info!("AST query scanning {} files for language {:?}", candidates.len(), lang);
484
485 if candidates.is_empty() {
486 log::warn!("No files found for language {:?}. Check your language filter or glob patterns.", lang);
487 return Ok(Vec::new());
488 }
489
490 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
493
494 log::debug!("AST query found {} matches before filtering", results.len());
495
496 if let Some(ref kind) = filter.kind {
500 results.retain(|r| {
501 if matches!(kind, SymbolKind::Function) {
502 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
503 } else {
504 r.kind == *kind
505 }
506 });
507 }
508
509 if filter.expand {
513 let content_path = self.cache.path().join("content.bin");
514 if let Ok(content_reader) = ContentReader::open(&content_path) {
515 for result in &mut results {
516 if result.span.start_line < result.span.end_line {
517 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
518 if let Ok(content) = content_reader.get_file_content(file_id) {
519 let lines: Vec<&str> = content.lines().collect();
520 let start_idx = (result.span.start_line as usize).saturating_sub(1);
521 let end_idx = (result.span.end_line as usize).min(lines.len());
522
523 if start_idx < end_idx {
524 let full_body = lines[start_idx..end_idx].join("\n");
525 result.preview = full_body;
526 }
527 }
528 }
529 }
530 }
531 }
532 }
533
534 if filter.paths_only {
536 use std::collections::HashSet;
537 let mut seen_paths = HashSet::new();
538 results.retain(|r| seen_paths.insert(r.path.clone()));
539 }
540
541 results.sort_by(|a, b| {
543 a.path.cmp(&b.path)
544 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
545 });
546
547 if let Some(limit) = filter.limit {
549 results.truncate(limit);
550 }
551
552 log::info!("AST query returned {} results", results.len());
553
554 Ok(results)
555 }
556
557 pub fn search_ast_with_text_filter(
569 &self,
570 text_pattern: &str,
571 ast_pattern: &str,
572 filter: QueryFilter,
573 ) -> Result<Vec<SearchResult>> {
574 log::info!("Executing AST query with text filter: text='{}', ast='{}', filter={:?}",
575 text_pattern, ast_pattern, filter);
576
577 if !self.cache.exists() {
579 anyhow::bail!(
580 "Index not found. Run 'rfx index' to build the cache first."
581 );
582 }
583
584 self.check_index_freshness()?;
586
587 use std::time::{Duration, Instant};
589 let start_time = Instant::now();
590 let timeout = if filter.timeout_secs > 0 {
591 Some(Duration::from_secs(filter.timeout_secs))
592 } else {
593 None
594 };
595
596 let candidates = if filter.use_regex {
598 self.get_regex_candidates(text_pattern, timeout.as_ref(), &start_time)?
599 } else {
600 self.get_trigram_candidates(text_pattern, &filter)?
601 };
602
603 log::debug!("Phase 1 found {} candidate locations", candidates.len());
604
605 let mut results = self.enrich_with_ast(candidates, ast_pattern, filter.language)?;
607
608 log::debug!("Phase 2 AST matching found {} results", results.len());
609
610 if let Some(lang) = filter.language {
612 results.retain(|r| r.lang == lang);
613 }
614
615 if let Some(ref kind) = filter.kind {
616 results.retain(|r| {
617 if matches!(kind, SymbolKind::Function) {
618 matches!(r.kind, SymbolKind::Function | SymbolKind::Method)
619 } else {
620 r.kind == *kind
621 }
622 });
623 }
624
625 if let Some(ref file_pattern) = filter.file_pattern {
626 results.retain(|r| r.path.contains(file_pattern));
627 }
628
629 if !filter.glob_patterns.is_empty() || !filter.exclude_patterns.is_empty() {
631 use globset::{Glob, GlobSetBuilder};
632
633 let include_matcher = if !filter.glob_patterns.is_empty() {
634 let mut builder = GlobSetBuilder::new();
635 for pattern in &filter.glob_patterns {
636 if let Ok(glob) = Glob::new(pattern) {
637 builder.add(glob);
638 }
639 }
640 builder.build().ok()
641 } else {
642 None
643 };
644
645 let exclude_matcher = if !filter.exclude_patterns.is_empty() {
646 let mut builder = GlobSetBuilder::new();
647 for pattern in &filter.exclude_patterns {
648 if let Ok(glob) = Glob::new(pattern) {
649 builder.add(glob);
650 }
651 }
652 builder.build().ok()
653 } else {
654 None
655 };
656
657 results.retain(|r| {
658 let included = include_matcher.as_ref().map_or(true, |m| m.is_match(&r.path));
659 let excluded = exclude_matcher.as_ref().map_or(false, |m| m.is_match(&r.path));
660 included && !excluded
661 });
662 }
663
664 if filter.exact && filter.symbols_mode {
665 results.retain(|r| r.symbol.as_deref() == Some(text_pattern));
666 }
667
668 if filter.expand {
670 let content_path = self.cache.path().join("content.bin");
671 if let Ok(content_reader) = ContentReader::open(&content_path) {
672 for result in &mut results {
673 if result.span.start_line < result.span.end_line {
674 if let Some(file_id) = Self::find_file_id(&content_reader, &result.path) {
675 if let Ok(content) = content_reader.get_file_content(file_id) {
676 let lines: Vec<&str> = content.lines().collect();
677 let start_idx = (result.span.start_line as usize).saturating_sub(1);
678 let end_idx = (result.span.end_line as usize).min(lines.len());
679
680 if start_idx < end_idx {
681 let full_body = lines[start_idx..end_idx].join("\n");
682 result.preview = full_body;
683 }
684 }
685 }
686 }
687 }
688 }
689 }
690
691 results.sort_by(|a, b| {
693 a.path.cmp(&b.path)
694 .then_with(|| a.span.start_line.cmp(&b.span.start_line))
695 });
696
697 if let Some(limit) = filter.limit {
699 results.truncate(limit);
700 }
701
702 log::info!("AST query returned {} results", results.len());
703
704 Ok(results)
705 }
706
707 pub fn list_by_kind(&self, kind: SymbolKind) -> Result<Vec<SearchResult>> {
709 let filter = QueryFilter {
710 kind: Some(kind),
711 symbols_mode: true,
712 ..Default::default()
713 };
714
715 self.search("*", filter)
716 }
717
718 fn enrich_with_symbols(&self, candidates: Vec<SearchResult>, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
740 let content_path = self.cache.path().join("content.bin");
742 let content_reader = ContentReader::open(&content_path)
743 .context("Failed to open content store")?;
744
745 let trigrams_path = self.cache.path().join("trigrams.bin");
747 let trigram_index = if trigrams_path.exists() {
748 TrigramIndex::load(&trigrams_path)?
749 } else {
750 Self::rebuild_trigram_index(&content_reader)?
751 };
752
753 use std::collections::HashMap;
755 let mut files_by_path: HashMap<String, Vec<SearchResult>> = HashMap::new();
756 let mut skipped_unsupported = 0;
757
758 for candidate in candidates {
759 if !candidate.lang.is_supported() {
761 skipped_unsupported += 1;
762 continue;
763 }
764
765 files_by_path
766 .entry(candidate.path.clone())
767 .or_insert_with(Vec::new)
768 .push(candidate);
769 }
770
771 let total_files = files_by_path.len();
772 log::debug!("Processing {} candidate files for symbol enrichment (skipped {} unsupported language files)",
773 total_files, skipped_unsupported);
774
775 if total_files > 1000 {
777 log::warn!(
778 "Pattern '{}' matched {} files. This may take some time to parse.",
779 pattern,
780 total_files
781 );
782 log::warn!("Consider using a more specific pattern or adding --lang/--file filters to narrow the search.");
783 }
784
785 let mut files_to_process: Vec<String> = files_by_path.keys().cloned().collect();
787
788 let mut files_to_skip: std::collections::HashSet<String> = std::collections::HashSet::new();
791
792 for file_path in &files_to_process {
793 let ext = std::path::Path::new(file_path)
795 .extension()
796 .and_then(|e| e.to_str())
797 .unwrap_or("");
798 let lang = Language::from_extension(ext);
799
800 if let Some(line_filter) = crate::line_filter::get_filter(lang) {
802 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
804 Some(id) => id,
805 None => continue,
806 };
807
808 let content = match content_reader.get_file_content(file_id) {
810 Ok(c) => c,
811 Err(_) => continue,
812 };
813
814 let mut all_in_non_code = true;
816 for line in content.lines() {
817 let mut search_start = 0;
819 while let Some(pos) = line[search_start..].find(pattern) {
820 let absolute_pos = search_start + pos;
821
822 let in_comment = line_filter.is_in_comment(line, absolute_pos);
824 let in_string = line_filter.is_in_string(line, absolute_pos);
825
826 if !in_comment && !in_string {
827 all_in_non_code = false;
829 break;
830 }
831
832 search_start = absolute_pos + pattern.len();
833 }
834
835 if !all_in_non_code {
836 break;
837 }
838 }
839
840 if all_in_non_code {
842 if content.contains(pattern) {
844 files_to_skip.insert(file_path.clone());
845 log::debug!("Pre-filter: Skipping {} (all matches in comments/strings)", file_path);
846 }
847 }
848 }
849 }
850
851 files_to_process.retain(|path| !files_to_skip.contains(path));
853
854 log::debug!("Pre-filter: Skipped {} files where all matches are in comments/strings (parsing {} files)",
855 files_to_skip.len(), files_to_process.len());
856
857 let num_threads = {
859 let available_cores = std::thread::available_parallelism()
860 .map(|n| n.get())
861 .unwrap_or(4);
862 ((available_cores as f64 * 0.8).ceil() as usize).max(1).min(8)
865 };
866
867 log::debug!("Using {} threads for parallel symbol extraction (out of {} available cores)",
868 num_threads,
869 std::thread::available_parallelism().map(|n| n.get()).unwrap_or(4));
870
871 let pool = rayon::ThreadPoolBuilder::new()
873 .num_threads(num_threads)
874 .build()
875 .context("Failed to create thread pool for symbol extraction")?;
876
877 use rayon::prelude::*;
879
880 let all_symbols: Vec<SearchResult> = pool.install(|| {
881 files_to_process
882 .par_iter()
883 .flat_map(|file_path| {
884 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, file_path) {
886 Some(id) => id,
887 None => {
888 log::warn!("Could not find file_id for path: {}", file_path);
889 return Vec::new();
890 }
891 };
892
893 let content = match content_reader.get_file_content(file_id) {
894 Ok(c) => c,
895 Err(e) => {
896 log::warn!("Failed to read file {}: {}", file_path, e);
897 return Vec::new();
898 }
899 };
900
901 let ext = std::path::Path::new(file_path)
903 .extension()
904 .and_then(|e| e.to_str())
905 .unwrap_or("");
906 let lang = Language::from_extension(ext);
907
908 match ParserFactory::parse(file_path, content, lang) {
910 Ok(symbols) => {
911 log::debug!("Parsed {} symbols from {}", symbols.len(), file_path);
912 symbols
913 }
914 Err(e) => {
915 log::debug!("Failed to parse {}: {}", file_path, e);
916 Vec::new()
917 }
918 }
919 })
920 .collect()
921 });
922
923 let filtered: Vec<SearchResult> = if filter.use_regex {
925 let regex = Regex::new(pattern)
927 .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
928
929 all_symbols
930 .into_iter()
931 .filter(|sym| {
932 sym.symbol.as_deref().map_or(false, |s| regex.is_match(s))
933 })
934 .collect()
935 } else if filter.use_contains {
936 all_symbols
938 .into_iter()
939 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s.contains(pattern)))
940 .collect()
941 } else {
942 all_symbols
944 .into_iter()
945 .filter(|sym| sym.symbol.as_deref().map_or(false, |s| s == pattern))
946 .collect()
947 };
948
949 log::info!("Symbol enrichment found {} matches for pattern '{}'", filtered.len(), pattern);
950
951 Ok(filtered)
952 }
953
954 fn enrich_with_ast(&self, candidates: Vec<SearchResult>, ast_pattern: &str, language: Option<Language>) -> Result<Vec<SearchResult>> {
973 let lang = language.ok_or_else(|| anyhow::anyhow!(
975 "Language must be specified for AST pattern matching. Use --lang to specify the language."
976 ))?;
977
978 let content_path = self.cache.path().join("content.bin");
980 let content_reader = ContentReader::open(&content_path)
981 .context("Failed to open content store")?;
982
983 let trigrams_path = self.cache.path().join("trigrams.bin");
985 let trigram_index = if trigrams_path.exists() {
986 TrigramIndex::load(&trigrams_path)?
987 } else {
988 Self::rebuild_trigram_index(&content_reader)?
989 };
990
991 use std::collections::HashMap;
993 let mut file_contents: HashMap<String, String> = HashMap::new();
994
995 for candidate in &candidates {
996 if file_contents.contains_key(&candidate.path) {
997 continue;
998 }
999
1000 let file_id = match Self::find_file_id_by_path(&content_reader, &trigram_index, &candidate.path) {
1002 Some(id) => id,
1003 None => {
1004 log::warn!("Could not find file_id for path: {}", candidate.path);
1005 continue;
1006 }
1007 };
1008
1009 let content = match content_reader.get_file_content(file_id) {
1011 Ok(c) => c,
1012 Err(e) => {
1013 log::warn!("Failed to read file {}: {}", candidate.path, e);
1014 continue;
1015 }
1016 };
1017
1018 file_contents.insert(candidate.path.clone(), content.to_string());
1019 }
1020
1021 log::debug!("Executing AST query on {} candidate files with language {:?}", file_contents.len(), lang);
1022
1023 let results = crate::ast_query::execute_ast_query(candidates, ast_pattern, lang, &file_contents)?;
1025
1026 log::info!("AST query found {} matches for pattern '{}'", results.len(), ast_pattern);
1027
1028 Ok(results)
1029 }
1030
1031 fn find_file_id_by_path(
1033 content_reader: &ContentReader,
1034 trigram_index: &TrigramIndex,
1035 target_path: &str,
1036 ) -> Option<u32> {
1037 for file_id in 0..trigram_index.file_count() {
1039 if let Some(path) = trigram_index.get_file(file_id as u32) {
1040 if path.to_string_lossy() == target_path {
1041 return Some(file_id as u32);
1042 }
1043 }
1044 }
1045
1046 for file_id in 0..content_reader.file_count() {
1048 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1049 if path.to_string_lossy() == target_path {
1050 return Some(file_id as u32);
1051 }
1052 }
1053 }
1054
1055 None
1056 }
1057
1058 fn get_trigram_candidates(&self, pattern: &str, filter: &QueryFilter) -> Result<Vec<SearchResult>> {
1060 let content_path = self.cache.path().join("content.bin");
1062 let content_reader = ContentReader::open(&content_path)
1063 .context("Failed to open content store")?;
1064
1065 let trigrams_path = self.cache.path().join("trigrams.bin");
1067 let trigram_index = if trigrams_path.exists() {
1068 match TrigramIndex::load(&trigrams_path) {
1069 Ok(index) => {
1070 log::debug!("Loaded trigram index from disk: {} trigrams, {} files",
1071 index.trigram_count(), index.file_count());
1072 index
1073 }
1074 Err(e) => {
1075 log::warn!("Failed to load trigram index from disk: {}", e);
1076 log::warn!("Rebuilding trigram index from content store...");
1077 Self::rebuild_trigram_index(&content_reader)?
1078 }
1079 }
1080 } else {
1081 log::debug!("trigrams.bin not found, rebuilding from content store");
1082 Self::rebuild_trigram_index(&content_reader)?
1083 };
1084
1085 let candidates = trigram_index.search(pattern);
1087 log::debug!("Found {} candidate locations from trigram search", candidates.len());
1088
1089 let pattern_owned = pattern.to_string();
1091
1092 use std::collections::HashMap;
1094 let mut candidates_by_file: HashMap<u32, Vec<crate::trigram::FileLocation>> = HashMap::new();
1095 for loc in candidates {
1096 candidates_by_file
1097 .entry(loc.file_id)
1098 .or_insert_with(Vec::new)
1099 .push(loc);
1100 }
1101
1102 log::debug!("Scanning {} files with trigram matches", candidates_by_file.len());
1103
1104 use rayon::prelude::*;
1106
1107 let results: Vec<SearchResult> = candidates_by_file
1108 .par_iter()
1109 .flat_map(|(file_id, locations)| {
1110 let file_path = match trigram_index.get_file(*file_id) {
1112 Some(p) => p,
1113 None => return Vec::new(),
1114 };
1115
1116 let content = match content_reader.get_file_content(*file_id) {
1117 Ok(c) => c,
1118 Err(_) => return Vec::new(),
1119 };
1120
1121 let file_path_str = file_path.to_string_lossy().to_string();
1122
1123 let ext = file_path.extension()
1125 .and_then(|e| e.to_str())
1126 .unwrap_or("");
1127 let lang = Language::from_extension(ext);
1128
1129 let lines: Vec<&str> = content.lines().collect();
1131
1132 let mut seen_lines: std::collections::HashSet<usize> = std::collections::HashSet::new();
1134 let mut file_results = Vec::new();
1135
1136 for loc in locations {
1138 let line_no = loc.line_no as usize;
1139
1140 if seen_lines.contains(&line_no) {
1142 continue;
1143 }
1144
1145 if line_no == 0 || line_no > lines.len() {
1147 log::debug!("Line {} out of bounds (file has {} lines)", line_no, lines.len());
1148 continue;
1149 }
1150
1151 let line = lines[line_no - 1];
1152
1153 let line_matches = if filter.use_contains || filter.use_regex {
1157 line.contains(&pattern_owned)
1159 } else {
1160 Self::has_word_boundary_match(line, &pattern_owned)
1162 };
1163
1164 if !line_matches {
1165 continue;
1166 }
1167
1168 seen_lines.insert(line_no);
1169
1170 file_results.push(SearchResult {
1172 path: file_path_str.clone(),
1173 lang: lang.clone(),
1174 kind: SymbolKind::Unknown("text_match".to_string()),
1175 symbol: Some(pattern_owned.clone()),
1176 span: Span {
1177 start_line: line_no,
1178 end_line: line_no,
1179 start_col: 0,
1180 end_col: 0,
1181 },
1182 scope: None,
1183 preview: line.to_string(),
1184 });
1185 }
1186
1187 file_results
1188 })
1189 .collect();
1190
1191 Ok(results)
1192 }
1193
1194 fn get_regex_candidates(&self, pattern: &str, timeout: Option<&std::time::Duration>, start_time: &std::time::Instant) -> Result<Vec<SearchResult>> {
1218 let regex = Regex::new(pattern)
1220 .with_context(|| format!("Invalid regex pattern: {}", pattern))?;
1221
1222 if let Some(timeout_duration) = timeout {
1224 if start_time.elapsed() > *timeout_duration {
1225 anyhow::bail!(
1226 "Query timeout exceeded ({} seconds) during regex compilation",
1227 timeout_duration.as_secs()
1228 );
1229 }
1230 }
1231
1232 let trigrams = extract_trigrams_from_regex(pattern);
1234
1235 let content_path = self.cache.path().join("content.bin");
1237 let content_reader = ContentReader::open(&content_path)
1238 .context("Failed to open content store")?;
1239
1240 let mut results = Vec::new();
1241
1242 if trigrams.is_empty() {
1243 log::warn!("Regex pattern '{}' has no literals (≥3 chars), falling back to full content scan", pattern);
1245 log::warn!("This may be slow on large codebases. Consider using patterns with literal text.");
1246
1247 for file_id in 0..content_reader.file_count() {
1249 let file_path = content_reader.get_file_path(file_id as u32)
1250 .context("Invalid file_id")?;
1251 let content = content_reader.get_file_content(file_id as u32)?;
1252
1253 self.find_regex_matches_in_file(
1254 ®ex,
1255 file_path,
1256 content,
1257 &mut results,
1258 )?;
1259 }
1260 } else {
1261 log::debug!("Using {} trigrams to narrow regex search candidates", trigrams.len());
1263
1264 let trigrams_path = self.cache.path().join("trigrams.bin");
1266 let trigram_index = if trigrams_path.exists() {
1267 TrigramIndex::load(&trigrams_path)?
1268 } else {
1269 Self::rebuild_trigram_index(&content_reader)?
1270 };
1271
1272 use crate::regex_trigrams::extract_literal_sequences;
1274 let literals = extract_literal_sequences(pattern);
1275
1276 if literals.is_empty() {
1277 log::warn!("Regex extraction found trigrams but no literal sequences - this shouldn't happen");
1278 for file_id in 0..content_reader.file_count() {
1280 let file_path = content_reader.get_file_path(file_id as u32)
1281 .context("Invalid file_id")?;
1282 let content = content_reader.get_file_content(file_id as u32)?;
1283 self.find_regex_matches_in_file(®ex, file_path, content, &mut results)?;
1284 }
1285 } else {
1286 use std::collections::HashSet;
1291 let mut candidate_files: HashSet<u32> = HashSet::new();
1292
1293 for literal in &literals {
1294 let candidates = trigram_index.search(literal);
1296 let file_ids: HashSet<u32> = candidates.iter().map(|loc| loc.file_id).collect();
1297
1298 log::debug!("Literal '{}' found in {} files", literal, file_ids.len());
1299
1300 candidate_files.extend(file_ids);
1303 }
1304
1305 let final_candidates = candidate_files;
1306 log::debug!("After union: searching {} files that contain any literal", final_candidates.len());
1307
1308 for &file_id in &final_candidates {
1310 let file_path = trigram_index.get_file(file_id)
1311 .context("Invalid file_id from trigram search")?;
1312 let content = content_reader.get_file_content(file_id)?;
1313
1314 self.find_regex_matches_in_file(
1315 ®ex,
1316 file_path,
1317 content,
1318 &mut results,
1319 )?;
1320 }
1321 }
1322 }
1323
1324 log::info!("Regex search found {} matches for pattern '{}'", results.len(), pattern);
1325 Ok(results)
1326 }
1327
1328 fn find_regex_matches_in_file(
1330 &self,
1331 regex: &Regex,
1332 file_path: &std::path::Path,
1333 content: &str,
1334 results: &mut Vec<SearchResult>,
1335 ) -> Result<()> {
1336 let file_path_str = file_path.to_string_lossy().to_string();
1337
1338 let ext = file_path.extension()
1340 .and_then(|e| e.to_str())
1341 .unwrap_or("");
1342 let lang = Language::from_extension(ext);
1343
1344 for (line_idx, line) in content.lines().enumerate() {
1346 if regex.is_match(line) {
1347 let line_no = line_idx + 1;
1348
1349 results.push(SearchResult {
1356 path: file_path_str.clone(),
1357 lang: lang.clone(),
1358 kind: SymbolKind::Unknown("regex_match".to_string()),
1359 symbol: None, span: Span {
1361 start_line: line_no,
1362 end_line: line_no,
1363 start_col: 0,
1364 end_col: 0,
1365 },
1366 scope: None,
1367 preview: line.to_string(),
1368 });
1369 }
1370 }
1371
1372 Ok(())
1373 }
1374
1375 fn find_file_id(content_reader: &ContentReader, target_path: &str) -> Option<u32> {
1377 for file_id in 0..content_reader.file_count() {
1378 if let Some(path) = content_reader.get_file_path(file_id as u32) {
1379 if path.to_string_lossy() == target_path {
1380 return Some(file_id as u32);
1381 }
1382 }
1383 }
1384 None
1385 }
1386
1387 fn rebuild_trigram_index(content_reader: &ContentReader) -> Result<TrigramIndex> {
1389 log::debug!("Rebuilding trigram index from {} files", content_reader.file_count());
1390 let mut trigram_index = TrigramIndex::new();
1391
1392 for file_id in 0..content_reader.file_count() {
1393 let file_path = content_reader.get_file_path(file_id as u32)
1394 .context("Invalid file_id")?
1395 .to_path_buf();
1396 let content = content_reader.get_file_content(file_id as u32)?;
1397
1398 let idx = trigram_index.add_file(file_path);
1399 trigram_index.index_file(idx, content);
1400 }
1401
1402 trigram_index.finalize();
1403 log::debug!("Trigram index rebuilt with {} trigrams", trigram_index.trigram_count());
1404
1405 Ok(trigram_index)
1406 }
1407
1408 fn has_word_boundary_match(line: &str, pattern: &str) -> bool {
1419 let escaped_pattern = regex::escape(pattern);
1421 let pattern_with_boundaries = format!(r"\b{}\b", escaped_pattern);
1422
1423 if let Ok(re) = Regex::new(&pattern_with_boundaries) {
1424 re.is_match(line)
1425 } else {
1426 log::debug!("Word boundary regex failed for pattern '{}', falling back to substring", pattern);
1428 line.contains(pattern)
1429 }
1430 }
1431
1432 fn get_index_status(&self) -> Result<(IndexStatus, bool, Option<IndexWarning>)> {
1437 let root = std::env::current_dir()?;
1438
1439 if crate::git::is_git_repo(&root) {
1441 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
1442 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
1444 let warning = IndexWarning {
1445 reason: format!("Branch '{}' has not been indexed", current_branch),
1446 action_required: "rfx index".to_string(),
1447 details: Some(IndexWarningDetails {
1448 current_branch: Some(current_branch),
1449 indexed_branch: None,
1450 current_commit: None,
1451 indexed_commit: None,
1452 }),
1453 };
1454 return Ok((IndexStatus::Stale, false, Some(warning)));
1455 }
1456
1457 if let (Ok(current_commit), Ok(branch_info)) =
1459 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
1460
1461 if branch_info.commit_sha != current_commit {
1462 let warning = IndexWarning {
1463 reason: format!(
1464 "Commit changed from {} to {}",
1465 &branch_info.commit_sha[..7],
1466 ¤t_commit[..7]
1467 ),
1468 action_required: "rfx index".to_string(),
1469 details: Some(IndexWarningDetails {
1470 current_branch: Some(current_branch.clone()),
1471 indexed_branch: Some(current_branch.clone()),
1472 current_commit: Some(current_commit.clone()),
1473 indexed_commit: Some(branch_info.commit_sha.clone()),
1474 }),
1475 };
1476 return Ok((IndexStatus::Stale, false, Some(warning)));
1477 }
1478
1479 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
1481 let mut checked = 0;
1482 let mut changed = 0;
1483 const SAMPLE_SIZE: usize = 10;
1484
1485 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
1486 checked += 1;
1487 let file_path = std::path::Path::new(path);
1488
1489 if let Ok(metadata) = std::fs::metadata(file_path) {
1490 if let Ok(modified) = metadata.modified() {
1491 let indexed_time = branch_info.last_indexed;
1492 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
1493 .unwrap_or_default()
1494 .as_secs() as i64;
1495
1496 if file_time > indexed_time {
1497 changed += 1;
1500 }
1501 }
1502 }
1503 }
1504
1505 if changed > 0 {
1506 let warning = IndexWarning {
1507 reason: format!("{} of {} sampled files modified", changed, checked),
1508 action_required: "rfx index".to_string(),
1509 details: Some(IndexWarningDetails {
1510 current_branch: Some(current_branch.clone()),
1511 indexed_branch: Some(branch_info.branch.clone()),
1512 current_commit: Some(current_commit.clone()),
1513 indexed_commit: Some(branch_info.commit_sha.clone()),
1514 }),
1515 };
1516 return Ok((IndexStatus::Stale, false, Some(warning)));
1517 }
1518 }
1519
1520 return Ok((IndexStatus::Fresh, true, None));
1522 }
1523 }
1524 }
1525
1526 Ok((IndexStatus::Fresh, true, None))
1528 }
1529
1530 fn check_index_freshness(&self) -> Result<()> {
1537 let root = std::env::current_dir()?;
1538
1539 if crate::git::is_git_repo(&root) {
1541 if let Ok(current_branch) = crate::git::get_current_branch(&root) {
1542 if !self.cache.branch_exists(¤t_branch).unwrap_or(false) {
1544 eprintln!("⚠️ WARNING: Index not found for branch '{}'. Run 'rfx index' to index this branch.", current_branch);
1545 return Ok(());
1546 }
1547
1548 if let (Ok(current_commit), Ok(branch_info)) =
1550 (crate::git::get_current_commit(&root), self.cache.get_branch_info(¤t_branch)) {
1551
1552 if branch_info.commit_sha != current_commit {
1553 eprintln!("⚠️ WARNING: Index may be stale (commit changed: {} → {}). Consider running 'rfx index'.",
1554 &branch_info.commit_sha[..7], ¤t_commit[..7]);
1555 return Ok(());
1556 }
1557
1558 if let Ok(branch_files) = self.cache.get_branch_files(¤t_branch) {
1561 let mut checked = 0;
1562 let mut changed = 0;
1563 const SAMPLE_SIZE: usize = 10;
1564
1565 for (path, _indexed_hash) in branch_files.iter().take(SAMPLE_SIZE) {
1566 checked += 1;
1567 let file_path = std::path::Path::new(path);
1568
1569 if let Ok(metadata) = std::fs::metadata(file_path) {
1571 if let Ok(modified) = metadata.modified() {
1572 let indexed_time = branch_info.last_indexed;
1573 let file_time = modified.duration_since(std::time::UNIX_EPOCH)
1574 .unwrap_or_default()
1575 .as_secs() as i64;
1576
1577 if file_time > indexed_time {
1579 changed += 1;
1584 }
1585 }
1586 }
1587 }
1588
1589 if changed > 0 {
1590 eprintln!("⚠️ WARNING: {} of {} sampled files changed since indexing. Consider running 'rfx index'.", changed, checked);
1591 }
1592 }
1593 }
1594 }
1595 }
1596
1597 Ok(())
1598 }
1599}
1600
#[cfg(test)]
mod tests {
    use super::*;
    use crate::indexer::Indexer;
    use crate::models::IndexConfig;
    use std::fs;
    use tempfile::TempDir;

    /// Creates a throwaway project holding `files` (relative path, content),
    /// indexes it, and returns a query engine backed by a fresh
    /// `CacheManager` for that project.
    ///
    /// Parent directories of each file are created on demand. The `TempDir`
    /// is returned alongside the engine so the caller keeps the directory
    /// alive for the duration of the test.
    fn indexed_engine(files: &[(&str, &str)]) -> (TempDir, QueryEngine) {
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir(&project).unwrap();

        for &(relative_path, content) in files {
            let target = project.join(relative_path);
            if let Some(parent) = target.parent() {
                fs::create_dir_all(parent).unwrap();
            }
            fs::write(&target, content).unwrap();
        }

        let indexer = Indexer::new(CacheManager::new(&project), IndexConfig::default());
        indexer.index(&project, false).unwrap();

        (temp, QueryEngine::new(CacheManager::new(&project)))
    }

    #[test]
    fn test_query_engine_creation() {
        let temp = TempDir::new().unwrap();
        let cache = CacheManager::new(temp.path());
        let engine = QueryEngine::new(cache);

        assert!(engine.cache.path().ends_with(".reflex"));
    }

    #[test]
    fn test_filter_modes() {
        let filter_fulltext = QueryFilter::default();
        assert!(!filter_fulltext.symbols_mode);

        let filter_symbols = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        assert!(filter_symbols.symbols_mode);

        let filter_with_kind = QueryFilter {
            kind: Some(SymbolKind::Function),
            symbols_mode: true,
            ..Default::default()
        };
        assert!(filter_with_kind.symbols_mode);
    }

    #[test]
    fn test_fulltext_search() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn main() {\n println!(\"hello\");\n}"),
            ("lib.rs", "pub fn hello() {}"),
        ]);

        let results = engine.search("hello", QueryFilter::default()).unwrap();

        assert!(results.len() >= 2);
        assert!(results.iter().any(|r| r.path.contains("main.rs")));
        assert!(results.iter().any(|r| r.path.contains("lib.rs")));
    }

    #[test]
    fn test_symbol_search() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn greet() {}\nfn main() {\n greet();\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        assert!(results.iter().any(|r| r.kind == SymbolKind::Function));
    }

    #[test]
    fn test_regex_search() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn test1() {}\nfn test2() {}\nfn other() {}",
        )]);

        let filter = QueryFilter {
            use_regex: true,
            ..Default::default()
        };
        let results = engine.search(r"fn test\d", filter).unwrap();

        assert_eq!(results.len(), 2);
        assert!(results.iter().all(|r| r.preview.contains("test")));
    }

    #[test]
    fn test_language_filter() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn main() {}"),
            ("main.js", "function main() {}"),
        ]);

        let filter = QueryFilter {
            language: Some(Language::Rust),
            ..Default::default()
        };
        let results = engine.search("main", filter).unwrap();

        assert!(results.iter().all(|r| r.lang == Language::Rust));
        assert!(results.iter().all(|r| r.path.ends_with(".rs")));
    }

    #[test]
    fn test_kind_filter() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "struct Point {}\nfn main() {}\nimpl Point { fn new() {} }",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            kind: Some(SymbolKind::Function),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("mai", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least one result");
        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("main")), "Should find 'main' function");
    }

    #[test]
    fn test_file_pattern_filter() {
        let (_temp, engine) = indexed_engine(&[
            ("src/lib.rs", "fn foo() {}"),
            ("tests/test.rs", "fn foo() {}"),
        ]);

        let filter = QueryFilter {
            file_pattern: Some("src/".to_string()),
            ..Default::default()
        };
        let results = engine.search("foo", filter).unwrap();

        assert!(results.iter().all(|r| r.path.contains("src/")));
        assert!(!results.iter().any(|r| r.path.contains("tests/")));
    }

    #[test]
    fn test_limit_filter() {
        let content = (0..20).map(|i| format!("fn test{}() {{}}", i)).collect::<Vec<_>>().join("\n");
        let (_temp, engine) = indexed_engine(&[("main.rs", content.as_str())]);

        let filter = QueryFilter {
            limit: Some(5),
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_exact_match_filter() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn test() {}\nfn test_helper() {}\nfn other_test() {}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            exact: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].symbol.as_deref(), Some("test"));
    }

    #[test]
    fn test_expand_mode() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "fn greet() {\n println!(\"Hello\");\n println!(\"World\");\n}",
        )]);

        let filter = QueryFilter {
            symbols_mode: true,
            expand: true,
            ..Default::default()
        };
        let results = engine.search("greet", filter).unwrap();

        assert!(!results.is_empty());
        let result = &results[0];
        assert!(result.preview.contains("println"));
    }

    #[test]
    fn test_search_empty_index() {
        // An indexed project with no files at all.
        let (_temp, engine) = indexed_engine(&[]);

        let results = engine.search("nonexistent", QueryFilter::default()).unwrap();

        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_search_no_index() {
        // Deliberately skips indexing, so the shared helper is not used.
        let temp = TempDir::new().unwrap();
        let project = temp.path().join("project");
        fs::create_dir(&project).unwrap();

        let cache = CacheManager::new(&project);
        let engine = QueryEngine::new(cache);
        let filter = QueryFilter::default();

        assert!(engine.search("test", filter).is_err());
    }

    #[test]
    fn test_search_special_characters() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "let x = 42;\nlet y = x + 1;")]);

        let results = engine.search("x + ", QueryFilter::default()).unwrap();

        assert!(!results.is_empty());
    }

    #[test]
    fn test_search_unicode() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "// 你好世界\nfn main() {}")]);

        let filter = QueryFilter {
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("你好", filter).unwrap();

        assert!(!results.is_empty());
    }

    #[test]
    fn test_case_sensitive_search() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn Test() {}\nfn test() {}")]);

        let results = engine.search("Test", QueryFilter::default()).unwrap();

        assert!(results.iter().any(|r| r.preview.contains("Test()")));
    }

    #[test]
    fn test_results_sorted_deterministically() {
        let (_temp, engine) = indexed_engine(&[
            ("a.rs", "fn test() {}"),
            ("z.rs", "fn test() {}"),
            ("m.rs", "fn test() {}\nfn test2() {}"),
        ]);
        let filter = QueryFilter::default();

        let results1 = engine.search("test", filter.clone()).unwrap();
        let results2 = engine.search("test", filter.clone()).unwrap();
        let results3 = engine.search("test", filter).unwrap();

        assert_eq!(results1.len(), results2.len());
        assert_eq!(results1.len(), results3.len());

        // Repeated runs must agree element by element.
        for ((first, second), third) in results1.iter().zip(&results2).zip(&results3) {
            assert_eq!(first.path, second.path);
            assert_eq!(first.path, third.path);
            assert_eq!(first.span.start_line, second.span.start_line);
            assert_eq!(first.span.start_line, third.span.start_line);
        }

        // Results are ordered by path, then by starting line.
        for pair in results1.windows(2) {
            let (curr, next) = (&pair[0], &pair[1]);
            assert!(
                curr.path < next.path ||
                (curr.path == next.path && curr.span.start_line <= next.span.start_line)
            );
        }
    }

    #[test]
    fn test_multiple_filters_combined() {
        let (_temp, engine) = indexed_engine(&[
            ("src/main.rs", "fn test() {}\nstruct Test {}"),
            ("src/lib.rs", "fn test() {}"),
            ("test.js", "function test() {}"),
        ]);

        let filter = QueryFilter {
            language: Some(Language::Rust),
            kind: Some(SymbolKind::Function),
            file_pattern: Some("src/main".to_string()),
            symbols_mode: true,
            ..Default::default()
        };
        let results = engine.search("test", filter).unwrap();

        assert_eq!(results.len(), 1);
        assert!(results[0].path.contains("src/main.rs"));
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_find_symbol_helper() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn greet() {}")]);

        let results = engine.find_symbol("greet").unwrap();

        assert!(!results.is_empty());
        assert_eq!(results[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_list_by_kind_helper() {
        let (_temp, engine) = indexed_engine(&[(
            "main.rs",
            "struct Point {}\nfn test() {}\nstruct Line {}",
        )]);

        let filter = QueryFilter {
            kind: Some(SymbolKind::Struct),
            symbols_mode: true,
            use_contains: true,
            ..Default::default()
        };
        let results = engine.search("oin", filter).unwrap();

        assert!(!results.is_empty(), "Should find at least Point struct");
        assert!(results.iter().all(|r| r.kind == SymbolKind::Struct));
        assert!(results.iter().any(|r| r.symbol.as_deref() == Some("Point")));
    }

    #[test]
    fn test_search_with_metadata() {
        let (_temp, engine) = indexed_engine(&[("main.rs", "fn test() {}")]);

        let response = engine
            .search_with_metadata("test", QueryFilter::default())
            .unwrap();

        assert!(!response.results.is_empty());
    }

    #[test]
    fn test_search_across_languages() {
        let (_temp, engine) = indexed_engine(&[
            ("main.rs", "fn greet() {}"),
            ("main.ts", "function greet() {}"),
            ("main.py", "def greet(): pass"),
        ]);

        let results = engine.search("greet", QueryFilter::default()).unwrap();

        assert!(results.len() >= 3);
        assert!(results.iter().any(|r| r.lang == Language::Rust));
        assert!(results.iter().any(|r| r.lang == Language::TypeScript));
        assert!(results.iter().any(|r| r.lang == Language::Python));
    }
}