1use regex::RegexBuilder;
2use std::collections::HashSet;
3use std::time::Instant;
4use tantivy::{collector::TopDocs, query::QueryParser, Index};
5
6use super::results::{MatchType, SearchHit, SearchResult};
7use crate::config::SearchConfig;
8use crate::error::Result;
9use crate::index::schema::SchemaFields;
10
/// Full-text / regex searcher over a tantivy index.
pub struct Searcher {
    /// Search tuning knobs (default/max result limits).
    config: SearchConfig,
    /// The tantivy index being queried.
    index: Index,
    /// Cached handles to the schema's fields.
    fields: SchemaFields,
}
17
18impl Searcher {
19 pub fn new(config: SearchConfig, index: Index) -> Self {
21 let schema = index.schema();
22 let fields = SchemaFields::new(&schema);
23
24 Self {
25 config,
26 index,
27 fields,
28 }
29 }
30
31 pub fn search(
33 &self,
34 query: &str,
35 limit: Option<usize>,
36 case_sensitive: bool,
37 context_before: Option<usize>,
38 context_after: Option<usize>,
39 ) -> Result<SearchResult> {
40 let start = Instant::now();
41 let limit = limit
42 .unwrap_or(self.config.default_limit)
43 .min(self.config.max_limit);
44
45 let reader = super::open_reader_with_retry(&self.index)?;
47 let searcher = reader.searcher();
48
49 let mut query_fields = vec![self.fields.content];
51 if let Some(fp) = self.fields.filepath {
52 query_fields.push(fp);
53 }
54 let query_parser = QueryParser::for_index(&self.index, query_fields);
55
56 let search_terms: Vec<&str> = query
59 .split(|c: char| !c.is_alphanumeric() && c != '_')
60 .filter(|s| !s.is_empty())
61 .collect();
62
63 if search_terms.is_empty() {
65 return Ok(SearchResult {
66 total: 0,
67 hits: vec![],
68 query_time_ms: start.elapsed().as_millis() as u64,
69 text_hits: 0,
70 semantic_hits: 0,
71 });
72 }
73
74 let tantivy_query_str = search_terms.join(" ");
76 let (tantivy_query, _errors) = query_parser.parse_query_lenient(&tantivy_query_str);
77
78 let fetch_limit = limit * 50;
80 let top_docs = searcher.search(&tantivy_query, &TopDocs::with_limit(fetch_limit))?;
81
82 let mut hits = Vec::with_capacity(top_docs.len());
84 let max_score = top_docs.first().map(|(score, _)| *score).unwrap_or(1.0);
85 let mut seen: HashSet<(String, u64, u64)> = HashSet::new();
86
87 let query_normalized = if case_sensitive {
89 query.to_string()
90 } else {
91 query.to_lowercase()
92 };
93 let query_terms: Vec<&str> = query_normalized.split_whitespace().collect();
94 let is_multi_word = query_terms.len() > 1;
95
96 for (score, doc_address) in top_docs {
97 if hits.len() >= limit {
99 break;
100 }
101
102 let doc = searcher.doc(doc_address)?;
103
104 let path = extract_text(&doc, self.fields.path).unwrap_or_default();
106 let doc_id = extract_text(&doc, self.fields.doc_id).unwrap_or_default();
107 let content = extract_text(&doc, self.fields.content).unwrap_or_default();
108 let line_start = extract_u64(&doc, self.fields.line_start).unwrap_or(1);
109 let chunk_id = extract_text(&doc, self.fields.chunk_id).unwrap_or_default();
110
111 let content_normalized = if case_sensitive {
112 content.clone()
113 } else {
114 content.to_lowercase()
115 };
116
117 let path_normalized = path.to_lowercase();
119 let path_match = query_terms
120 .iter()
121 .all(|term| path_normalized.contains(term));
122
123 let exact_match = content_normalized.contains(&query_normalized);
125 let and_match = is_multi_word
126 && query_terms
127 .iter()
128 .all(|term| content_normalized.contains(term));
129 if !exact_match && !and_match && !path_match {
130 continue;
131 }
132
133 let normalized_score = if max_score > 0.0 {
135 score / max_score
136 } else {
137 0.0
138 };
139
140 let is_content_match = exact_match || and_match;
142
143 let (snippet, snippet_offset, snippet_line_count, match_line_offset) =
144 if is_content_match {
145 create_relevant_snippet(&content, query, 10, context_before, context_after)
146 } else {
147 let lines: Vec<&str> = content.lines().take(10).collect();
149 let snippet = lines.join("\n");
150 let line_count = lines.len();
151 (snippet, 0, line_count, 0)
152 };
153
154 let actual_line_start = line_start + snippet_offset as u64;
156 let actual_line_end = actual_line_start + snippet_line_count.saturating_sub(1) as u64;
157 let match_line_in_snippet = match_line_offset - snippet_offset;
158
159 let key = (path.clone(), actual_line_start, actual_line_end);
161 if !seen.insert(key) {
162 continue;
163 }
164
165 hits.push(SearchHit {
166 path,
167 line_start: actual_line_start,
168 line_end: actual_line_end,
169 snippet,
170 score: normalized_score,
171 is_chunk: !chunk_id.is_empty(),
172 doc_id,
173 match_type: MatchType::Text,
174 match_line_in_snippet,
175 });
176 }
177
178 let query_time_ms = start.elapsed().as_millis() as u64;
179 let text_hits = hits.len();
180
181 Ok(SearchResult {
182 total: hits.len(),
183 hits,
184 query_time_ms,
185 text_hits,
186 semantic_hits: 0,
187 })
188 }
189
190 pub fn search_filtered(
192 &self,
193 query: &str,
194 limit: Option<usize>,
195 filters: SearchFilters,
196 use_regex: bool,
197 case_sensitive: bool,
198 context_before: Option<usize>,
199 context_after: Option<usize>,
200 verbose: bool,
201 ) -> Result<SearchResult> {
202 let mut result = if use_regex {
204 self.search_regex(
205 query,
206 Some(limit.unwrap_or(self.config.max_limit) * 2),
207 case_sensitive,
208 context_before,
209 context_after,
210 )?
211 } else {
212 self.search(
213 query,
214 Some(limit.unwrap_or(self.config.max_limit) * 2),
215 case_sensitive,
216 context_before,
217 context_after,
218 )?
219 };
220
221 let pre_filter_count = result.hits.len();
222 if verbose {
223 eprintln!(
224 "[verbose] search mode: {}",
225 if use_regex { "regex" } else { "text" }
226 );
227 eprintln!("[verbose] matches before filtering: {}", pre_filter_count);
228 }
229
230 if let Some(ref extensions) = filters.extensions {
232 result.hits.retain(|hit| {
233 if let Some(ext) = std::path::Path::new(&hit.path).extension() {
234 extensions
235 .iter()
236 .any(|e| e.eq_ignore_ascii_case(&ext.to_string_lossy()))
237 } else {
238 false
239 }
240 });
241 if verbose {
242 eprintln!(
243 "[verbose] after extension filter ({}): {}",
244 extensions.join(", "),
245 result.hits.len()
246 );
247 }
248 }
249
250 if let Some(ref paths) = filters.paths {
251 result
252 .hits
253 .retain(|hit| paths.iter().any(|p| path_matches(p, &hit.path)));
254 if verbose {
255 eprintln!(
256 "[verbose] after path filter ({}): {}",
257 paths.join(", "),
258 result.hits.len()
259 );
260 }
261 }
262
263 let limit = limit
265 .unwrap_or(self.config.default_limit)
266 .min(self.config.max_limit);
267 result.hits.truncate(limit);
268 result.total = result.hits.len();
269
270 result.text_hits = result
272 .hits
273 .iter()
274 .filter(|h| matches!(h.match_type, MatchType::Text | MatchType::Hybrid))
275 .count();
276 result.semantic_hits = result
277 .hits
278 .iter()
279 .filter(|h| matches!(h.match_type, MatchType::Semantic | MatchType::Hybrid))
280 .count();
281
282 if verbose {
283 eprintln!("[verbose] final results: {}", result.total);
284 }
285
286 Ok(result)
287 }
288
    /// Regex search: BM25 pre-filters candidate documents, then the compiled
    /// regex is applied to each candidate's full content.
    ///
    /// Hits are deduplicated by `(path, line_start, line_end)` and scores
    /// normalized to `[0, 1]`.
    ///
    /// # Errors
    /// Returns `YgrepError::Search` when `pattern` is not a valid regex, and
    /// propagates reader/search failures from tantivy.
    pub fn search_regex(
        &self,
        pattern: &str,
        limit: Option<usize>,
        case_sensitive: bool,
        context_before: Option<usize>,
        context_after: Option<usize>,
    ) -> Result<SearchResult> {
        let start = Instant::now();
        // Clamp the requested limit to the configured maximum.
        let limit = limit
            .unwrap_or(self.config.default_limit)
            .min(self.config.max_limit);

        let regex = match RegexBuilder::new(pattern)
            .case_insensitive(!case_sensitive)
            .build()
        {
            Ok(r) => r,
            Err(e) => {
                return Err(crate::error::YgrepError::Search(format!(
                    "Invalid regex pattern: {}",
                    e
                )));
            }
        };

        let reader = super::open_reader_with_retry(&self.index)?;
        let searcher = reader.searcher();

        // Query content plus the filename field when the schema provides one.
        let mut query_fields = vec![self.fields.content];
        if let Some(fp) = self.fields.filepath {
            query_fields.push(fp);
        }
        let query_parser = QueryParser::for_index(&self.index, query_fields);

        // Pull literal fragments out of the pattern to pre-filter candidates
        // via the inverted index; single-character fragments are dropped as
        // they are usually regex syntax rather than content.
        let search_terms: Vec<&str> = pattern
            .split(|c: char| !c.is_alphanumeric() && c != '_')
            .filter(|s| !s.is_empty() && s.len() > 1)
            .collect();

        let candidates: Vec<_> = if !search_terms.is_empty() {
            let tantivy_query_str = search_terms.join(" ");
            let (tantivy_query, _errors) = query_parser.parse_query_lenient(&tantivy_query_str);

            // Over-fetch heavily: most candidates will fail the regex check.
            let fetch_limit = limit * 100;
            searcher.search(&tantivy_query, &TopDocs::with_limit(fetch_limit))?
        } else {
            // No usable literal fragments (pure-metacharacter pattern):
            // fall back to scanning all documents, still bounded.
            let all_query = tantivy::query::AllQuery;
            let fetch_limit = limit * 100;
            searcher.search(&all_query, &TopDocs::with_limit(fetch_limit))?
        };

        let mut hits = Vec::with_capacity(candidates.len());
        // Normalize scores against the best candidate so they fall in [0, 1].
        let max_score = candidates.first().map(|(score, _)| *score).unwrap_or(1.0);
        let mut seen: HashSet<(String, u64, u64)> = HashSet::new();

        for (score, doc_address) in candidates {
            if hits.len() >= limit {
                break;
            }

            let doc = searcher.doc(doc_address)?;

            let path = extract_text(&doc, self.fields.path).unwrap_or_default();
            let doc_id = extract_text(&doc, self.fields.doc_id).unwrap_or_default();
            let content = extract_text(&doc, self.fields.content).unwrap_or_default();
            let line_start = extract_u64(&doc, self.fields.line_start).unwrap_or(1);
            let chunk_id = extract_text(&doc, self.fields.chunk_id).unwrap_or_default();

            // The regex is the source of truth; BM25 only supplied candidates.
            if !regex.is_match(&content) {
                continue;
            }

            let normalized_score = if max_score > 0.0 {
                score / max_score
            } else {
                0.0
            };

            let (snippet, snippet_offset, snippet_line_count, match_line_offset) =
                create_regex_snippet(&content, &regex, 10, context_before, context_after);

            // Convert snippet-relative offsets to absolute line numbers.
            let actual_line_start = line_start + snippet_offset as u64;
            let actual_line_end = actual_line_start + snippet_line_count.saturating_sub(1) as u64;
            let match_line_in_snippet = match_line_offset - snippet_offset;

            // Skip duplicates, e.g. a chunk covering the same lines as its
            // parent document.
            let key = (path.clone(), actual_line_start, actual_line_end);
            if !seen.insert(key) {
                continue;
            }

            hits.push(SearchHit {
                path,
                line_start: actual_line_start,
                line_end: actual_line_end,
                snippet,
                score: normalized_score,
                is_chunk: !chunk_id.is_empty(),
                doc_id,
                match_type: MatchType::Text,
                match_line_in_snippet,
            });
        }

        let query_time_ms = start.elapsed().as_millis() as u64;
        let text_hits = hits.len();

        Ok(SearchResult {
            total: hits.len(),
            hits,
            query_time_ms,
            text_hits,
            semantic_hits: 0,
        })
    }
422}
423
/// Optional post-search filters applied to hits by `search_filtered`.
#[derive(Debug, Clone, Default)]
pub struct SearchFilters {
    /// Keep only hits whose file extension matches one of these,
    /// compared case-insensitively (e.g. `["rs", "py"]`).
    pub extensions: Option<Vec<String>>,
    /// Keep only hits whose path matches one of these substring or glob
    /// patterns (see `path_matches`).
    pub paths: Option<Vec<String>>,
}
432
433fn extract_text(doc: &tantivy::TantivyDocument, field: tantivy::schema::Field) -> Option<String> {
435 doc.get_first(field).and_then(|v| {
436 if let tantivy::schema::OwnedValue::Str(s) = v {
437 Some(s.to_string())
438 } else {
439 None
440 }
441 })
442}
443
444fn extract_u64(doc: &tantivy::TantivyDocument, field: tantivy::schema::Field) -> Option<u64> {
446 doc.get_first(field).and_then(|v| {
447 if let tantivy::schema::OwnedValue::U64(n) = v {
448 Some(*n)
449 } else {
450 None
451 }
452 })
453}
454
/// Build a snippet of `content` centred on the line most relevant to `query`.
///
/// Returns `(snippet, first_line_offset, line_count, match_line_offset)`,
/// where offsets are 0-based line indices into `content`. Term matching is
/// case-insensitive; when no line matches, the first `max_lines` lines are
/// returned with zero offsets.
fn create_relevant_snippet(
    content: &str,
    query: &str,
    max_lines: usize,
    ctx_before: Option<usize>,
    ctx_after: Option<usize>,
) -> (String, usize, usize, usize) {
    let lines: Vec<&str> = content.lines().collect();
    let query_lower = query.to_lowercase();
    let terms: Vec<&str> = query_lower.split_whitespace().collect();

    // 0-based indices of every line containing at least one query term.
    let matched: Vec<usize> = lines
        .iter()
        .enumerate()
        .filter_map(|(idx, line)| {
            let lower = line.to_lowercase();
            terms.iter().any(|t| lower.contains(t)).then_some(idx)
        })
        .collect();

    let Some(&first) = matched.first() else {
        // Nothing matched: fall back to the head of the content.
        let head: Vec<&str> = lines.iter().take(max_lines).copied().collect();
        let snippet = head.join("\n");
        let count = snippet.lines().count();
        return (snippet, 0, count, 0);
    };

    // For multi-word queries, prefer the earliest line hitting the most
    // query terms; single-word queries just take the first matching line.
    let best = if terms.len() > 1 {
        matched
            .iter()
            .copied()
            .fold((first, 0usize), |(best, best_hits), idx| {
                let lower = lines[idx].to_lowercase();
                let hits = terms.iter().filter(|t| lower.contains(*t)).count();
                // Strict `>` keeps the earliest line on ties.
                if hits > best_hits {
                    (idx, hits)
                } else {
                    (best, best_hits)
                }
            })
            .0
    } else {
        first
    };

    let before = ctx_before.unwrap_or(2);
    let after = ctx_after.unwrap_or_else(|| max_lines.saturating_sub(before + 1));

    let start = best.saturating_sub(before);
    let end = (best + after + 1).min(lines.len());

    (lines[start..end].join("\n"), start, end - start, best)
}
522
523fn create_regex_snippet(
526 content: &str,
527 regex: ®ex::Regex,
528 max_lines: usize,
529 ctx_before: Option<usize>,
530 ctx_after: Option<usize>,
531) -> (String, usize, usize, usize) {
532 let lines: Vec<&str> = content.lines().collect();
533
534 let mut matching_indices: Vec<usize> = Vec::new();
536 for (i, line) in lines.iter().enumerate() {
537 if regex.is_match(line) {
538 matching_indices.push(i);
539 }
540 }
541
542 if matching_indices.is_empty() {
543 let snippet = lines
545 .iter()
546 .take(max_lines)
547 .copied()
548 .collect::<Vec<_>>()
549 .join("\n");
550 let line_count = snippet.lines().count();
551 return (snippet, 0, line_count, 0);
552 }
553
554 let first_match = matching_indices[0];
556 let context_before = ctx_before.unwrap_or(2);
557 let context_after = ctx_after.unwrap_or_else(|| max_lines.saturating_sub(context_before + 1));
558
559 let start = first_match.saturating_sub(context_before);
560 let end = (first_match + context_after + 1).min(lines.len());
561
562 let snippet = lines[start..end].join("\n");
563 let line_count = end - start;
564 (snippet, start, line_count, first_match)
565}
566
567fn path_matches(pattern: &str, path: &str) -> bool {
575 if pattern.contains('*') || pattern.contains('?') {
576 glob_to_regex(pattern)
577 .map(|re| re.is_match(path))
578 .unwrap_or(false)
579 } else {
580 path.starts_with(pattern) || path.contains(pattern)
581 }
582}
583
584fn glob_to_regex(pattern: &str) -> std::result::Result<regex::Regex, regex::Error> {
586 let mut re = String::with_capacity(pattern.len() * 2);
587 let chars: Vec<char> = pattern.chars().collect();
588 let mut i = 0;
589
590 while i < chars.len() {
591 if chars[i] == '*' && i + 1 < chars.len() && chars[i + 1] == '*' {
592 re.push_str(".*");
594 i += 2;
595 if i < chars.len() && chars[i] == '/' {
597 re.push_str("/?");
598 i += 1;
599 }
600 } else if chars[i] == '*' {
601 re.push_str("[^/]*");
603 i += 1;
604 } else if chars[i] == '?' {
605 re.push_str("[^/]");
606 i += 1;
607 } else {
608 let ch = chars[i];
610 if ".+(){}[]^$|\\".contains(ch) {
611 re.push('\\');
612 }
613 re.push(ch);
614 i += 1;
615 }
616 }
617
618 RegexBuilder::new(&re).case_insensitive(true).build()
619}
620
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::schema::build_document_schema;
    use tantivy::doc;
    use tempfile::tempdir;

    // Build an empty on-disk index with the project schema and tokenizers.
    fn create_test_index(path: &std::path::Path) -> (Index, SchemaFields) {
        let schema = build_document_schema();
        let index = Index::create_in_dir(path, schema.clone()).unwrap();
        crate::index::register_tokenizers(index.tokenizers());
        let fields = SchemaFields::new(&schema);
        (index, fields)
    }

    // Index a single document, filling the metadata fields with defaults.
    fn add_doc(
        index: &Index,
        fields: &SchemaFields,
        doc_id: &str,
        path: &str,
        content: &str,
        ext: &str,
    ) {
        let mut writer = index.writer(50_000_000).unwrap();
        writer
            .add_document(doc!(
                fields.doc_id => doc_id,
                fields.path => path,
                fields.filepath.unwrap() => path,
                fields.workspace => "/test",
                fields.content => content,
                fields.mtime => 0u64,
                fields.size => content.len() as u64,
                fields.extension => ext,
                fields.line_start => 1u64,
                fields.line_end => content.lines().count() as u64,
                fields.chunk_id => "",
                fields.parent_doc => ""
            ))
            .unwrap();
        writer.commit().unwrap();
    }

    // A simple term query returns the indexed document.
    #[test]
    fn test_basic_search() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn main() { println!(\"Hello, world!\"); }",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);
        let result = searcher.search("hello", None, false, None, None)?;

        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/main.rs");

        Ok(())
    }

    // With case_sensitive=false, query case must not matter.
    #[test]
    fn test_case_insensitive_search() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/lib.rs",
            "fn greet() { println!(\"Hello World\"); }",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search("HELLO", None, false, None, None)?;
        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/lib.rs");

        Ok(())
    }

    // A query with no alphanumeric tokens produces no hits (early return).
    #[test]
    fn test_empty_query_returns_empty() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn main() {}",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search("...", None, false, None, None)?;
        assert!(result.is_empty());

        Ok(())
    }

    // Regex search matches across the document content.
    #[test]
    fn test_regex_search_basic() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn hello_world() {\n    println!(\"Hello!\");\n}",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search_regex("hello.*world", None, false, None, None)?;
        assert_eq!(result.hits.len(), 1);

        Ok(())
    }

    // An unparseable pattern surfaces as a Search error, not a panic.
    #[test]
    fn test_regex_search_invalid_returns_error() {
        let temp_dir = tempdir().unwrap();
        let (index, _fields) = create_test_index(temp_dir.path());

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search_regex("[invalid", None, false, None, None);
        assert!(result.is_err());
    }

    // Extension filter keeps only the requested file types.
    #[test]
    fn test_search_extension_filter() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn hello() {}",
            "rs",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "src/main.py",
            "def hello(): pass",
            "py",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let filters = SearchFilters {
            extensions: Some(vec!["rs".to_string()]),
            paths: None,
        };
        let result =
            searcher.search_filtered("hello", None, filters, false, false, None, None, false)?;

        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/main.rs");

        Ok(())
    }

    // Path filter keeps only hits under the given prefix.
    #[test]
    fn test_search_path_filter() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn hello() {}",
            "rs",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "lib/utils.rs",
            "fn hello() {}",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let filters = SearchFilters {
            extensions: None,
            paths: Some(vec!["lib/".to_string()]),
        };
        let result =
            searcher.search_filtered("hello", None, filters, false, false, None, None, false)?;

        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "lib/utils.rs");

        Ok(())
    }

    // Unit coverage for substring, `*`, `**`, `?`, and case-insensitive globs.
    #[test]
    fn test_path_matches_glob() {
        assert!(path_matches("src/", "src/main.rs"));
        assert!(path_matches("src/", "project/src/main.rs"));
        assert!(!path_matches("lib/", "src/main.rs"));

        assert!(path_matches("src/*/tests/", "src/api/tests/foo.rs"));
        assert!(path_matches("src/*/tests/", "src/core/tests/bar.rs"));
        assert!(!path_matches("src/*/tests/", "src/a/b/tests/foo.rs"));

        assert!(path_matches("**/tests/", "src/api/tests/foo.rs"));
        assert!(path_matches("**/tests/", "deep/nested/tests/bar.rs"));
        assert!(path_matches("src/**/test.rs", "src/a/b/c/test.rs"));

        assert!(path_matches("src/?.rs", "src/a.rs"));
        assert!(!path_matches("src/?.rs", "src/ab.rs"));

        assert!(path_matches("SRC/*/tests/", "src/api/tests/foo.rs"));
        assert!(!path_matches("SRC/", "src/main.rs"));
    }

    // End-to-end: a glob path filter narrows results to matching directories.
    #[test]
    fn test_search_path_filter_glob() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "user/plugins/impersonate/tests/test.php",
            "class FooTest extends Plugin {}",
            "php",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "user/plugins/impersonate/src/plugin.php",
            "class Plugin extends Base {}",
            "php",
        );
        add_doc(
            &index,
            &fields,
            "test3",
            "user/plugins/auth/tests/test.php",
            "class BarTest extends Plugin {}",
            "php",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let filters = SearchFilters {
            extensions: None,
            paths: Some(vec!["user/plugins/*/tests/".to_string()]),
        };
        let result = searcher.search_filtered(
            "extends Plugin",
            None,
            filters,
            false,
            false,
            None,
            None,
            false,
        )?;

        assert_eq!(result.hits.len(), 2);
        assert!(result.hits.iter().all(|h| h.path.contains("/tests/")));

        Ok(())
    }

    // Hits come back sorted by normalized score, best first.
    #[test]
    fn test_multiple_results_ordered_by_score() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());

        add_doc(
            &index,
            &fields,
            "test1",
            "src/many.rs",
            "hello hello hello hello hello",
            "rs",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "src/one.rs",
            "hello world goodbye",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);
        let result = searcher.search("hello", None, false, None, None)?;

        assert!(result.hits.len() >= 2);
        for pair in result.hits.windows(2) {
            assert!(pair[0].score >= pair[1].score);
        }

        Ok(())
    }

    // A chunk covering the same lines as its parent document is deduplicated.
    #[test]
    fn test_dedup_full_doc_and_chunk() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());

        let content = "fn hello() {\n    println!(\"Hello, world!\");\n}";

        let mut writer = index.writer(50_000_000).unwrap();
        writer
            .add_document(doc!(
                fields.doc_id => "full-doc",
                fields.path => "src/main.rs",
                fields.filepath.unwrap() => "src/main.rs",
                fields.workspace => "/test",
                fields.content => content,
                fields.mtime => 0u64,
                fields.size => content.len() as u64,
                fields.extension => "rs",
                fields.line_start => 1u64,
                fields.line_end => 3u64,
                fields.chunk_id => "",
                fields.parent_doc => ""
            ))
            .unwrap();
        writer
            .add_document(doc!(
                fields.doc_id => "chunk-1",
                fields.path => "src/main.rs",
                fields.filepath.unwrap() => "src/main.rs",
                fields.workspace => "/test",
                fields.content => content,
                fields.mtime => 0u64,
                fields.size => content.len() as u64,
                fields.extension => "rs",
                fields.line_start => 1u64,
                fields.line_end => 3u64,
                fields.chunk_id => "chunk-1",
                fields.parent_doc => "full-doc"
            ))
            .unwrap();
        writer.commit().unwrap();

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search("hello", None, false, None, None)?;
        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/main.rs");

        let result = searcher.search_regex("hello", None, false, None, None)?;
        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/main.rs");

        Ok(())
    }

    // A term appearing only in the filename still yields that file as a hit.
    #[test]
    fn test_filename_search() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());

        add_doc(
            &index,
            &fields,
            "test1",
            "src/commands/dashboard.rs",
            "fn run() {\n    println!(\"starting...\");\n}",
            "rs",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "src/main.rs",
            "fn main() { hello(); }",
            "rs",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let result = searcher.search("dashboard", None, false, None, None)?;
        assert_eq!(result.hits.len(), 1);
        assert_eq!(result.hits[0].path, "src/commands/dashboard.rs");

        Ok(())
    }

    // text_hits is recomputed after filtering so counters match `total`.
    #[test]
    fn test_text_hits_consistent_after_filter() -> Result<()> {
        let temp_dir = tempdir().unwrap();
        let (index, fields) = create_test_index(temp_dir.path());
        add_doc(
            &index,
            &fields,
            "test1",
            "src/main.rs",
            "fn hello() {}",
            "rs",
        );
        add_doc(
            &index,
            &fields,
            "test2",
            "src/main.py",
            "def hello(): pass",
            "py",
        );
        add_doc(
            &index,
            &fields,
            "test3",
            "lib/utils.js",
            "function hello() {}",
            "js",
        );

        let config = SearchConfig::default();
        let searcher = Searcher::new(config, index);

        let filters = SearchFilters {
            extensions: Some(vec!["rs".to_string()]),
            paths: None,
        };
        let result =
            searcher.search_filtered("hello", None, filters, false, false, None, None, false)?;

        assert_eq!(result.total, 1);
        assert_eq!(result.text_hits, 1);
        assert_eq!(result.text_hits, result.total);

        let filters = SearchFilters {
            extensions: None,
            paths: Some(vec!["nonexistent/".to_string()]),
        };
        let result =
            searcher.search_filtered("hello", None, filters, false, false, None, None, false)?;

        assert_eq!(result.total, 0);
        assert_eq!(result.text_hits, 0);

        Ok(())
    }
}