1use std::path::{Path, PathBuf};
13
14use anyhow::{Context, Result};
15use tantivy::collector::TopDocs;
16use tantivy::query::{BooleanQuery, FuzzyTermQuery, Occur, QueryParser, TermQuery};
17use tantivy::schema::{IndexRecordOption, Value};
18use tantivy::snippet::{Snippet, SnippetGenerator};
19use tantivy::{Index, ReloadPolicy, Searcher as TantivySearcher, Term};
20use tracing::{debug, info, warn};
21
22use crate::indexer::{open_index, IndexSchema};
23use xore_core::types::SearchResult;
24
/// Execution strategy selected for a raw query string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryType {
    /// Plain query, handed to the query parser as-is.
    Standard,
    /// Prefix lookup, written by the user as `term*`.
    Prefix,
    /// Typo-tolerant lookup, written by the user as `~term`.
    Fuzzy,
}
35
/// Tunable knobs for a [`Searcher`].
#[derive(Debug, Clone)]
pub struct SearchConfig {
    /// Maximum number of hits returned when the caller gives no explicit limit.
    pub limit: usize,
    /// Desired maximum length of a generated snippet.
    pub snippet_max_length: usize,
    /// Whether snippets with highlighted matches are produced at all.
    pub enable_highlight: bool,
    /// Edit distance handed to the fuzzy term query.
    pub fuzzy_distance: u8,
    /// Prefixes shorter than this fall back to a standard search.
    pub min_prefix_length: usize,
}

impl Default for SearchConfig {
    /// Returns the stock configuration: 100 hits, 200-character snippets,
    /// highlighting on, fuzzy distance 2 and a minimum prefix length of 2.
    fn default() -> Self {
        let limit = 100;
        let snippet_max_length = 200;
        let enable_highlight = true;
        let fuzzy_distance = 2;
        let min_prefix_length = 2;

        Self { limit, snippet_max_length, enable_highlight, fuzzy_distance, min_prefix_length }
    }
}
62
63pub struct Searcher {
67 index: Index,
68 schema: IndexSchema,
69 reader: tantivy::IndexReader,
70 config: SearchConfig,
71}
72
73impl Searcher {
74 pub fn new(index_path: &Path) -> Result<Self> {
76 Self::with_config(index_path, SearchConfig::default())
77 }
78
79 pub fn with_config(index_path: &Path, config: SearchConfig) -> Result<Self> {
81 let (index, schema) = open_index(index_path).with_context(|| {
82 format!(
83 "无法打开搜索索引: {}\n💡 提示: 请先运行 'xore f --index' 建立索引,或使用 '--rebuild' 重建",
84 index_path.display()
85 )
86 })?;
87
88 let reader = index
89 .reader_builder()
90 .reload_policy(ReloadPolicy::OnCommitWithDelay)
91 .try_into()
92 .with_context(|| {
93 "无法创建索引读取器\n💡 提示: 索引可能已损坏,尝试运行 'xore f --rebuild' 重建"
94 })?;
95
96 Ok(Self { index, schema, reader, config })
97 }
98
99 pub fn search(&self, query_str: &str) -> Result<Vec<SearchResult>> {
101 self.search_with_limit(query_str, self.config.limit)
102 }
103
104 pub fn search_with_limit(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>> {
106 info!("Searching for: {}", query_str);
107
108 let searcher = self.reader.searcher();
109
110 let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
112
113 let query = query_parser.parse_query(query_str).with_context(|| {
115 format!(
116 "查询解析失败: '{}'\n💡 提示: 检查查询语法,特殊字符需要转义(如 +, -, :, *, ?)",
117 query_str
118 )
119 })?;
120
121 let top_docs = searcher
123 .search(&query, &TopDocs::with_limit(limit))
124 .with_context(|| format!("搜索执行失败: '{}'", query_str))?;
125
126 debug!("Found {} results", top_docs.len());
127
128 let snippet_generator = if self.config.enable_highlight {
130 Some(SnippetGenerator::create(&searcher, &query, self.schema.content_field())?)
131 } else {
132 None
133 };
134
135 let mut results = Vec::new();
137 for (score, doc_address) in top_docs {
138 if let Ok(doc) = searcher.doc(doc_address) {
139 let result =
140 self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
141 results.push(result);
142 }
143 }
144
145 Ok(results)
146 }
147
148 pub fn search_with_filter(
150 &self,
151 query_str: &str,
152 file_type: Option<&str>,
153 limit: usize,
154 ) -> Result<Vec<SearchResult>> {
155 info!("Searching for: {} with filter: {:?}", query_str, file_type);
156
157 let searcher = self.reader.searcher();
158
159 let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
161
162 let content_query = query_parser.parse_query(query_str).with_context(|| {
164 format!("查询解析失败: '{}'\n💡 提示: 检查查询语法,特殊字符需要转义", query_str)
165 })?;
166
167 let final_query: Box<dyn tantivy::query::Query> = if let Some(ft) = file_type {
169 let type_term = Term::from_field_text(self.schema.file_type_field(), ft);
170 let type_query = TermQuery::new(type_term, IndexRecordOption::Basic);
171
172 Box::new(BooleanQuery::new(vec![
173 (Occur::Must, content_query),
174 (Occur::Must, Box::new(type_query)),
175 ]))
176 } else {
177 content_query
178 };
179
180 let top_docs = searcher
182 .search(&*final_query, &TopDocs::with_limit(limit))
183 .with_context(|| format!("带过滤器的搜索执行失败: '{}'", query_str))?;
184
185 let snippet_generator = if self.config.enable_highlight {
187 let content_query = query_parser.parse_query(query_str)?;
189 Some(SnippetGenerator::create(&searcher, &content_query, self.schema.content_field())?)
190 } else {
191 None
192 };
193
194 let mut results = Vec::new();
196 for (score, doc_address) in top_docs {
197 if let Ok(doc) = searcher.doc(doc_address) {
198 let result =
199 self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
200 results.push(result);
201 }
202 }
203
204 Ok(results)
205 }
206
207 pub fn search_prefix(&self, prefix: &str, limit: usize) -> Result<Vec<SearchResult>> {
220 info!("Prefix search for: {}", prefix);
221
222 if prefix.len() < self.config.min_prefix_length {
224 warn!(
225 "Prefix '{}' is too short (min: {}), using standard search",
226 prefix, self.config.min_prefix_length
227 );
228 return self.search_with_limit(prefix, limit);
229 }
230
231 let searcher = self.reader.searcher();
232
233 let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
235 let prefix_query_str = format!("{}*", prefix);
236
237 let query = query_parser
239 .parse_query(&prefix_query_str)
240 .with_context(|| format!("Failed to parse prefix query: {}", prefix_query_str))?;
241
242 let top_docs = searcher
244 .search(&query, &TopDocs::with_limit(limit))
245 .with_context(|| format!("Prefix search failed for: {}", prefix))?;
246
247 debug!("Found {} results for prefix '{}'", top_docs.len(), prefix);
248
249 let snippet_generator = None;
251
252 let mut results = Vec::new();
254 for (score, doc_address) in top_docs {
255 if let Ok(doc) = searcher.doc(doc_address) {
256 let result =
257 self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
258 results.push(result);
259 }
260 }
261
262 Ok(results)
263 }
264
265 pub fn search_fuzzy(&self, term: &str, limit: usize) -> Result<Vec<SearchResult>> {
279 info!("Fuzzy search for: {} (distance: {})", term, self.config.fuzzy_distance);
280
281 let searcher = self.reader.searcher();
282
283 let term_obj = Term::from_field_text(self.schema.content_field(), term);
285 let query = FuzzyTermQuery::new(term_obj, self.config.fuzzy_distance, true);
286
287 let top_docs = searcher
289 .search(&query, &TopDocs::with_limit(limit))
290 .with_context(|| format!("Fuzzy search failed for: {}", term))?;
291
292 debug!("Found {} results for fuzzy term '{}'", top_docs.len(), term);
293
294 let snippet_generator = None;
296
297 let mut results = Vec::new();
299 for (score, doc_address) in top_docs {
300 if let Ok(doc) = searcher.doc(doc_address) {
301 let result =
302 self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
303 results.push(result);
304 }
305 }
306
307 Ok(results)
308 }
309
310 pub fn search_smart(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>> {
328 let (query_type, cleaned_query) = QueryAnalyzer::analyze(query_str);
329
330 match query_type {
331 QueryType::Prefix => {
332 info!("Detected prefix query: {}", cleaned_query);
333 self.search_prefix(&cleaned_query, limit)
334 }
335 QueryType::Fuzzy => {
336 info!("Detected fuzzy query: {}", cleaned_query);
337 self.search_fuzzy(&cleaned_query, limit)
338 }
339 QueryType::Standard => {
340 info!("Using standard search: {}", cleaned_query);
341 self.search_with_limit(&cleaned_query, limit)
342 }
343 }
344 }
345
346 fn doc_to_search_result(
348 &self,
349 doc: &tantivy::TantivyDocument,
350 score: f32,
351 snippet_generator: Option<&SnippetGenerator>,
352 _searcher: &TantivySearcher,
353 ) -> Result<SearchResult> {
354 let path = doc
356 .get_first(self.schema.path_field())
357 .and_then(|v| v.as_str())
358 .map(PathBuf::from)
359 .unwrap_or_default();
360
361 let content =
363 doc.get_first(self.schema.content_field()).and_then(|v| v.as_str()).unwrap_or("");
364
365 let snippet = if let Some(generator) = snippet_generator {
367 let snippet = generator.snippet(content);
368 Some(self.format_snippet(&snippet))
369 } else {
370 None
371 };
372
373 let (line, column) = self.extract_line_info(content, &snippet);
375
376 Ok(SearchResult { path, line, column, score, snippet })
377 }
378
379 fn format_snippet(&self, snippet: &Snippet) -> String {
381 snippet
383 .to_html()
384 .replace("<b>", "\x1b[1;33m") .replace("</b>", "\x1b[0m") }
387
388 fn extract_line_info(
390 &self,
391 content: &str,
392 snippet: &Option<String>,
393 ) -> (Option<usize>, Option<usize>) {
394 if let Some(ref snip) = snippet {
395 let clean_snippet = snip.replace("\x1b[1;33m", "").replace("\x1b[0m", "");
397
398 if let Some(pos) = content.find(clean_snippet.trim()) {
400 let line_number = content[..pos].matches('\n').count() + 1;
401 let line_start = content[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
402 let column = pos - line_start + 1;
403 return (Some(line_number), Some(column));
404 }
405 }
406 (None, None)
407 }
408
409 pub fn num_docs(&self) -> u64 {
411 self.reader.searcher().num_docs()
412 }
413
414 pub fn schema(&self) -> &IndexSchema {
416 &self.schema
417 }
418}
419
420impl Default for Searcher {
421 fn default() -> Self {
422 Self::new(Path::new(".xore/index")).expect("Failed to create default searcher")
424 }
425}
426
427pub struct SearchResultIter<'a> {
429 searcher: &'a Searcher,
430 query: String,
431 offset: usize,
432 batch_size: usize,
433 current_batch: Vec<SearchResult>,
434 current_index: usize,
435 exhausted: bool,
436}
437
438impl<'a> SearchResultIter<'a> {
439 pub fn new(searcher: &'a Searcher, query: &str, batch_size: usize) -> Self {
441 Self {
442 searcher,
443 query: query.to_string(),
444 offset: 0,
445 batch_size,
446 current_batch: Vec::new(),
447 current_index: 0,
448 exhausted: false,
449 }
450 }
451}
452
453impl<'a> Iterator for SearchResultIter<'a> {
454 type Item = SearchResult;
455
456 fn next(&mut self) -> Option<Self::Item> {
457 if self.exhausted {
458 return None;
459 }
460
461 if self.current_index >= self.current_batch.len() {
463 match self.searcher.search_with_limit(&self.query, self.offset + self.batch_size) {
464 Ok(results) => {
465 if results.len() <= self.offset {
466 self.exhausted = true;
467 return None;
468 }
469 self.current_batch = results.into_iter().skip(self.offset).collect();
470 self.offset += self.current_batch.len();
471 self.current_index = 0;
472
473 if self.current_batch.is_empty() {
474 self.exhausted = true;
475 return None;
476 }
477 }
478 Err(_) => {
479 self.exhausted = true;
480 return None;
481 }
482 }
483 }
484
485 let result = self.current_batch.get(self.current_index).cloned();
486 self.current_index += 1;
487 result
488 }
489}
490
491pub struct QueryAnalyzer;
495
496impl QueryAnalyzer {
497 pub fn analyze(query_str: &str) -> (QueryType, String) {
521 let trimmed = query_str.trim();
522
523 if trimmed.ends_with('*') && trimmed.len() > 1 {
525 let prefix = trimmed[..trimmed.len() - 1].to_string();
526 return (QueryType::Prefix, prefix);
527 }
528
529 if trimmed.starts_with('~') && trimmed.len() > 1 {
531 let fuzzy_term = trimmed[1..].to_string();
532 return (QueryType::Fuzzy, fuzzy_term);
533 }
534
535 (QueryType::Standard, trimmed.to_string())
537 }
538
539 pub fn detect_type(query_str: &str) -> QueryType {
541 Self::analyze(query_str).0
542 }
543}
544
#[cfg(test)]
mod tests {
    use super::*;
    use crate::indexer::IndexBuilder;
    use crate::scanner::ScannedFile;
    use std::fs::File;
    use std::io::Write;
    use std::time::SystemTime;
    use tempfile::TempDir;

    /// Writes `files` (name, content) under `<temp_dir>/files`, indexes them
    /// into `<temp_dir>/<index_name>` and returns the index path.
    fn build_index(temp_dir: &TempDir, index_name: &str, files: &[(&str, &str)]) -> PathBuf {
        let files_dir = temp_dir.path().join("files");
        std::fs::create_dir_all(&files_dir).unwrap();

        let scanned_files: Vec<ScannedFile> = files
            .iter()
            .map(|&(name, content)| {
                let path = files_dir.join(name);
                File::create(&path).unwrap().write_all(content.as_bytes()).unwrap();
                ScannedFile {
                    path,
                    size: content.len() as u64,
                    modified: Some(SystemTime::now()),
                    is_dir: false,
                }
            })
            .collect();

        let index_path = temp_dir.path().join(index_name);
        let mut builder = IndexBuilder::new(&index_path).unwrap();
        builder.add_documents_batch(&scanned_files).unwrap();
        builder.build().unwrap();

        index_path
    }

    /// Builds the shared four-document fixture (English, Chinese, mixed, Rust).
    fn setup_test_index(temp_dir: &TempDir) -> PathBuf {
        build_index(
            temp_dir,
            "test_index",
            &[
                ("error.log", "This is an error message\nAnother line with error\n"),
                ("chinese.txt", "这是一个错误日志\n数据处理完成\n"),
                ("mixed.txt", "Error 错误 processing data 数据处理\n"),
                ("hello.rs", "fn main() {\n println!(\"Hello, world!\");\n}\n"),
            ],
        )
    }

    /// Opens a searcher over the shared fixture. The returned `TempDir` must
    /// stay alive for as long as the searcher is used.
    fn open_fixture() -> (TempDir, Searcher) {
        let temp_dir = TempDir::new().unwrap();
        let index_path = setup_test_index(&temp_dir);
        let searcher = Searcher::new(&index_path).unwrap();
        (temp_dir, searcher)
    }

    /// True when any result's path contains `needle`.
    fn any_path_contains(results: &[SearchResult], needle: &str) -> bool {
        results.iter().any(|r| r.path.to_string_lossy().contains(needle))
    }

    #[test]
    fn test_search_english() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("error").unwrap();

        assert!(!results.is_empty());
        assert!(any_path_contains(&results, "error.log"));
    }

    #[test]
    fn test_search_chinese() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("错误").unwrap();

        assert!(!results.is_empty());
        assert!(any_path_contains(&results, "chinese.txt"));
    }

    #[test]
    fn test_search_mixed() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("数据").unwrap();

        assert!(!results.is_empty());
    }

    #[test]
    fn test_search_with_filter() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search_with_filter("error", Some("log"), 100).unwrap();

        assert!(results.iter().all(|r| r.path.extension().map(|e| e == "log").unwrap_or(false)));
    }

    #[test]
    fn test_search_no_results() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("nonexistentterm12345").unwrap();

        assert!(results.is_empty());
    }

    #[test]
    fn test_search_score_ordering() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("error").unwrap();

        // Hits must arrive best score first.
        for pair in results.windows(2) {
            assert!(pair[0].score >= pair[1].score);
        }
    }

    #[test]
    fn test_num_docs() {
        let (_temp_dir, searcher) = open_fixture();
        assert_eq!(searcher.num_docs(), 4);
    }

    #[test]
    fn test_snippet_generation() {
        let (_temp_dir, searcher) = open_fixture();
        let results = searcher.search("error").unwrap();

        for result in &results {
            assert!(result.snippet.is_some());
        }
    }

    #[test]
    fn test_phrase_search() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search("\"Hello, world\"").unwrap();

        assert!(!results.is_empty());
        assert!(any_path_contains(&results, "hello.rs"));
    }

    #[test]
    fn test_prefix_search() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search_prefix("err", 10);
        assert!(results.is_ok(), "Prefix search API should work without errors");

        if let Ok(res) = results {
            for r in &res {
                assert!(r.path.exists() || !r.path.as_os_str().is_empty());
            }
        }
    }

    #[test]
    fn test_fuzzy_search() {
        let temp_dir = TempDir::new().unwrap();
        let index_path = build_index(
            &temp_dir,
            "fuzzy_index",
            &[
                ("db.txt", "database connection\ndatabase query\n"),
                ("typo.txt", "databse error\n"),
            ],
        );

        let searcher = Searcher::new(&index_path).unwrap();

        let results = searcher.search_fuzzy("databse", 10).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_smart_search_prefix() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search_smart("err*", 10);
        assert!(results.is_ok(), "Smart search with prefix should work");
    }

    #[test]
    fn test_smart_search_fuzzy() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search_smart("~eror", 10).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_smart_search_standard() {
        let (_temp_dir, searcher) = open_fixture();

        let results = searcher.search_smart("error", 10).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_query_analyzer_prefix() {
        for (input, expected) in [("config*", "config"), ("test*", "test")] {
            let (qtype, query) = QueryAnalyzer::analyze(input);
            assert_eq!(qtype, QueryType::Prefix);
            assert_eq!(query, expected);
        }
    }

    #[test]
    fn test_query_analyzer_fuzzy() {
        for (input, expected) in [("~databse", "databse"), ("~eror", "eror")] {
            let (qtype, query) = QueryAnalyzer::analyze(input);
            assert_eq!(qtype, QueryType::Fuzzy);
            assert_eq!(query, expected);
        }
    }

    #[test]
    fn test_query_analyzer_standard() {
        for input in ["error", "hello world"] {
            let (qtype, query) = QueryAnalyzer::analyze(input);
            assert_eq!(qtype, QueryType::Standard);
            assert_eq!(query, input);
        }
    }

    #[test]
    fn test_query_analyzer_edge_cases() {
        // A lone marker carries no term and is treated as a standard query.
        let (qtype, _) = QueryAnalyzer::analyze("*");
        assert_eq!(qtype, QueryType::Standard);

        let (qtype, _) = QueryAnalyzer::analyze("~");
        assert_eq!(qtype, QueryType::Standard);

        let (qtype, query) = QueryAnalyzer::analyze("");
        assert_eq!(qtype, QueryType::Standard);
        assert_eq!(query, "");

        // Surrounding whitespace is removed before classification.
        let (qtype, query) = QueryAnalyzer::analyze(" config* ");
        assert_eq!(qtype, QueryType::Prefix);
        assert_eq!(query, "config");
    }

    #[test]
    fn test_query_analyzer_detect_type() {
        assert_eq!(QueryAnalyzer::detect_type("config*"), QueryType::Prefix);
        assert_eq!(QueryAnalyzer::detect_type("~databse"), QueryType::Fuzzy);
        assert_eq!(QueryAnalyzer::detect_type("error"), QueryType::Standard);
    }

    #[test]
    fn test_prefix_search_min_length() {
        let (_temp_dir, searcher) = open_fixture();

        // Shorter than the configured minimum: falls back to a standard search.
        let results = searcher.search_prefix("e", 10);
        assert!(results.is_ok());
    }
}