use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use tantivy::collector::TopDocs;
use tantivy::query::{BooleanQuery, FuzzyTermQuery, Occur, QueryParser, TermQuery};
use tantivy::schema::{IndexRecordOption, Value};
use tantivy::snippet::{Snippet, SnippetGenerator};
use tantivy::{Index, ReloadPolicy, Searcher as TantivySearcher, Term};
use tracing::{debug, info, warn};
use crate::indexer::{open_index, IndexSchema};
use xore_core::types::SearchResult;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryType {
Standard,
Prefix,
Fuzzy,
}
#[derive(Debug, Clone)]
pub struct SearchConfig {
pub limit: usize,
pub snippet_max_length: usize,
pub enable_highlight: bool,
pub fuzzy_distance: u8,
pub min_prefix_length: usize,
}
impl Default for SearchConfig {
fn default() -> Self {
Self {
limit: 100,
snippet_max_length: 200,
enable_highlight: true,
fuzzy_distance: 2,
min_prefix_length: 2,
}
}
}
pub struct Searcher {
index: Index,
schema: IndexSchema,
reader: tantivy::IndexReader,
config: SearchConfig,
}
impl Searcher {
pub fn new(index_path: &Path) -> Result<Self> {
Self::with_config(index_path, SearchConfig::default())
}
pub fn with_config(index_path: &Path, config: SearchConfig) -> Result<Self> {
let (index, schema) = open_index(index_path).with_context(|| {
format!(
"无法打开搜索索引: {}\n💡 提示: 请先运行 'xore f --index' 建立索引,或使用 '--rebuild' 重建",
index_path.display()
)
})?;
let reader = index
.reader_builder()
.reload_policy(ReloadPolicy::OnCommitWithDelay)
.try_into()
.with_context(|| {
"无法创建索引读取器\n💡 提示: 索引可能已损坏,尝试运行 'xore f --rebuild' 重建"
})?;
Ok(Self { index, schema, reader, config })
}
pub fn search(&self, query_str: &str) -> Result<Vec<SearchResult>> {
self.search_with_limit(query_str, self.config.limit)
}
pub fn search_with_limit(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>> {
info!("Searching for: {}", query_str);
let searcher = self.reader.searcher();
let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
let query = query_parser.parse_query(query_str).with_context(|| {
format!(
"查询解析失败: '{}'\n💡 提示: 检查查询语法,特殊字符需要转义(如 +, -, :, *, ?)",
query_str
)
})?;
let top_docs = searcher
.search(&query, &TopDocs::with_limit(limit))
.with_context(|| format!("搜索执行失败: '{}'", query_str))?;
debug!("Found {} results", top_docs.len());
let snippet_generator = if self.config.enable_highlight {
Some(SnippetGenerator::create(&searcher, &query, self.schema.content_field())?)
} else {
None
};
let mut results = Vec::new();
for (score, doc_address) in top_docs {
if let Ok(doc) = searcher.doc(doc_address) {
let result =
self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
results.push(result);
}
}
Ok(results)
}
pub fn search_with_filter(
&self,
query_str: &str,
file_type: Option<&str>,
limit: usize,
) -> Result<Vec<SearchResult>> {
info!("Searching for: {} with filter: {:?}", query_str, file_type);
let searcher = self.reader.searcher();
let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
let content_query = query_parser.parse_query(query_str).with_context(|| {
format!("查询解析失败: '{}'\n💡 提示: 检查查询语法,特殊字符需要转义", query_str)
})?;
let final_query: Box<dyn tantivy::query::Query> = if let Some(ft) = file_type {
let type_term = Term::from_field_text(self.schema.file_type_field(), ft);
let type_query = TermQuery::new(type_term, IndexRecordOption::Basic);
Box::new(BooleanQuery::new(vec![
(Occur::Must, content_query),
(Occur::Must, Box::new(type_query)),
]))
} else {
content_query
};
let top_docs = searcher
.search(&*final_query, &TopDocs::with_limit(limit))
.with_context(|| format!("带过滤器的搜索执行失败: '{}'", query_str))?;
let snippet_generator = if self.config.enable_highlight {
let content_query = query_parser.parse_query(query_str)?;
Some(SnippetGenerator::create(&searcher, &content_query, self.schema.content_field())?)
} else {
None
};
let mut results = Vec::new();
for (score, doc_address) in top_docs {
if let Ok(doc) = searcher.doc(doc_address) {
let result =
self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
results.push(result);
}
}
Ok(results)
}
pub fn search_prefix(&self, prefix: &str, limit: usize) -> Result<Vec<SearchResult>> {
info!("Prefix search for: {}", prefix);
if prefix.len() < self.config.min_prefix_length {
warn!(
"Prefix '{}' is too short (min: {}), using standard search",
prefix, self.config.min_prefix_length
);
return self.search_with_limit(prefix, limit);
}
let searcher = self.reader.searcher();
let query_parser = QueryParser::for_index(&self.index, vec![self.schema.content_field()]);
let prefix_query_str = format!("{}*", prefix);
let query = query_parser
.parse_query(&prefix_query_str)
.with_context(|| format!("Failed to parse prefix query: {}", prefix_query_str))?;
let top_docs = searcher
.search(&query, &TopDocs::with_limit(limit))
.with_context(|| format!("Prefix search failed for: {}", prefix))?;
debug!("Found {} results for prefix '{}'", top_docs.len(), prefix);
let snippet_generator = None;
let mut results = Vec::new();
for (score, doc_address) in top_docs {
if let Ok(doc) = searcher.doc(doc_address) {
let result =
self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
results.push(result);
}
}
Ok(results)
}
pub fn search_fuzzy(&self, term: &str, limit: usize) -> Result<Vec<SearchResult>> {
info!("Fuzzy search for: {} (distance: {})", term, self.config.fuzzy_distance);
let searcher = self.reader.searcher();
let term_obj = Term::from_field_text(self.schema.content_field(), term);
let query = FuzzyTermQuery::new(term_obj, self.config.fuzzy_distance, true);
let top_docs = searcher
.search(&query, &TopDocs::with_limit(limit))
.with_context(|| format!("Fuzzy search failed for: {}", term))?;
debug!("Found {} results for fuzzy term '{}'", top_docs.len(), term);
let snippet_generator = None;
let mut results = Vec::new();
for (score, doc_address) in top_docs {
if let Ok(doc) = searcher.doc(doc_address) {
let result =
self.doc_to_search_result(&doc, score, snippet_generator.as_ref(), &searcher)?;
results.push(result);
}
}
Ok(results)
}
pub fn search_smart(&self, query_str: &str, limit: usize) -> Result<Vec<SearchResult>> {
let (query_type, cleaned_query) = QueryAnalyzer::analyze(query_str);
match query_type {
QueryType::Prefix => {
info!("Detected prefix query: {}", cleaned_query);
self.search_prefix(&cleaned_query, limit)
}
QueryType::Fuzzy => {
info!("Detected fuzzy query: {}", cleaned_query);
self.search_fuzzy(&cleaned_query, limit)
}
QueryType::Standard => {
info!("Using standard search: {}", cleaned_query);
self.search_with_limit(&cleaned_query, limit)
}
}
}
fn doc_to_search_result(
&self,
doc: &tantivy::TantivyDocument,
score: f32,
snippet_generator: Option<&SnippetGenerator>,
_searcher: &TantivySearcher,
) -> Result<SearchResult> {
let path = doc
.get_first(self.schema.path_field())
.and_then(|v| v.as_str())
.map(PathBuf::from)
.unwrap_or_default();
let content =
doc.get_first(self.schema.content_field()).and_then(|v| v.as_str()).unwrap_or("");
let snippet = if let Some(generator) = snippet_generator {
let snippet = generator.snippet(content);
Some(self.format_snippet(&snippet))
} else {
None
};
let (line, column) = self.extract_line_info(content, &snippet);
Ok(SearchResult { path, line, column, score, snippet })
}
fn format_snippet(&self, snippet: &Snippet) -> String {
snippet
.to_html()
.replace("<b>", "\x1b[1;33m") .replace("</b>", "\x1b[0m") }
fn extract_line_info(
&self,
content: &str,
snippet: &Option<String>,
) -> (Option<usize>, Option<usize>) {
if let Some(ref snip) = snippet {
let clean_snippet = snip.replace("\x1b[1;33m", "").replace("\x1b[0m", "");
if let Some(pos) = content.find(clean_snippet.trim()) {
let line_number = content[..pos].matches('\n').count() + 1;
let line_start = content[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
let column = pos - line_start + 1;
return (Some(line_number), Some(column));
}
}
(None, None)
}
pub fn num_docs(&self) -> u64 {
self.reader.searcher().num_docs()
}
pub fn schema(&self) -> &IndexSchema {
&self.schema
}
}
impl Default for Searcher {
fn default() -> Self {
Self::new(Path::new(".xore/index")).expect("Failed to create default searcher")
}
}
pub struct SearchResultIter<'a> {
searcher: &'a Searcher,
query: String,
offset: usize,
batch_size: usize,
current_batch: Vec<SearchResult>,
current_index: usize,
exhausted: bool,
}
impl<'a> SearchResultIter<'a> {
pub fn new(searcher: &'a Searcher, query: &str, batch_size: usize) -> Self {
Self {
searcher,
query: query.to_string(),
offset: 0,
batch_size,
current_batch: Vec::new(),
current_index: 0,
exhausted: false,
}
}
}
impl<'a> Iterator for SearchResultIter<'a> {
type Item = SearchResult;
fn next(&mut self) -> Option<Self::Item> {
if self.exhausted {
return None;
}
if self.current_index >= self.current_batch.len() {
match self.searcher.search_with_limit(&self.query, self.offset + self.batch_size) {
Ok(results) => {
if results.len() <= self.offset {
self.exhausted = true;
return None;
}
self.current_batch = results.into_iter().skip(self.offset).collect();
self.offset += self.current_batch.len();
self.current_index = 0;
if self.current_batch.is_empty() {
self.exhausted = true;
return None;
}
}
Err(_) => {
self.exhausted = true;
return None;
}
}
}
let result = self.current_batch.get(self.current_index).cloned();
self.current_index += 1;
result
}
}
pub struct QueryAnalyzer;
impl QueryAnalyzer {
pub fn analyze(query_str: &str) -> (QueryType, String) {
let trimmed = query_str.trim();
if trimmed.ends_with('*') && trimmed.len() > 1 {
let prefix = trimmed[..trimmed.len() - 1].to_string();
return (QueryType::Prefix, prefix);
}
if trimmed.starts_with('~') && trimmed.len() > 1 {
let fuzzy_term = trimmed[1..].to_string();
return (QueryType::Fuzzy, fuzzy_term);
}
(QueryType::Standard, trimmed.to_string())
}
pub fn detect_type(query_str: &str) -> QueryType {
Self::analyze(query_str).0
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::indexer::IndexBuilder;
use crate::scanner::ScannedFile;
use std::fs::File;
use std::io::Write;
use std::time::SystemTime;
use tempfile::TempDir;
fn setup_test_index(temp_dir: &TempDir) -> PathBuf {
let index_path = temp_dir.path().join("test_index");
let files_dir = temp_dir.path().join("files");
std::fs::create_dir_all(&files_dir).unwrap();
let files = vec![
("error.log", "This is an error message\nAnother line with error\n"),
("chinese.txt", "这是一个错误日志\n数据处理完成\n"),
("mixed.txt", "Error 错误 processing data 数据处理\n"),
("hello.rs", "fn main() {\n println!(\"Hello, world!\");\n}\n"),
];
let mut scanned_files = Vec::new();
for (name, content) in files {
let path = files_dir.join(name);
let mut file = File::create(&path).unwrap();
file.write_all(content.as_bytes()).unwrap();
scanned_files.push(ScannedFile {
path,
size: content.len() as u64,
modified: Some(SystemTime::now()),
is_dir: false,
});
}
let mut builder = IndexBuilder::new(&index_path).unwrap();
builder.add_documents_batch(&scanned_files).unwrap();
builder.build().unwrap();
index_path
}
#[test]
fn test_search_english() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("error").unwrap();
assert!(!results.is_empty());
assert!(results.iter().any(|r| r.path.to_string_lossy().contains("error.log")));
}
#[test]
fn test_search_chinese() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("错误").unwrap();
assert!(!results.is_empty());
assert!(results.iter().any(|r| r.path.to_string_lossy().contains("chinese.txt")));
}
#[test]
fn test_search_mixed() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("数据").unwrap();
assert!(!results.is_empty());
}
#[test]
fn test_search_with_filter() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_with_filter("error", Some("log"), 100).unwrap();
assert!(results.iter().all(|r| r.path.extension().map(|e| e == "log").unwrap_or(false)));
}
#[test]
fn test_search_no_results() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("nonexistentterm12345").unwrap();
assert!(results.is_empty());
}
#[test]
fn test_search_score_ordering() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("error").unwrap();
for i in 1..results.len() {
assert!(results[i - 1].score >= results[i].score);
}
}
#[test]
fn test_num_docs() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
assert_eq!(searcher.num_docs(), 4);
}
#[test]
fn test_snippet_generation() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("error").unwrap();
for result in &results {
assert!(result.snippet.is_some());
}
}
#[test]
fn test_phrase_search() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search("\"Hello, world\"").unwrap();
assert!(!results.is_empty());
assert!(results.iter().any(|r| r.path.to_string_lossy().contains("hello.rs")));
}
#[test]
fn test_prefix_search() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_prefix("err", 10);
assert!(results.is_ok(), "Prefix search API should work without errors");
if let Ok(res) = results {
for r in &res {
assert!(r.path.exists() || !r.path.as_os_str().is_empty());
}
}
}
#[test]
fn test_fuzzy_search() {
let temp_dir = TempDir::new().unwrap();
let files_dir = temp_dir.path().join("files");
std::fs::create_dir_all(&files_dir).unwrap();
let files = vec![
("db.txt", "database connection\ndatabase query\n"),
("typo.txt", "databse error\n"), ];
let mut scanned_files = Vec::new();
for (name, content) in files {
let path = files_dir.join(name);
let mut file = File::create(&path).unwrap();
file.write_all(content.as_bytes()).unwrap();
scanned_files.push(ScannedFile {
path,
size: content.len() as u64,
modified: Some(SystemTime::now()),
is_dir: false,
});
}
let index_path = temp_dir.path().join("fuzzy_index");
let mut builder = IndexBuilder::new(&index_path).unwrap();
builder.add_documents_batch(&scanned_files).unwrap();
builder.build().unwrap();
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_fuzzy("databse", 10).unwrap();
assert!(!results.is_empty());
}
#[test]
fn test_smart_search_prefix() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_smart("err*", 10);
assert!(results.is_ok(), "Smart search with prefix should work");
}
#[test]
fn test_smart_search_fuzzy() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_smart("~eror", 10).unwrap();
assert!(!results.is_empty());
}
#[test]
fn test_smart_search_standard() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_smart("error", 10).unwrap();
assert!(!results.is_empty());
}
#[test]
fn test_query_analyzer_prefix() {
let (qtype, query) = QueryAnalyzer::analyze("config*");
assert_eq!(qtype, QueryType::Prefix);
assert_eq!(query, "config");
let (qtype, query) = QueryAnalyzer::analyze("test*");
assert_eq!(qtype, QueryType::Prefix);
assert_eq!(query, "test");
}
#[test]
fn test_query_analyzer_fuzzy() {
let (qtype, query) = QueryAnalyzer::analyze("~databse");
assert_eq!(qtype, QueryType::Fuzzy);
assert_eq!(query, "databse");
let (qtype, query) = QueryAnalyzer::analyze("~eror");
assert_eq!(qtype, QueryType::Fuzzy);
assert_eq!(query, "eror");
}
#[test]
fn test_query_analyzer_standard() {
let (qtype, query) = QueryAnalyzer::analyze("error");
assert_eq!(qtype, QueryType::Standard);
assert_eq!(query, "error");
let (qtype, query) = QueryAnalyzer::analyze("hello world");
assert_eq!(qtype, QueryType::Standard);
assert_eq!(query, "hello world");
}
#[test]
fn test_query_analyzer_edge_cases() {
let (qtype, _) = QueryAnalyzer::analyze("*");
assert_eq!(qtype, QueryType::Standard);
let (qtype, _) = QueryAnalyzer::analyze("~");
assert_eq!(qtype, QueryType::Standard);
let (qtype, query) = QueryAnalyzer::analyze("");
assert_eq!(qtype, QueryType::Standard);
assert_eq!(query, "");
let (qtype, query) = QueryAnalyzer::analyze(" config* ");
assert_eq!(qtype, QueryType::Prefix);
assert_eq!(query, "config");
}
#[test]
fn test_query_analyzer_detect_type() {
assert_eq!(QueryAnalyzer::detect_type("config*"), QueryType::Prefix);
assert_eq!(QueryAnalyzer::detect_type("~databse"), QueryType::Fuzzy);
assert_eq!(QueryAnalyzer::detect_type("error"), QueryType::Standard);
}
#[test]
fn test_prefix_search_min_length() {
let temp_dir = TempDir::new().unwrap();
let index_path = setup_test_index(&temp_dir);
let searcher = Searcher::new(&index_path).unwrap();
let results = searcher.search_prefix("e", 10);
assert!(results.is_ok());
}
}