1use crate::core::config::Config;
2use crate::core::error::{Error, Result};
3use crate::index::schema::IndexSchema;
4use crate::search::SearchResult;
5use std::path::Path;
6use tantivy::collector::TopDocs;
7use tantivy::query::{BooleanQuery, BoostQuery, Occur, Query, TermQuery};
8use tantivy::schema::{IndexRecordOption, Value};
9use tantivy::{Index, IndexReader, ReloadPolicy, Term};
10
11#[derive(Clone)]
12pub struct IndexSearcher {
13 reader: IndexReader,
14 schema: IndexSchema,
15 index: Index,
16}
17
18impl IndexSearcher {
19 pub fn open(project_path: &Path) -> Result<Self> {
21 let index_dir = Config::index_dir(project_path)?;
22
23 if !index_dir.join("meta.json").exists() {
24 return Err(Error::IndexNotFound {
25 path: project_path.to_path_buf(),
26 });
27 }
28
29 let schema = IndexSchema::new();
30 let index = Index::open_in_dir(&index_dir).map_err(|e| Error::IndexError {
31 message: e.to_string(),
32 })?;
33
34 let reader = index
35 .reader_builder()
36 .reload_policy(ReloadPolicy::Manual)
37 .try_into()
38 .map_err(|e| Error::IndexError {
39 message: format!("Failed to create reader: {}", e),
40 })?;
41
42 Ok(Self {
43 reader,
44 schema,
45 index,
46 })
47 }
48
49 pub fn exists(project_path: &Path) -> Result<bool> {
51 let index_dir = Config::index_dir(project_path)?;
52 Ok(index_dir.join("meta.json").exists())
53 }
54
55 pub fn search(&self, query_text: &str, limit: usize) -> Result<Vec<SearchResult>> {
57 let searcher = self.reader.searcher();
58
59 let mut tokenizer = self
61 .index
62 .tokenizer_for_field(self.schema.content)
63 .map_err(|e| Error::SearchError {
64 message: e.to_string(),
65 })?;
66
67 let mut tokens = Vec::new();
68 let mut stream = tokenizer.token_stream(query_text);
69 while let Some(token) = stream.next() {
70 tokens.push(token.text.to_string());
71 }
72
73 if tokens.is_empty() {
74 return Ok(Vec::new());
75 }
76
77 let mut subqueries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
79
80 for token in &tokens {
81 let content_term = Term::from_field_text(self.schema.content, token);
83 let content_query = TermQuery::new(content_term, IndexRecordOption::WithFreqs);
84 subqueries.push((Occur::Should, Box::new(content_query)));
85
86 let symbol_term = Term::from_field_text(self.schema.symbol_name, token);
88 let symbol_query = TermQuery::new(symbol_term, IndexRecordOption::WithFreqs);
89 let boosted = BoostQuery::new(Box::new(symbol_query), 3.0);
90 subqueries.push((Occur::Should, Box::new(boosted)));
91 }
92
93 let query = BooleanQuery::new(subqueries);
94
95 let top_docs = searcher
97 .search(&query, &TopDocs::with_limit(limit))
98 .map_err(|e| Error::SearchError {
99 message: e.to_string(),
100 })?;
101
102 let mut results = Vec::new();
104
105 for (score, doc_address) in top_docs {
106 let doc: tantivy::TantivyDocument =
107 searcher.doc(doc_address).map_err(|e| Error::SearchError {
108 message: e.to_string(),
109 })?;
110
111 let path = doc
112 .get_first(self.schema.path)
113 .and_then(|v| v.as_str())
114 .unwrap_or("")
115 .to_string();
116
117 let content = doc
118 .get_first(self.schema.content)
119 .and_then(|v| v.as_str())
120 .unwrap_or("")
121 .to_string();
122
123 let symbol_name = doc
124 .get_first(self.schema.symbol_name)
125 .and_then(|v| v.as_str())
126 .filter(|s| !s.is_empty())
127 .map(String::from);
128
129 let symbol_type = doc
130 .get_first(self.schema.symbol_type)
131 .and_then(|v| v.as_str())
132 .filter(|s| !s.is_empty())
133 .map(String::from);
134
135 let start_line = doc
136 .get_first(self.schema.start_line)
137 .and_then(|v| v.as_u64())
138 .unwrap_or(0) as usize;
139
140 let end_line = doc
141 .get_first(self.schema.end_line)
142 .and_then(|v| v.as_u64())
143 .unwrap_or(0) as usize;
144
145 let language = doc
146 .get_first(self.schema.language)
147 .and_then(|v| v.as_str())
148 .unwrap_or("unknown")
149 .to_string();
150
151 results.push(SearchResult {
152 path,
153 content,
154 symbol_name,
155 symbol_type,
156 start_line,
157 end_line,
158 language,
159 score,
160 });
161 }
162
163 Ok(results)
164 }
165}