1use std::path::Path;
2
3use dk_core::{Error, RepoId, Symbol, SymbolId};
4use tantivy::collector::TopDocs;
5use tantivy::query::{BooleanQuery, Occur, QueryParser, TermQuery};
6use tantivy::schema::*;
7use tantivy::{Directory, Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument};
8use uuid::Uuid;
9
10pub struct SearchIndex {
16 index: Index,
17 reader: IndexReader,
18 writer: IndexWriter,
19 f_symbol_id: Field,
21 f_repo_id: Field,
22 f_name: Field,
23 f_qualified_name: Field,
24 f_signature: Field,
25 f_doc_comment: Field,
26 f_file_path: Field,
27 f_kind: Field,
28}
29
30impl SearchIndex {
31 pub fn open(path: &Path) -> dk_core::Result<Self> {
43 let mut schema_builder = Schema::builder();
44
45 let f_symbol_id = schema_builder.add_text_field("symbol_id", STRING | STORED);
46 let f_repo_id = schema_builder.add_text_field("repo_id", STRING);
47 let f_name = schema_builder.add_text_field("name", TEXT);
48 let f_qualified_name = schema_builder.add_text_field("qualified_name", TEXT);
49 let f_signature = schema_builder.add_text_field("signature", TEXT);
50 let f_doc_comment = schema_builder.add_text_field("doc_comment", TEXT);
51 let f_file_path = schema_builder.add_text_field("file_path", TEXT);
52 let f_kind = schema_builder.add_text_field("kind", STRING);
53
54 let schema = schema_builder.build();
55
56 let dir: Box<dyn Directory> = if path.exists() && path.join("meta.json").exists() {
57 Box::new(
58 tantivy::directory::MmapDirectory::open(path)
59 .map_err(|e| Error::Internal(format!("Failed to open index directory: {e}")))?,
60 )
61 } else {
62 std::fs::create_dir_all(path)?;
63 Box::new(
64 tantivy::directory::MmapDirectory::open(path)
65 .map_err(|e| Error::Internal(format!("Failed to open index directory: {e}")))?,
66 )
67 };
68
69 let index = Index::open_or_create(dir, schema.clone())
70 .map_err(|e| Error::Internal(format!("Failed to open or create index: {e}")))?;
71
72 let reader = index
73 .reader_builder()
74 .reload_policy(ReloadPolicy::OnCommitWithDelay)
75 .try_into()
76 .map_err(|e| Error::Internal(format!("Failed to create index reader: {e}")))?;
77
78 let writer = index
79 .writer(50_000_000) .map_err(|e| Error::Internal(format!("Failed to create index writer: {e}")))?;
81
82 Ok(Self {
83 index,
84 reader,
85 writer,
86 f_symbol_id,
87 f_repo_id,
88 f_name,
89 f_qualified_name,
90 f_signature,
91 f_doc_comment,
92 f_file_path,
93 f_kind,
94 })
95 }
96
97 pub fn index_symbol(&mut self, repo_id: RepoId, sym: &Symbol) -> dk_core::Result<()> {
99 let mut doc = TantivyDocument::new();
100 doc.add_text(self.f_symbol_id, sym.id.to_string());
101 doc.add_text(self.f_repo_id, repo_id.to_string());
102 doc.add_text(self.f_name, &sym.name);
103 doc.add_text(self.f_qualified_name, &sym.qualified_name);
104 if let Some(ref sig) = sym.signature {
105 doc.add_text(self.f_signature, sig);
106 }
107 if let Some(ref doc_comment) = sym.doc_comment {
108 doc.add_text(self.f_doc_comment, doc_comment);
109 }
110 doc.add_text(self.f_file_path, sym.file_path.to_string_lossy().as_ref());
111 doc.add_text(self.f_kind, sym.kind.to_string());
112
113 self.writer
114 .add_document(doc)
115 .map_err(|e| Error::Internal(format!("Failed to add document: {e}")))?;
116
117 Ok(())
118 }
119
120 pub fn remove_symbol(&mut self, symbol_id: SymbolId) -> dk_core::Result<()> {
122 let term = tantivy::Term::from_field_text(self.f_symbol_id, &symbol_id.to_string());
123 self.writer.delete_term(term);
124 Ok(())
125 }
126
127 pub fn delete_by_repo(&mut self, repo_id: RepoId) -> dk_core::Result<()> {
132 let term = tantivy::Term::from_field_text(self.f_repo_id, &repo_id.to_string());
133 self.writer.delete_term(term);
134 Ok(())
135 }
136
137 pub fn commit(&mut self) -> dk_core::Result<()> {
140 self.writer
141 .commit()
142 .map_err(|e| Error::Internal(format!("Failed to commit index: {e}")))?;
143
144 self.reader
146 .reload()
147 .map_err(|e| Error::Internal(format!("Failed to reload reader: {e}")))?;
148
149 Ok(())
150 }
151
152 pub fn search(
156 &self,
157 repo_id: RepoId,
158 query: &str,
159 limit: usize,
160 ) -> dk_core::Result<Vec<SymbolId>> {
161 let searcher = self.reader.searcher();
162
163 let repo_term =
165 tantivy::Term::from_field_text(self.f_repo_id, &repo_id.to_string());
166 let repo_query = TermQuery::new(repo_term, IndexRecordOption::Basic);
167
168 let text_fields = vec![
170 self.f_name,
171 self.f_qualified_name,
172 self.f_signature,
173 self.f_doc_comment,
174 self.f_file_path,
175 ];
176 let query_parser = QueryParser::for_index(&self.index, text_fields);
177 let text_query = query_parser
178 .parse_query(query)
179 .map_err(|e| Error::Internal(format!("Failed to parse query: {e}")))?;
180
181 let combined = BooleanQuery::new(vec![
183 (Occur::Must, Box::new(repo_query)),
184 (Occur::Must, text_query),
185 ]);
186
187 let top_docs = searcher
188 .search(&combined, &TopDocs::with_limit(limit))
189 .map_err(|e| Error::Internal(format!("Search failed: {e}")))?;
190
191 let mut results = Vec::with_capacity(top_docs.len());
192 for (_score, doc_address) in top_docs {
193 let doc: TantivyDocument = searcher
194 .doc(doc_address)
195 .map_err(|e| Error::Internal(format!("Failed to retrieve doc: {e}")))?;
196
197 if let Some(id_value) = doc.get_first(self.f_symbol_id) {
198 if let Some(id_str) = id_value.as_str() {
199 let uuid = Uuid::parse_str(id_str).map_err(|e| {
200 Error::Internal(format!("Invalid UUID in index: {e}"))
201 })?;
202 results.push(uuid);
203 }
204 }
205 }
206
207 Ok(results)
208 }
209}