1use crate::cache::storage::CacheStorage;
24use crate::docs::query::{DocQuery, ItemInfo};
25use crate::search::config::{DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE, MAX_ITEMS_PER_CRATE};
26use anyhow::{Context, Result};
27use rustdoc_types::Crate;
28use std::path::{Path, PathBuf};
29use tantivy::{
30 Index, IndexWriter, TantivyDocument, doc,
31 schema::{FAST, Field, STORED, STRING, Schema, TEXT},
32};
33
34pub struct SearchIndexer {
36 index: Index,
37 fields: IndexFields,
38 writer: Option<IndexWriter>,
39 index_path: PathBuf,
40 member: Option<String>,
41}
42
43#[derive(Debug, Clone)]
44pub struct IndexFields {
45 name: Field,
46 docs: Field,
47 path: Field,
48 kind: Field,
49 crate_name: Field,
50 version: Field,
51 item_id: Field,
52 visibility: Field,
53 member: Field,
54}
55
56impl SearchIndexer {
57 pub fn new_for_crate(
59 crate_name: &str,
60 version: &str,
61 storage: &CacheStorage,
62 member: Option<&str>,
63 ) -> Result<Self> {
64 let index_path = storage.search_index_path(crate_name, version, member)?;
65
66 let mut indexer = Self::new_at_path(&index_path)?;
67 indexer.member = member.map(|s| s.to_string());
68 Ok(indexer)
69 }
70
71 pub fn new_at_path(index_path: &Path) -> Result<Self> {
73 let mut schema_builder = Schema::builder();
74
75 let name_field = schema_builder.add_text_field("name", TEXT | STORED);
77 let docs_field = schema_builder.add_text_field("docs", TEXT);
78 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
79 let kind_field = schema_builder.add_text_field("kind", STRING | STORED);
80
81 let crate_field = schema_builder.add_text_field("crate", STRING | STORED);
83 let version_field = schema_builder.add_text_field("version", STRING | STORED);
84 let item_id_field = schema_builder.add_u64_field("item_id", FAST | STORED);
85 let visibility_field = schema_builder.add_text_field("visibility", TEXT | STORED);
86 let member_field = schema_builder.add_text_field("member", STRING | STORED);
87
88 let schema = schema_builder.build();
89
90 let fields = IndexFields {
91 name: name_field,
92 docs: docs_field,
93 path: path_field,
94 kind: kind_field,
95 crate_name: crate_field,
96 version: version_field,
97 item_id: item_id_field,
98 visibility: visibility_field,
99 member: member_field,
100 };
101
102 std::fs::create_dir_all(index_path).with_context(|| {
104 format!(
105 "Failed to create search index directory: {}",
106 index_path.display()
107 )
108 })?;
109
110 let index = match Index::open_in_dir(index_path) {
111 Ok(index) => index,
112 Err(_) => Index::create_in_dir(index_path, schema.clone()).with_context(|| {
113 format!("Failed to create search index at: {}", index_path.display())
114 })?,
115 };
116
117 Ok(Self {
118 index,
119 fields,
120 writer: None,
121 index_path: index_path.to_path_buf(),
122 member: None,
123 })
124 }
125
126 fn get_writer(&mut self) -> Result<&mut IndexWriter> {
128 if self.writer.is_none() {
129 let buffer_size = std::cmp::min(DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE);
130 let writer = self.index.writer(buffer_size)?;
131 self.writer = Some(writer);
132 }
133 self.writer
134 .as_mut()
135 .ok_or_else(|| anyhow::anyhow!("IndexWriter not initialized"))
136 }
137
138 pub fn add_crate_items(
140 &mut self,
141 crate_name: &str,
142 version: &str,
143 crate_data: &Crate,
144 ) -> Result<()> {
145 let query = DocQuery::new(crate_data.clone());
146 let items = query.list_items(None); if items.len() > MAX_ITEMS_PER_CRATE {
150 return Err(anyhow::anyhow!(
151 "Crate has too many items ({}), max allowed: {}",
152 items.len(),
153 MAX_ITEMS_PER_CRATE
154 ));
155 }
156
157 self.add_items_to_index(crate_name, version, &items)?;
158 Ok(())
159 }
160
161 fn add_items_to_index(
163 &mut self,
164 crate_name: &str,
165 version: &str,
166 items: &[ItemInfo],
167 ) -> Result<()> {
168 let mut documents = Vec::new();
170 for item in items {
171 let doc = self.create_document_from_item(crate_name, version, item)?;
172 documents.push(doc);
173 }
174
175 let writer = self.get_writer()?;
177 for doc in documents {
178 writer.add_document(doc)?;
179 }
180
181 writer.commit()?;
182 Ok(())
183 }
184
185 fn create_document_from_item(
187 &self,
188 crate_name: &str,
189 version: &str,
190 item: &ItemInfo,
191 ) -> Result<TantivyDocument> {
192 let item_id: u64 = item
193 .id
194 .parse()
195 .with_context(|| format!("Failed to parse item ID: {}", item.id))?;
196
197 let path_str = item.path.join("::");
198 let docs_str = item.docs.clone().unwrap_or_default();
199
200 let mut doc = doc!(
201 self.fields.name => item.name.clone(),
202 self.fields.docs => docs_str,
203 self.fields.path => path_str,
204 self.fields.kind => item.kind.clone(),
205 self.fields.crate_name => crate_name.to_string(),
206 self.fields.version => version.to_string(),
207 self.fields.item_id => item_id,
208 self.fields.visibility => item.visibility.clone(),
209 );
210
211 if let Some(member_name) = &self.member {
213 doc.add_text(self.fields.member, member_name.clone());
214 }
215
216 Ok(doc)
217 }
218
219 pub fn has_documents(&self) -> Result<bool> {
221 let reader = self.index.reader()?;
222 let searcher = reader.searcher();
223 let count = searcher.num_docs();
224 Ok(count > 0)
225 }
226
227 pub fn get_index(&self) -> &Index {
229 &self.index
230 }
231
232 pub fn get_name_field(&self) -> Field {
234 self.fields.name
235 }
236
237 pub fn get_docs_field(&self) -> Field {
238 self.fields.docs
239 }
240
241 pub fn get_path_field(&self) -> Field {
242 self.fields.path
243 }
244
245 pub fn get_kind_field(&self) -> Field {
246 self.fields.kind
247 }
248
249 pub fn get_crate_name_field(&self) -> Field {
250 self.fields.crate_name
251 }
252
253 pub fn get_version_field(&self) -> Field {
254 self.fields.version
255 }
256
257 pub fn get_item_id_field(&self) -> Field {
258 self.fields.item_id
259 }
260
261 pub fn get_visibility_field(&self) -> Field {
262 self.fields.visibility
263 }
264
265 pub fn get_member_field(&self) -> Field {
266 self.fields.member
267 }
268}
269
270impl std::fmt::Debug for SearchIndexer {
271 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272 f.debug_struct("SearchIndexer")
273 .field("index", &"<Index>")
274 .field("fields", &self.fields)
275 .field("writer", &self.writer.is_some())
276 .field("index_path", &self.index_path)
277 .field("member", &self.member)
278 .finish()
279 }
280}
281
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Asserts that the indexer's index has no searchable segments.
    fn assert_no_searchable_segments(indexer: &SearchIndexer) {
        assert!(
            indexer
                .get_index()
                .searchable_segment_ids()
                .expect("Failed to get searchable segment IDs")
                .is_empty()
        );
    }

    /// A freshly created index at an explicit path starts out empty.
    #[test]
    fn test_create_indexer() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let index_path = temp_dir.path().join("test_index");

        let indexer = SearchIndexer::new_at_path(&index_path)
            .expect("Failed to create search indexer for test");

        assert_no_searchable_segments(&indexer);
    }

    /// An indexer resolved through `CacheStorage` for a crate name and version can be
    /// created and starts out empty.
    #[test]
    fn test_crate_name_validation() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let storage = CacheStorage::new(Some(temp_dir.path().to_path_buf()))
            .expect("Failed to create storage");

        let indexer = SearchIndexer::new_for_crate("test-crate", "1.0.0", &storage, None)
            .expect("Failed to create search indexer for test");

        assert_no_searchable_segments(&indexer);
    }
}
320}