rust_docs_mcp/search/
indexer.rs

//! # Search Indexer Module
//!
//! Provides Tantivy-based indexing for Rust documentation search.
//!
//! ## Key Components
//! - [`SearchIndexer`] - Main indexer for creating and managing search indices
//! - [`IndexFields`] - Schema definition for indexed fields
//!
//! ## Example
//! ```no_run
//! # use std::path::Path;
//! # use anyhow::Result;
//! # use rust_docs_mcp::search::indexer::SearchIndexer;
//! # use rust_docs_mcp::cache::storage::CacheStorage;
//! # fn main() -> Result<()> {
//! let storage = CacheStorage::new(None)?;
//! let mut indexer = SearchIndexer::new_for_crate("tokio", "1.35.0", &storage, None)?;
//! // Add crate items to index
//! # Ok(())
//! # }
//! ```
22
23use crate::cache::storage::CacheStorage;
24use crate::docs::query::{DocQuery, ItemInfo};
25use crate::search::config::{DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE, MAX_ITEMS_PER_CRATE};
26use anyhow::{Context, Result};
27use rustdoc_types::Crate;
28use std::path::{Path, PathBuf};
29use tantivy::{
30    Index, IndexWriter, TantivyDocument, doc,
31    schema::{FAST, Field, STORED, STRING, Schema, TEXT},
32};
33
34/// Tantivy-based search indexer for Rust documentation
35pub struct SearchIndexer {
36    index: Index,
37    fields: IndexFields,
38    writer: Option<IndexWriter>,
39    index_path: PathBuf,
40    member: Option<String>,
41}
42
43#[derive(Debug, Clone)]
44pub struct IndexFields {
45    name: Field,
46    docs: Field,
47    path: Field,
48    kind: Field,
49    crate_name: Field,
50    version: Field,
51    item_id: Field,
52    visibility: Field,
53    member: Field,
54}
55
56impl SearchIndexer {
57    /// Create a new search indexer instance for a specific crate
58    pub fn new_for_crate(
59        crate_name: &str,
60        version: &str,
61        storage: &CacheStorage,
62        member: Option<&str>,
63    ) -> Result<Self> {
64        let index_path = storage.search_index_path(crate_name, version, member)?;
65
66        let mut indexer = Self::new_at_path(&index_path)?;
67        indexer.member = member.map(|s| s.to_string());
68        Ok(indexer)
69    }
70
71    /// Create a new search indexer instance at a specific path
72    pub fn new_at_path(index_path: &Path) -> Result<Self> {
73        let mut schema_builder = Schema::builder();
74
75        // Searchable fields
76        let name_field = schema_builder.add_text_field("name", TEXT | STORED);
77        let docs_field = schema_builder.add_text_field("docs", TEXT);
78        let path_field = schema_builder.add_text_field("path", TEXT | STORED);
79        let kind_field = schema_builder.add_text_field("kind", STRING | STORED);
80
81        // Metadata fields
82        let crate_field = schema_builder.add_text_field("crate", STRING | STORED);
83        let version_field = schema_builder.add_text_field("version", STRING | STORED);
84        let item_id_field = schema_builder.add_u64_field("item_id", FAST | STORED);
85        let visibility_field = schema_builder.add_text_field("visibility", TEXT | STORED);
86        let member_field = schema_builder.add_text_field("member", STRING | STORED);
87
88        let schema = schema_builder.build();
89
90        let fields = IndexFields {
91            name: name_field,
92            docs: docs_field,
93            path: path_field,
94            kind: kind_field,
95            crate_name: crate_field,
96            version: version_field,
97            item_id: item_id_field,
98            visibility: visibility_field,
99            member: member_field,
100        };
101
102        // Create index directory
103        std::fs::create_dir_all(index_path).with_context(|| {
104            format!(
105                "Failed to create search index directory: {}",
106                index_path.display()
107            )
108        })?;
109
110        let index = match Index::open_in_dir(index_path) {
111            Ok(index) => index,
112            Err(_) => Index::create_in_dir(index_path, schema.clone()).with_context(|| {
113                format!("Failed to create search index at: {}", index_path.display())
114            })?,
115        };
116
117        Ok(Self {
118            index,
119            fields,
120            writer: None,
121            index_path: index_path.to_path_buf(),
122            member: None,
123        })
124    }
125
126    /// Get or create an IndexWriter with proper buffer size
127    fn get_writer(&mut self) -> Result<&mut IndexWriter> {
128        if self.writer.is_none() {
129            let buffer_size = std::cmp::min(DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE);
130            let writer = self.index.writer(buffer_size)?;
131            self.writer = Some(writer);
132        }
133        self.writer
134            .as_mut()
135            .ok_or_else(|| anyhow::anyhow!("IndexWriter not initialized"))
136    }
137
138    /// Add crate items to the search index
139    pub fn add_crate_items(
140        &mut self,
141        crate_name: &str,
142        version: &str,
143        crate_data: &Crate,
144    ) -> Result<()> {
145        let query = DocQuery::new(crate_data.clone());
146        let items = query.list_items(None); // Get all items without filtering
147
148        // Limit number of items to prevent resource exhaustion
149        if items.len() > MAX_ITEMS_PER_CRATE {
150            return Err(anyhow::anyhow!(
151                "Crate has too many items ({}), max allowed: {}",
152                items.len(),
153                MAX_ITEMS_PER_CRATE
154            ));
155        }
156
157        self.add_items_to_index(crate_name, version, &items)?;
158        Ok(())
159    }
160
161    /// Add items to the search index
162    fn add_items_to_index(
163        &mut self,
164        crate_name: &str,
165        version: &str,
166        items: &[ItemInfo],
167    ) -> Result<()> {
168        // Create all documents first
169        let mut documents = Vec::new();
170        for item in items {
171            let doc = self.create_document_from_item(crate_name, version, item)?;
172            documents.push(doc);
173        }
174
175        // Then add all documents to the writer
176        let writer = self.get_writer()?;
177        for doc in documents {
178            writer.add_document(doc)?;
179        }
180
181        writer.commit()?;
182        Ok(())
183    }
184
185    /// Create a Tantivy document from an ItemInfo
186    fn create_document_from_item(
187        &self,
188        crate_name: &str,
189        version: &str,
190        item: &ItemInfo,
191    ) -> Result<TantivyDocument> {
192        let item_id: u64 = item
193            .id
194            .parse()
195            .with_context(|| format!("Failed to parse item ID: {}", item.id))?;
196
197        let path_str = item.path.join("::");
198        let docs_str = item.docs.clone().unwrap_or_default();
199
200        let mut doc = doc!(
201            self.fields.name => item.name.clone(),
202            self.fields.docs => docs_str,
203            self.fields.path => path_str,
204            self.fields.kind => item.kind.clone(),
205            self.fields.crate_name => crate_name.to_string(),
206            self.fields.version => version.to_string(),
207            self.fields.item_id => item_id,
208            self.fields.visibility => item.visibility.clone(),
209        );
210
211        // Add member field if present
212        if let Some(member_name) = &self.member {
213            doc.add_text(self.fields.member, member_name.clone());
214        }
215
216        Ok(doc)
217    }
218
219    /// Check if the index has any documents
220    pub fn has_documents(&self) -> Result<bool> {
221        let reader = self.index.reader()?;
222        let searcher = reader.searcher();
223        let count = searcher.num_docs();
224        Ok(count > 0)
225    }
226
227    /// Get the underlying Tantivy index
228    pub fn get_index(&self) -> &Index {
229        &self.index
230    }
231
232    /// Get a specific field by name for external access
233    pub fn get_name_field(&self) -> Field {
234        self.fields.name
235    }
236
237    pub fn get_docs_field(&self) -> Field {
238        self.fields.docs
239    }
240
241    pub fn get_path_field(&self) -> Field {
242        self.fields.path
243    }
244
245    pub fn get_kind_field(&self) -> Field {
246        self.fields.kind
247    }
248
249    pub fn get_crate_name_field(&self) -> Field {
250        self.fields.crate_name
251    }
252
253    pub fn get_version_field(&self) -> Field {
254        self.fields.version
255    }
256
257    pub fn get_item_id_field(&self) -> Field {
258        self.fields.item_id
259    }
260
261    pub fn get_visibility_field(&self) -> Field {
262        self.fields.visibility
263    }
264
265    pub fn get_member_field(&self) -> Field {
266        self.fields.member
267    }
268}
269
270impl std::fmt::Debug for SearchIndexer {
271    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
272        f.debug_struct("SearchIndexer")
273            .field("index", &"<Index>")
274            .field("fields", &self.fields)
275            .field("writer", &self.writer.is_some())
276            .field("index_path", &self.index_path)
277            .field("member", &self.member)
278            .finish()
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Asserts that the indexer's index has no searchable segments yet.
    fn assert_no_searchable_segments(indexer: &SearchIndexer) {
        let segment_ids = indexer
            .get_index()
            .searchable_segment_ids()
            .expect("Failed to get searchable segment IDs");
        assert!(segment_ids.is_empty());
    }

    /// A freshly created indexer at an explicit path starts out empty.
    #[test]
    fn test_create_indexer() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let index_path = temp_dir.path().join("test_index");

        let indexer = SearchIndexer::new_at_path(&index_path)
            .expect("Failed to create search indexer for test");

        assert_no_searchable_segments(&indexer);
    }

    /// An indexer can be created for a crate through `CacheStorage`.
    #[test]
    fn test_crate_name_validation() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let cache_root = temp_dir.path().to_path_buf();
        let storage = CacheStorage::new(Some(cache_root)).expect("Failed to create storage");

        let indexer = SearchIndexer::new_for_crate("test-crate", "1.0.0", &storage, None)
            .expect("Failed to create search indexer for test");

        // The add_crate_items method is tested integration-wise since it requires a real Crate
        // Here we just test that the indexer can be created successfully
        assert_no_searchable_segments(&indexer);
    }
}