use crate::cache::storage::CacheStorage;
use crate::docs::query::{DocQuery, ItemInfo};
use crate::search::config::{DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE, MAX_ITEMS_PER_CRATE};
use anyhow::{Context, Result};
use rustdoc_types::Crate;
use std::path::{Path, PathBuf};
use tantivy::{
Index, IndexWriter, TantivyDocument, doc,
schema::{FAST, Field, STORED, STRING, Schema, TEXT},
};
/// Owns a tantivy [`Index`] stored in a directory on disk, together with the
/// field handles needed to build documents for it.
pub struct SearchIndexer {
/// The opened (or freshly created) tantivy index.
index: Index,
/// Handles for every schema field used when building documents.
fields: IndexFields,
/// Index writer, created on first use; `None` until then.
writer: Option<IndexWriter>,
/// Directory the index was opened from or created in.
index_path: PathBuf,
/// Optional member name; when set it is written to the `member` field of
/// every document this indexer creates.
member: Option<String>,
}
/// Field handles for the search index schema, one per schema field.
#[derive(Debug, Clone)]
pub struct IndexFields {
/// Item name.
name: Field,
/// Item documentation text (empty when the item has none).
docs: Field,
/// Item path, with segments joined by `::`.
path: Field,
/// Item kind.
kind: Field,
/// Name of the crate the item belongs to.
crate_name: Field,
/// Version of the crate the item belongs to.
version: Field,
/// Item ID parsed as an unsigned 64-bit integer.
item_id: Field,
/// Item visibility.
visibility: Field,
/// Member name; only populated when the indexer was created with one.
member: Field,
}
impl SearchIndexer {
    /// Opens (or creates) the search index for one crate version.
    ///
    /// The index directory is resolved through
    /// `storage.search_index_path(crate_name, version, member)`. When `member`
    /// is given it is remembered and written into the `member` field of every
    /// document created by this indexer.
    ///
    /// # Errors
    ///
    /// Fails if the index path cannot be resolved or if
    /// [`SearchIndexer::new_at_path`] fails.
    pub fn new_for_crate(
        crate_name: &str,
        version: &str,
        storage: &CacheStorage,
        member: Option<&str>,
    ) -> Result<Self> {
        let index_path = storage.search_index_path(crate_name, version, member)?;
        let mut indexer = Self::new_at_path(&index_path)?;
        indexer.member = member.map(String::from);
        Ok(indexer)
    }

    /// Opens the index stored in `index_path`, creating the directory and a
    /// new index there if one cannot be opened.
    ///
    /// The returned indexer has no member set and no writer yet.
    ///
    /// # Errors
    ///
    /// Fails if the directory cannot be created, or if the index can neither
    /// be opened nor created. In the latter case the error message also
    /// carries the reason the open attempt failed.
    pub fn new_at_path(index_path: &Path) -> Result<Self> {
        let mut schema_builder = Schema::builder();

        // Struct-literal fields are evaluated in the order written, so the
        // fields are registered in the same order as before: name, docs,
        // path, kind, crate, version, item_id, visibility, member.
        let fields = IndexFields {
            name: schema_builder.add_text_field("name", TEXT | STORED),
            docs: schema_builder.add_text_field("docs", TEXT),
            path: schema_builder.add_text_field("path", TEXT | STORED),
            kind: schema_builder.add_text_field("kind", STRING | STORED),
            crate_name: schema_builder.add_text_field("crate", STRING | STORED),
            version: schema_builder.add_text_field("version", STRING | STORED),
            item_id: schema_builder.add_u64_field("item_id", FAST | STORED),
            visibility: schema_builder.add_text_field("visibility", TEXT | STORED),
            member: schema_builder.add_text_field("member", STRING | STORED),
        };
        let schema = schema_builder.build();

        std::fs::create_dir_all(index_path).with_context(|| {
            format!(
                "Failed to create search index directory: {}",
                index_path.display()
            )
        })?;

        // NOTE(review): an existing index is opened without comparing its
        // stored schema to the one built above, while `fields` always comes
        // from the freshly built schema. Confirm that an on-disk index can
        // never have been created with a different field layout.
        let index = match Index::open_in_dir(index_path) {
            Ok(index) => index,
            // Opening failed: fall back to creating a new index. The open
            // error is kept so that a failed creation reports both causes
            // instead of hiding why the existing index was unusable.
            Err(open_err) => Index::create_in_dir(index_path, schema).with_context(|| {
                format!(
                    "Failed to create search index at: {} (opening an existing index failed: {})",
                    index_path.display(),
                    open_err
                )
            })?,
        };

        Ok(Self {
            index,
            fields,
            writer: None,
            index_path: index_path.to_path_buf(),
            member: None,
        })
    }

    /// Returns the cached index writer, creating it on first use.
    ///
    /// The writer's buffer size is the smaller of `DEFAULT_BUFFER_SIZE` and
    /// `MAX_BUFFER_SIZE`.
    ///
    /// # Errors
    ///
    /// Fails if the index cannot hand out a writer; in that case no writer is
    /// cached and the next call tries again.
    fn get_writer(&mut self) -> Result<&mut IndexWriter> {
        let writer = match self.writer.take() {
            Some(writer) => writer,
            None => {
                let buffer_size = std::cmp::min(DEFAULT_BUFFER_SIZE, MAX_BUFFER_SIZE);
                self.index.writer(buffer_size)?
            }
        };
        // `insert` stores the writer back and yields the `&mut` to it, so no
        // "not initialized" fallback branch is needed.
        Ok(self.writer.insert(writer))
    }

    /// Indexes every item that `DocQuery::list_items(None)` reports for
    /// `crate_data`, tagging each document with `crate_name` and `version`.
    ///
    /// # Errors
    ///
    /// Fails if the crate has more than `MAX_ITEMS_PER_CRATE` items (nothing
    /// is indexed in that case), or if building or writing the documents
    /// fails.
    pub fn add_crate_items(
        &mut self,
        crate_name: &str,
        version: &str,
        crate_data: &Crate,
    ) -> Result<()> {
        let query = DocQuery::new(crate_data.clone());
        let items = query.list_items(None);
        if items.len() > MAX_ITEMS_PER_CRATE {
            anyhow::bail!(
                "Crate has too many items ({}), max allowed: {}",
                items.len(),
                MAX_ITEMS_PER_CRATE
            );
        }
        self.add_items_to_index(crate_name, version, &items)
    }

    /// Converts `items` into documents, adds them to the writer and commits.
    ///
    /// All documents are built before the writer is touched, so an item that
    /// cannot be converted aborts the call without adding anything.
    fn add_items_to_index(
        &mut self,
        crate_name: &str,
        version: &str,
        items: &[ItemInfo],
    ) -> Result<()> {
        let documents = items
            .iter()
            .map(|item| self.create_document_from_item(crate_name, version, item))
            .collect::<Result<Vec<_>>>()?;

        let writer = self.get_writer()?;
        for document in documents {
            writer.add_document(document)?;
        }
        writer.commit()?;
        Ok(())
    }

    /// Builds the index document for a single item.
    ///
    /// The item's path segments are joined with `::`, and missing docs become
    /// an empty string. The `member` field is only added when this indexer
    /// has a member set.
    ///
    /// # Errors
    ///
    /// Fails if `item.id` does not parse as a `u64`.
    fn create_document_from_item(
        &self,
        crate_name: &str,
        version: &str,
        item: &ItemInfo,
    ) -> Result<TantivyDocument> {
        let item_id: u64 = item
            .id
            .parse()
            .with_context(|| format!("Failed to parse item ID: {}", item.id))?;
        let path_str = item.path.join("::");
        let docs_str = item.docs.clone().unwrap_or_default();

        let mut document = doc!(
            self.fields.name => item.name.clone(),
            self.fields.docs => docs_str,
            self.fields.path => path_str,
            self.fields.kind => item.kind.clone(),
            self.fields.crate_name => crate_name.to_string(),
            self.fields.version => version.to_string(),
            self.fields.item_id => item_id,
            self.fields.visibility => item.visibility.clone(),
        );
        if let Some(member_name) = &self.member {
            document.add_text(self.fields.member, member_name.clone());
        }
        Ok(document)
    }

    /// Reports whether a newly created reader sees at least one document.
    ///
    /// # Errors
    ///
    /// Fails if a reader cannot be created for the index.
    pub fn has_documents(&self) -> Result<bool> {
        let reader = self.index.reader()?;
        let searcher = reader.searcher();
        Ok(searcher.num_docs() > 0)
    }

    /// Returns the underlying tantivy index.
    pub fn get_index(&self) -> &Index {
        &self.index
    }

    /// Returns the handle of the `name` field.
    pub fn get_name_field(&self) -> Field {
        self.fields.name
    }

    /// Returns the handle of the `docs` field.
    pub fn get_docs_field(&self) -> Field {
        self.fields.docs
    }

    /// Returns the handle of the `path` field.
    pub fn get_path_field(&self) -> Field {
        self.fields.path
    }

    /// Returns the handle of the `kind` field.
    pub fn get_kind_field(&self) -> Field {
        self.fields.kind
    }

    /// Returns the handle of the `crate` field.
    pub fn get_crate_name_field(&self) -> Field {
        self.fields.crate_name
    }

    /// Returns the handle of the `version` field.
    pub fn get_version_field(&self) -> Field {
        self.fields.version
    }

    /// Returns the handle of the `item_id` field.
    pub fn get_item_id_field(&self) -> Field {
        self.fields.item_id
    }

    /// Returns the handle of the `visibility` field.
    pub fn get_visibility_field(&self) -> Field {
        self.fields.visibility
    }

    /// Returns the handle of the `member` field.
    pub fn get_member_field(&self) -> Field {
        self.fields.member
    }
}
/// Manual `Debug` implementation: the index is printed as the placeholder
/// `"<Index>"` and the writer only as a flag telling whether one exists.
impl std::fmt::Debug for SearchIndexer {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let writer_present = self.writer.is_some();

        let mut out = formatter.debug_struct("SearchIndexer");
        out.field("index", &"<Index>");
        out.field("fields", &self.fields);
        out.field("writer", &writer_present);
        out.field("index_path", &self.index_path);
        out.field("member", &self.member);
        out.finish()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Asserts that the indexer's index currently has no searchable segments.
    fn assert_no_searchable_segments(indexer: &SearchIndexer) {
        let segment_ids = indexer
            .get_index()
            .searchable_segment_ids()
            .expect("Failed to get searchable segment IDs");
        assert!(segment_ids.is_empty());
    }

    /// An index created at an explicit path starts out empty.
    #[test]
    fn test_create_indexer() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let index_path = temp_dir.path().join("test_index");

        let indexer = SearchIndexer::new_at_path(&index_path)
            .expect("Failed to create search indexer for test");

        assert_no_searchable_segments(&indexer);
    }

    /// An index created through `CacheStorage` for a hyphenated crate name
    /// starts out empty.
    #[test]
    fn test_crate_name_validation() {
        let temp_dir = TempDir::new().expect("Failed to create temporary directory for test");
        let cache_root = temp_dir.path().to_path_buf();
        let storage = CacheStorage::new(Some(cache_root)).expect("Failed to create storage");

        let indexer = SearchIndexer::new_for_crate("test-crate", "1.0.0", &storage, None)
            .expect("Failed to create search indexer for test");

        assert_no_searchable_segments(&indexer);
    }
}