pub mod config;
use std::sync::Arc;
use crate::analysis::analyzer::analyzer::Analyzer;
use crate::error::Result;
use crate::lexical::core::document::Document;
use crate::lexical::index::LexicalIndex;
use crate::lexical::index::factory::LexicalIndexFactory;
use crate::lexical::index::inverted::InvertedIndexStats;
use crate::lexical::query::LexicalSearchResults;
use crate::lexical::search::searcher::{LexicalSearchRequest, LexicalSearcher};
use crate::lexical::store::config::LexicalIndexConfig;
use crate::lexical::writer::LexicalIndexWriter;
use crate::storage::Storage;
use parking_lot::Mutex;
use parking_lot::RwLock;
/// Facade over a lexical index that lazily caches one writer and one
/// searcher, invalidating them as the index changes.
pub struct LexicalStore {
    // Underlying index implementation; concrete type is chosen by
    // `LexicalIndexFactory::open_or_create`.
    index: Box<dyn LexicalIndex>,
    // Lazily created writer. Taken out (and committed) by `commit`,
    // cleared by `close`/`add_field`/`delete_field`.
    writer_cache: Mutex<Option<Box<dyn LexicalIndexWriter>>>,
    // Lazily created searcher. Reset to `None` whenever committed index
    // state may have changed (commit/optimize/refresh/schema changes).
    searcher_cache: RwLock<Option<Box<dyn LexicalSearcher>>>,
}
impl std::fmt::Debug for LexicalStore {
    /// Formats the store for diagnostics, exposing only the underlying
    /// index; the writer/searcher caches are deliberately omitted so that
    /// formatting never touches their locks.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("LexicalStore");
        builder.field("index", &self.index);
        builder.finish()
    }
}
impl LexicalStore {
    /// Opens (or creates) a lexical index on `storage` with `config`, and
    /// wraps it with empty writer/searcher caches.
    pub fn new(storage: Arc<dyn Storage>, config: LexicalIndexConfig) -> Result<Self> {
        let index = LexicalIndexFactory::open_or_create(storage, config)?;
        Ok(Self {
            index,
            writer_cache: Mutex::new(None),
            searcher_cache: RwLock::new(None),
        })
    }

    /// Inserts or replaces the document identified by `internal_id`.
    ///
    /// The write is buffered in the cached writer and becomes visible to
    /// searches only after [`commit`](Self::commit).
    pub fn upsert_document(&self, internal_id: u64, doc: Document) -> Result<()> {
        let mut guard = self.writer_cache.lock();
        // Lazily create a writer on first mutation after a commit/close.
        if guard.is_none() {
            *guard = Some(self.index.writer()?);
        }
        guard.as_mut().unwrap().upsert_document(internal_id, doc)
    }

    /// Buffers deletion of the document identified by `internal_id`.
    pub(crate) fn delete_document_by_internal_id(&self, internal_id: u64) -> Result<()> {
        let mut guard = self.writer_cache.lock();
        // Same lazy-writer pattern as `upsert_document`.
        if guard.is_none() {
            *guard = Some(self.index.writer()?);
        }
        guard.as_mut().unwrap().delete_document(internal_id)
    }

    /// Returns internal doc ids whose `field` contains `term`, merging
    /// uncommitted writer state with the committed index, and skipping
    /// committed hits the pending writer has since updated or deleted.
    pub(crate) fn find_doc_ids_by_term(&self, field: &str, term: &str) -> Result<Vec<u64>> {
        let mut ids = Vec::new();
        // NOTE(review): the writer mutex stays held across the committed
        // search below (intentionally, for a consistent snapshot) —
        // confirm `search` never re-acquires `writer_cache`, or this
        // would deadlock.
        let guard = self.writer_cache.lock();
        if let Some(writer) = guard.as_ref()
            && let Some(writer_ids) = writer.find_doc_ids_by_term(field, term)?
        {
            ids.extend(writer_ids);
        }
        use crate::lexical::query::Query;
        use crate::lexical::query::term::TermQuery;
        // Query the committed index for the same term; documents are not
        // loaded since only ids are needed, and no limit is imposed.
        let query = Box::new(TermQuery::new(field, term)) as Box<dyn Query>;
        let request = LexicalSearchRequest::new(query)
            .limit(usize::MAX)
            .load_documents(false);
        let results = self.search(request)?;
        for hit in results.hits {
            if !ids.contains(&hit.doc_id) {
                // A committed hit may be stale: the pending writer can
                // have updated or deleted it since the last commit.
                let is_deleted = if let Some(writer) = guard.as_ref() {
                    writer.is_updated_deleted(hit.doc_id)
                } else {
                    false
                };
                if !is_deleted {
                    ids.push(hit.doc_id);
                }
            }
        }
        Ok(ids)
    }

    /// Commits buffered writes, syncs storage, refreshes the index view,
    /// and drops the cached searcher so subsequent searches see the new
    /// data.
    pub fn commit(&self) -> Result<()> {
        // `take()` both commits and discards the writer, so the next
        // mutation starts with a fresh one.
        if let Some(mut writer) = self.writer_cache.lock().take() {
            writer.commit()?;
        }
        self.index.storage().sync()?;
        self.index.refresh()?;
        *self.searcher_cache.write() = None;
        Ok(())
    }

    /// Optimizes the underlying index and invalidates the cached searcher.
    pub fn optimize(&self) -> Result<()> {
        self.index.optimize()?;
        *self.searcher_cache.write() = None;
        Ok(())
    }

    /// Drops the cached searcher so the next search re-opens one against
    /// the current index state.
    pub fn refresh(&self) -> Result<()> {
        *self.searcher_cache.write() = None;
        Ok(())
    }

    /// Returns index statistics, with the committed doc count augmented by
    /// the pending (uncommitted) writer's document count.
    pub fn stats(&self) -> Result<InvertedIndexStats> {
        let mut stats = self.index.stats()?;
        let guard = self.writer_cache.lock();
        if let Some(writer) = guard.as_ref() {
            stats.doc_count += writer.pending_docs();
        }
        Ok(stats)
    }

    /// Returns the storage backing the underlying index.
    pub fn storage(&self) -> &Arc<dyn Storage> {
        self.index.storage()
    }

    /// Executes a search, lazily building and caching a searcher.
    pub fn search(&self, request: LexicalSearchRequest) -> Result<LexicalSearchResults> {
        // Fast path: reuse the cached searcher under a shared read lock.
        {
            let guard = self.searcher_cache.read();
            if let Some(ref searcher) = *guard {
                return searcher.search(request);
            }
        }
        // Slow path: take the write lock, build the searcher unless a
        // concurrent thread already did, then downgrade to a read guard so
        // other readers are not blocked while this search runs.
        let mut guard = self.searcher_cache.write();
        if guard.is_none() {
            *guard = Some(self.index.searcher()?);
        }
        let guard = parking_lot::RwLockWriteGuard::downgrade(guard);
        guard.as_ref().unwrap().search(request)
    }

    /// Counts matches for `request`, using the same lazy searcher-cache
    /// pattern as [`search`](Self::search).
    pub fn count(&self, request: LexicalSearchRequest) -> Result<u64> {
        // Fast path under the shared read lock.
        {
            let guard = self.searcher_cache.read();
            if let Some(ref searcher) = *guard {
                return searcher.count(request);
            }
        }
        // Slow path: build-if-absent under the write lock, then downgrade.
        let mut guard = self.searcher_cache.write();
        if guard.is_none() {
            *guard = Some(self.index.searcher()?);
        }
        let guard = parking_lot::RwLockWriteGuard::downgrade(guard);
        guard.as_ref().unwrap().count(request)
    }

    /// Drops both caches and closes the underlying index.
    ///
    /// NOTE(review): the pending writer is dropped without `commit`, so
    /// uncommitted changes appear to be discarded here — confirm callers
    /// commit first.
    pub fn close(&self) -> Result<()> {
        *self.writer_cache.lock() = None;
        *self.searcher_cache.write() = None;
        self.index.close()
    }

    /// Whether the underlying index has been closed.
    pub fn is_closed(&self) -> bool {
        self.index.is_closed()
    }

    /// Returns the index's analyzer when the reader is an inverted-index
    /// reader; otherwise falls back to a fresh `StandardAnalyzer`.
    pub fn analyzer(&self) -> Result<Arc<dyn Analyzer>> {
        use crate::lexical::index::inverted::reader::InvertedIndexReader;
        let reader = self.index.reader()?;
        if let Some(inverted_reader) = reader.as_any().downcast_ref::<InvertedIndexReader>() {
            Ok(Arc::clone(inverted_reader.analyzer()))
        } else {
            use crate::analysis::analyzer::standard::StandardAnalyzer;
            Ok(Arc::new(StandardAnalyzer::new()?))
        }
    }

    /// Builds a query parser over this store's analyzer, pre-configured
    /// with the index's default fields when any are defined.
    pub fn query_parser(&self) -> Result<crate::lexical::query::parser::LexicalQueryParser> {
        let analyzer = self.analyzer()?;
        let mut parser = crate::lexical::query::parser::LexicalQueryParser::new(analyzer);
        if let Ok(fields) = self.index.default_fields()
            && !fields.is_empty()
        {
            parser = parser.with_default_fields(fields);
        }
        Ok(parser)
    }

    /// Last write-ahead-log sequence number recorded by the index.
    pub fn last_wal_seq(&self) -> u64 {
        self.index.last_wal_seq()
    }

    /// Records the WAL sequence number, routing it through the pending
    /// writer when one exists (so it is persisted with that writer's
    /// commit), otherwise directly on the index.
    pub fn set_last_wal_seq(&self, seq: u64) -> Result<()> {
        if let Some(writer) = self.writer_cache.lock().as_mut() {
            writer.set_last_wal_seq(seq)?;
        } else {
            self.index.set_last_wal_seq(seq)?;
        }
        Ok(())
    }

    /// Adds a field to the index schema, optionally registering a
    /// per-field analyzer, and invalidates both caches so they are
    /// rebuilt against the new schema.
    pub fn add_field(
        &self,
        name: &str,
        option: crate::lexical::core::field::FieldOption,
        analyzer: Option<Arc<dyn Analyzer>>,
    ) -> Result<()> {
        self.index.add_field(name, option)?;
        // The field-specific analyzer can only be attached when the
        // index's analyzer is a PerFieldAnalyzer; otherwise it is
        // silently ignored.
        if let Some(field_analyzer) = analyzer
            && let Ok(index_analyzer) = self.analyzer()
            && let Some(pfa) = index_analyzer
                .as_any()
                .downcast_ref::<crate::analysis::analyzer::per_field::PerFieldAnalyzer>()
        {
            pfa.add_analyzer(name, field_analyzer);
        }
        *self.writer_cache.lock() = None;
        *self.searcher_cache.write() = None;
        Ok(())
    }

    /// Removes a field from the index schema, drops any per-field
    /// analyzer registered for it, and invalidates both caches.
    pub fn delete_field(&self, name: &str) -> Result<()> {
        self.index.delete_field(name)?;
        if let Ok(index_analyzer) = self.analyzer()
            && let Some(pfa) = index_analyzer
                .as_any()
                .downcast_ref::<crate::analysis::analyzer::per_field::PerFieldAnalyzer>()
        {
            pfa.remove_analyzer(name);
        }
        *self.writer_cache.lock() = None;
        *self.searcher_cache.write() = None;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexical::query::Query;
    use crate::lexical::query::term::TermQuery;
    use crate::lexical::store::config::LexicalIndexConfig;
    use crate::storage::file::{FileStorage, FileStorageConfig};
    use crate::storage::memory::{MemoryStorage, MemoryStorageConfig};
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Builds a two-field document with the given title and body text.
    fn create_test_document(title: &str, body: &str) -> Document {
        Document::builder()
            .add_text("title", title)
            .add_text("body", body)
            .build()
    }

    /// Opens a file-backed store rooted at `dir` with the default config.
    fn file_backed_store(dir: &TempDir) -> LexicalStore {
        let storage = Arc::new(
            FileStorage::new(dir.path(), FileStorageConfig::new(dir.path())).unwrap(),
        );
        LexicalStore::new(storage, LexicalIndexConfig::default()).unwrap()
    }

    /// Builds a `TermQuery` search request over `field` for `term`.
    fn term_request(field: &str, term: &str) -> LexicalSearchRequest {
        let query = Box::new(TermQuery::new(field, term)) as Box<dyn Query>;
        LexicalSearchRequest::new(query)
    }

    #[test]
    fn test_search_engine_creation() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        assert!(!engine.is_closed());
    }

    #[test]
    fn test_search_engine_in_memory() {
        let storage = Arc::new(MemoryStorage::new(MemoryStorageConfig::default()));
        let engine = LexicalStore::new(storage, LexicalIndexConfig::default()).unwrap();
        let docs = [
            create_test_document("Test Document 1", "Content of test document 1"),
            create_test_document("Test Document 2", "Content of test document 2"),
        ];
        for (i, doc) in docs.into_iter().enumerate() {
            engine.upsert_document((i + 1) as u64, doc).unwrap();
        }
        engine.commit().unwrap();
        let _results = engine.search(term_request("title", "Test")).unwrap();
        assert!(!engine.is_closed());
    }

    #[test]
    fn test_search_engine_open() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        engine.close().unwrap();
        // Re-opening over the same directory must succeed.
        let reopened = file_backed_store(&temp_dir);
        assert!(!reopened.is_closed());
    }

    #[test]
    fn test_upsert_document() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let doc = create_test_document("Hello World", "This is a test document");
        engine.upsert_document(1, doc).unwrap();
        engine.commit().unwrap();
        let _stats = engine.stats().unwrap();
    }

    #[test]
    fn test_upsert_multiple_documents() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let docs = [
            create_test_document("First Document", "Content of first document"),
            create_test_document("Second Document", "Content of second document"),
            create_test_document("Third Document", "Content of third document"),
        ];
        for (i, doc) in docs.into_iter().enumerate() {
            engine.upsert_document((i + 1) as u64, doc).unwrap();
        }
        engine.commit().unwrap();
        let _stats = engine.stats().unwrap();
    }

    #[test]
    fn test_search_empty_index() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let results = engine.search(term_request("title", "hello")).unwrap();
        assert_eq!(results.hits.len(), 0);
        assert_eq!(results.total_hits, 0);
        assert_eq!(results.max_score, 0.0);
    }

    #[test]
    fn test_search_with_documents() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let docs = [
            create_test_document("Hello World", "This is a test document"),
            create_test_document("Goodbye World", "This is another test document"),
        ];
        for (i, doc) in docs.into_iter().enumerate() {
            engine.upsert_document((i + 1) as u64, doc).unwrap();
        }
        engine.commit().unwrap();
        let _results = engine.search(term_request("title", "Hello")).unwrap();
    }

    #[test]
    fn test_count_query() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let count = engine.count(term_request("title", "hello")).unwrap();
        assert_eq!(count, 0);
    }

    #[test]
    fn test_engine_refresh() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let doc = create_test_document("Test Document", "Test content");
        engine.upsert_document(1, doc).unwrap();
        engine.commit().unwrap();
        engine.refresh().unwrap();
        let _results = engine.search(term_request("title", "Test")).unwrap();
    }

    #[test]
    fn test_engine_stats() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let stats = engine.stats().unwrap();
        assert!(stats.last_modified > 0);
    }

    #[test]
    fn test_engine_close() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        assert!(!engine.is_closed());
        engine.close().unwrap();
        assert!(engine.is_closed());
    }

    #[test]
    fn test_search_request_configuration() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let request = term_request("title", "hello")
            .limit(5)
            .min_score(0.5)
            .load_documents(false);
        let results = engine.search(request).unwrap();
        assert_eq!(results.hits.len(), 0);
        assert_eq!(results.total_hits, 0);
    }

    #[test]
    fn test_search_with_query_parser() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let docs = [
            create_test_document("hello world", "This is a test document"),
            create_test_document("goodbye world", "This is another test document"),
        ];
        for (i, doc) in docs.into_iter().enumerate() {
            engine.upsert_document((i + 1) as u64, doc).unwrap();
        }
        engine.commit().unwrap();
        use crate::lexical::query::parser::LexicalQueryParser;
        let parser = LexicalQueryParser::with_standard_analyzer()
            .unwrap()
            .with_default_field("title");
        let query = parser.parse("Hello").unwrap();
        let results = engine.search(LexicalSearchRequest::new(query)).unwrap();
        assert_eq!(results.hits.len(), 1);
        assert_eq!(results.total_hits, 1);
    }

    #[test]
    fn test_search_field_with_string() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        use crate::analysis::analyzer::standard::StandardAnalyzer;
        use crate::lexical::query::parser::LexicalQueryParser;
        let analyzer = Arc::new(StandardAnalyzer::new().unwrap());
        let parser = LexicalQueryParser::new(analyzer);
        let query = parser.parse_field("title", "hello world").unwrap();
        let results = engine.search(LexicalSearchRequest::new(query)).unwrap();
        assert_eq!(results.hits.len(), 0);
    }

    #[test]
    fn test_find_doc_ids_by_term() {
        let temp_dir = TempDir::new().unwrap();
        let engine = file_backed_store(&temp_dir);
        let doc = Document::builder()
            .add_text("title", "Test Doc")
            .add_text("_id", "ext_1")
            .build();
        engine.upsert_document(1, doc).unwrap();
        engine.commit().unwrap();
        let found_ids = engine.find_doc_ids_by_term("_id", "ext_1").unwrap();
        assert_eq!(found_ids, vec![1]);
        let not_found = engine.find_doc_ids_by_term("_id", "ext_999").unwrap();
        assert!(not_found.is_empty());
    }
}