lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
//! IndexReader: open committed index, provide search snapshots.
//!
//! Reads all committed segments from storage and creates a `Searcher`
//! snapshot for querying.
//!
//! See [[architecture-query-execution]] and [[architecture-overview#Step 10]].

use crate::core::Result;
use crate::storage::Storage;

use crate::segment::reader::SegmentReader;

/// Opens an index for reading by loading all committed segments.
///
/// Holds one [`SegmentReader`] per segment that [`IndexReader::open`] loaded
/// from storage.
pub struct IndexReader {
    /// Loaded segment readers, in the order `Storage::segments` listed them
    /// when the index was opened.
    pub(crate) segments: Vec<SegmentReader>,
}

impl IndexReader {
    /// Build a reader over every segment currently listed by `storage`.
    ///
    /// Segments are read and opened one at a time, in listing order. The
    /// first failure aborts the whole open and no reader is produced.
    ///
    /// Per-field vector indexes are not loaded here: they live in their own
    /// extents (see [[global-vector-indices]]) and are read separately
    /// via [`crate::storage::Storage::read_vector_index`].
    ///
    /// # Errors
    ///
    /// Propagates any error from reading a segment out of storage or from
    /// [`SegmentReader::open`].
    pub fn open(storage: &dyn Storage) -> Result<Self> {
        let segments = storage
            .segments()
            .into_iter()
            .map(|entry| -> Result<SegmentReader> {
                let bytes = storage.read_segment(entry.segment_id)?;
                let segment = SegmentReader::open(bytes)?;
                Ok(segment)
            })
            // Collecting into `Result` stops at the first failing segment.
            .collect::<Result<Vec<_>>>()?;
        Ok(Self { segments })
    }

    /// Give up the reader and hand its segment readers to the caller.
    pub fn into_segments(self) -> Vec<SegmentReader> {
        let Self { segments } = self;
        segments
    }

    /// How many segments this reader holds.
    pub fn num_segments(&self) -> usize {
        let loaded = &self.segments;
        loaded.len()
    }

    /// Sum of the per-segment document counts.
    pub fn total_docs(&self) -> u32 {
        let per_segment = self.segments.iter().map(|segment| segment.doc_count());
        per_segment.sum()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::AnalyzerRegistry;
    use crate::query::term::TermQuery;
    use crate::writer::IndexWriter;

    use crate::mapping::{FieldType, Mapping};
    use crate::storage::SingleFileDirectory;

    /// Scratch location for one test's on-disk index.
    ///
    /// The path is removed when the guard is dropped, so cleanup also runs
    /// when a test panics on a failed assertion or `unwrap`. Bind the guard
    /// as the first local of a test: locals drop in reverse declaration
    /// order, so the path is removed only after the writer, storage and
    /// searcher created later have been dropped.
    struct TestDir {
        path: std::path::PathBuf,
    }

    impl Drop for TestDir {
        fn drop(&mut self) {
            // Best effort: a failed removal must not mask the test outcome.
            let _ = std::fs::remove_dir_all(&self.path);
        }
    }

    /// Reserve a per-process, per-test path under the system temp directory.
    ///
    /// Anything already at that path is removed first (best effort) so each
    /// test starts from a clean slate.
    fn test_dir(name: &str) -> TestDir {
        let path =
            std::env::temp_dir().join(format!("luci_reader_test_{}_{name}", std::process::id()));
        let _ = std::fs::remove_dir_all(&path);
        TestDir { path }
    }

    /// Three documents committed once are all visible through a freshly
    /// opened reader, and a term query finds the two that contain "hello".
    #[test]
    fn open_and_search() {
        let dir = test_dir("open_search");
        let storage = SingleFileDirectory::create(&dir.path).unwrap();
        let schema = Mapping::builder().field("title", FieldType::Text).build();
        let mut writer = IndexWriter::new(storage, schema, AnalyzerRegistry::new());

        writer
            .add(serde_json::json!({"title": "hello world"}))
            .unwrap();
        writer
            .add(serde_json::json!({"title": "hello luci"}))
            .unwrap();
        writer
            .add(serde_json::json!({"title": "goodbye world"}))
            .unwrap();
        writer.commit().unwrap();

        let storage = SingleFileDirectory::open(&dir.path).unwrap();
        let reader = IndexReader::open(&storage).unwrap();
        assert_eq!(reader.total_docs(), 3);

        let store = crate::search::segment_store::SegmentStore::new(
            reader.into_segments(),
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = crate::search::searcher::Searcher::new(&store);
        let results = searcher
            .search_query(
                &TermQuery {
                    field: "title".into(),
                    value: "hello".into(),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 2);
    }

    /// Two separate commits yield two segments; queries see documents from
    /// both of them.
    #[test]
    fn multi_segment_reader() {
        let dir = test_dir("multi_seg");
        let storage = SingleFileDirectory::create(&dir.path).unwrap();
        let schema = Mapping::builder().field("body", FieldType::Text).build();
        let mut writer = IndexWriter::new(storage, schema, AnalyzerRegistry::new());

        writer
            .add(serde_json::json!({"body": "first segment doc"}))
            .unwrap();
        writer.commit().unwrap();

        writer
            .add(serde_json::json!({"body": "second segment doc"}))
            .unwrap();
        writer.commit().unwrap();

        let storage = SingleFileDirectory::open(&dir.path).unwrap();
        let reader = IndexReader::open(&storage).unwrap();
        assert_eq!(reader.num_segments(), 2);
        assert_eq!(reader.total_docs(), 2);

        let store = crate::search::segment_store::SegmentStore::new(
            reader.into_segments(),
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = crate::search::searcher::Searcher::new(&store);
        // "segment" occurs in both documents, i.e. in both segments.
        let results = searcher
            .search_query(
                &TermQuery {
                    field: "body".into(),
                    value: "segment".into(),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 2);
        // "first" occurs only in the document from the first commit.
        let results = searcher
            .search_query(
                &TermQuery {
                    field: "body".into(),
                    value: "first".into(),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 1);
    }

    /// A commit with no documents produces a reader with no segments, and
    /// searching it returns zero hits rather than an error.
    #[test]
    fn empty_index() {
        let dir = test_dir("empty");
        let storage = SingleFileDirectory::create(&dir.path).unwrap();
        let schema = Mapping::builder().field("x", FieldType::Text).build();
        let mut writer = IndexWriter::new(storage, schema, AnalyzerRegistry::new());
        writer.commit().unwrap();

        let storage = SingleFileDirectory::open(&dir.path).unwrap();
        let reader = IndexReader::open(&storage).unwrap();
        assert_eq!(reader.num_segments(), 0);
        assert_eq!(reader.total_docs(), 0);

        let store = crate::search::segment_store::SegmentStore::new(
            reader.into_segments(),
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = crate::search::searcher::Searcher::new(&store);
        let results = searcher
            .search_query(
                &TermQuery {
                    field: "x".into(),
                    value: "anything".into(),
                },
                10,
                0,
            )
            .unwrap();
        assert_eq!(results.total_hits.value, 0);
    }
}