Skip to main content

luci/
reader.rs

1//! IndexReader: open committed index, provide search snapshots.
2//!
3//! Reads all committed segments from storage and creates a `Searcher`
4//! snapshot for querying.
5//!
6//! See [[architecture-query-execution]] and [[architecture-overview#Step 10]].
7
8use crate::core::Result;
9use crate::storage::Storage;
10
11use crate::segment::reader::SegmentReader;
12
13/// Opens an index for reading by loading all committed segments.
14pub struct IndexReader {
15    pub(crate) segments: Vec<SegmentReader>,
16}
17
18impl IndexReader {
19    /// Open an index from storage, loading all committed segments.
20    /// Per-field vector indexes live in their own extents (see
21    /// [[global-vector-indices]]) and are read separately
22    /// via [`crate::storage::Storage::read_vector_index`].
23    pub fn open(storage: &dyn Storage) -> Result<Self> {
24        let mut segments = Vec::new();
25        for entry in storage.segments() {
26            let data = storage.read_segment(entry.segment_id)?;
27            let reader = SegmentReader::open(data)?;
28            segments.push(reader);
29        }
30        Ok(Self { segments })
31    }
32
33    /// Consume the reader and return the segment readers.
34    pub fn into_segments(self) -> Vec<SegmentReader> {
35        self.segments
36    }
37
38    /// Number of loaded segments.
39    pub fn num_segments(&self) -> usize {
40        self.segments.len()
41    }
42
43    /// Total documents across all segments.
44    pub fn total_docs(&self) -> u32 {
45        self.segments.iter().map(|s| s.doc_count()).sum()
46    }
47}
48
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::AnalyzerRegistry;
    use crate::mapping::{FieldType, Mapping};
    use crate::query::term::TermQuery;
    use crate::search::searcher::Searcher;
    use crate::search::segment_store::SegmentStore;
    use crate::storage::SingleFileDirectory;
    use crate::writer::IndexWriter;
    use std::path::{Path, PathBuf};

    /// Build a per-process, per-test path under the system temp directory and
    /// clear out anything a previous run may have left there.
    fn test_dir(name: &str) -> PathBuf {
        let unique = format!("luci_reader_test_{}_{name}", std::process::id());
        let dir = std::env::temp_dir().join(unique);
        // Best effort: the path usually does not exist yet, so errors are ignored.
        let _ = std::fs::remove_dir_all(&dir);
        dir
    }

    /// Best-effort removal of a test path; failures are deliberately ignored.
    ///
    /// NOTE(review): if `SingleFileDirectory` writes a plain file rather than a
    /// directory, `remove_dir_all` may fail silently here — confirm.
    fn cleanup(path: &Path) {
        let _ = std::fs::remove_dir_all(path);
    }

    /// Shorthand for a term query on `field` matching `value`.
    fn term(field: &'static str, value: &'static str) -> TermQuery {
        TermQuery {
            field: field.into(),
            value: value.into(),
        }
    }

    /// Three documents in one commit: the reader sees all of them and a term
    /// query matches exactly the two that contain the term.
    #[test]
    fn open_and_search() {
        let index_path = test_dir("open_search");
        let write_storage = SingleFileDirectory::create(&index_path).unwrap();
        let mapping = Mapping::builder().field("title", FieldType::Text).build();
        let mut writer = IndexWriter::new(write_storage, mapping, AnalyzerRegistry::new());

        for doc in [
            serde_json::json!({"title": "hello world"}),
            serde_json::json!({"title": "hello luci"}),
            serde_json::json!({"title": "goodbye world"}),
        ] {
            writer.add(doc).unwrap();
        }
        writer.commit().unwrap();

        let read_storage = SingleFileDirectory::open(&index_path).unwrap();
        let index = IndexReader::open(&read_storage).unwrap();
        assert_eq!(index.total_docs(), 3);

        let store = SegmentStore::new(index.into_segments(), AnalyzerRegistry::new(), None, None);
        let searcher = Searcher::new(&store);

        let hello = searcher
            .search_query(&term("title", "hello"), 10, 0)
            .unwrap();
        assert_eq!(hello.total_hits.value, 2);

        cleanup(&index_path);
    }

    /// Two commits produce two segments; queries span both of them.
    #[test]
    fn multi_segment_reader() {
        let index_path = test_dir("multi_seg");
        let write_storage = SingleFileDirectory::create(&index_path).unwrap();
        let mapping = Mapping::builder().field("body", FieldType::Text).build();
        let mut writer = IndexWriter::new(write_storage, mapping, AnalyzerRegistry::new());

        // Each add is followed by its own commit so that every document ends
        // up in a separate segment.
        for doc in [
            serde_json::json!({"body": "first segment doc"}),
            serde_json::json!({"body": "second segment doc"}),
        ] {
            writer.add(doc).unwrap();
            writer.commit().unwrap();
        }

        let read_storage = SingleFileDirectory::open(&index_path).unwrap();
        let index = IndexReader::open(&read_storage).unwrap();
        assert_eq!(index.num_segments(), 2);
        assert_eq!(index.total_docs(), 2);

        let store = SegmentStore::new(index.into_segments(), AnalyzerRegistry::new(), None, None);
        let searcher = Searcher::new(&store);

        let in_both = searcher
            .search_query(&term("body", "segment"), 10, 0)
            .unwrap();
        assert_eq!(in_both.total_hits.value, 2);

        let in_first_only = searcher
            .search_query(&term("body", "first"), 10, 0)
            .unwrap();
        assert_eq!(in_first_only.total_hits.value, 1);

        cleanup(&index_path);
    }

    /// A commit with no documents yields no segments and no hits.
    #[test]
    fn empty_index() {
        let index_path = test_dir("empty");
        let write_storage = SingleFileDirectory::create(&index_path).unwrap();
        let mapping = Mapping::builder().field("x", FieldType::Text).build();
        let mut writer = IndexWriter::new(write_storage, mapping, AnalyzerRegistry::new());
        writer.commit().unwrap();

        let read_storage = SingleFileDirectory::open(&index_path).unwrap();
        let index = IndexReader::open(&read_storage).unwrap();
        assert_eq!(index.num_segments(), 0);
        assert_eq!(index.total_docs(), 0);

        let store = SegmentStore::new(index.into_segments(), AnalyzerRegistry::new(), None, None);
        let searcher = Searcher::new(&store);

        let nothing = searcher
            .search_query(&term("x", "anything"), 10, 0)
            .unwrap();
        assert_eq!(nothing.total_hits.value, 0);

        cleanup(&index_path);
    }
}