Skip to main content

luci/search/
reader.rs

//! Reader: content retrieval from committed segments.
//!
//! Handles source retrieval, field retrieval, and source bytes access.
//! Operates on `SegmentStore` but is separate from `Searcher` (scoring).
//!
//! See [[architecture-scoring-materialization-separation]].
7
8use crate::core::{DocId, SegmentId};
9
10use crate::columnar::writer::ColumnType;
11use crate::search::results::FieldReaderCache;
12use crate::search::segment_store::SegmentStore;
13
14/// Content retrieval from committed segments.
15///
16/// `pub(crate)` — not exposed to consumers. `Hit` delegates to Reader
17/// for lazy content access.
18pub struct Reader<'a> {
19    store: &'a SegmentStore,
20}
21
22impl<'a> Reader<'a> {
23    pub fn new(store: &'a SegmentStore) -> Self {
24        Self { store }
25    }
26
27    /// Retrieve a document's source JSON from its segment.
28    ///
29    /// LZ4 decompression + JSON parsing happens here.
30    pub fn get_source(&self, segment_id: SegmentId, doc_id: DocId) -> Option<serde_json::Value> {
31        let segment = self
32            .store
33            .segments()
34            .iter()
35            .find(|s| s.segment_id() == segment_id)?;
36        let store = segment.doc_store();
37        let bytes = store.get(doc_id.as_u32())?;
38        serde_json::from_slice(&bytes).ok()
39    }
40
41    /// Get raw source bytes (LZ4-decompressed, unparsed).
42    pub fn get_source_bytes(&self, segment_id: SegmentId, doc_id: DocId) -> Option<Vec<u8>> {
43        let segment = self
44            .store
45            .segments()
46            .iter()
47            .find(|s| s.segment_id() == segment_id)?;
48        let store = segment.doc_store();
49        store.get(doc_id.as_u32())
50    }
51
52    /// Retrieve typed field values from the columnar store.
53    ///
54    /// Each value is wrapped in a JSON array (ES `fields` API compat).
55    pub fn retrieve_fields(
56        &self,
57        segment_id: SegmentId,
58        doc_id: DocId,
59        field_names: &[String],
60    ) -> serde_json::Map<String, serde_json::Value> {
61        let segment = match self
62            .store
63            .segments()
64            .iter()
65            .find(|s| s.segment_id() == segment_id)
66        {
67            Some(s) => s,
68            None => return serde_json::Map::new(),
69        };
70        let doc = doc_id.as_u32();
71        let mut result = serde_json::Map::new();
72        for name in field_names {
73            let field_id = match segment
74                .header()
75                .fields
76                .iter()
77                .find(|f| f.field_name == *name)
78                .map(|f| f.field_id)
79            {
80                Some(id) => id,
81                None => continue,
82            };
83            let col = match segment.column(field_id) {
84                Some(c) => c,
85                None => continue,
86            };
87            let value = match col.col_type() {
88                ColumnType::Keyword | ColumnType::KeywordBlocked => match col.keyword_value(doc) {
89                    Some(s) => serde_json::json!([s]),
90                    None => continue,
91                },
92                ColumnType::F64 | ColumnType::ConstantF64 => match col.f64_value(doc) {
93                    Some(n) => serde_json::json!([n]),
94                    None => continue,
95                },
96                ColumnType::I64 | ColumnType::BitpackedI64 | ColumnType::ConstantI64 => {
97                    match col.i64_value(doc) {
98                        Some(n) => serde_json::json!([n]),
99                        None => continue,
100                    }
101                }
102                ColumnType::Bool => match col.bool_value(doc) {
103                    Some(b) => serde_json::json!([b]),
104                    None => continue,
105                },
106                _ => continue,
107            };
108            result.insert(name.clone(), value);
109        }
110        result
111    }
112
113    /// Retrieve typed field values using a per-`SearchResults` cache so
114    /// the `ColumnReader::open` cost is paid once per `(segment, field)`
115    /// instead of once per hit. Behaviour is identical to
116    /// [`retrieve_fields`][Self::retrieve_fields]; only the column-open
117    /// path differs.
118    ///
119    /// See [[optimize-hit-id-column-reader-cache]].
120    pub(crate) fn retrieve_fields_cached(
121        &self,
122        cache: &FieldReaderCache,
123        segment_id: SegmentId,
124        doc_id: DocId,
125        field_names: &[String],
126    ) -> serde_json::Map<String, serde_json::Value> {
127        let segment = match self
128            .store
129            .segments()
130            .iter()
131            .find(|s| s.segment_id() == segment_id)
132        {
133            Some(s) => s,
134            None => return serde_json::Map::new(),
135        };
136        let doc = doc_id.as_u32();
137        let mut result = serde_json::Map::new();
138        for name in field_names {
139            let field_id = match segment
140                .header()
141                .fields
142                .iter()
143                .find(|f| f.field_name == *name)
144                .map(|f| f.field_id)
145            {
146                Some(id) => id,
147                None => continue,
148            };
149            let value = cache.with_column(segment, field_id, |col| match col.col_type() {
150                ColumnType::Keyword | ColumnType::KeywordBlocked => col
151                    .keyword_value(doc)
152                    .map(|s| serde_json::json!([s.to_owned()])),
153                ColumnType::F64 | ColumnType::ConstantF64 => {
154                    col.f64_value(doc).map(|n| serde_json::json!([n]))
155                }
156                ColumnType::I64 | ColumnType::BitpackedI64 | ColumnType::ConstantI64 => {
157                    col.i64_value(doc).map(|n| serde_json::json!([n]))
158                }
159                ColumnType::Bool => col.bool_value(doc).map(|b| serde_json::json!([b])),
160                _ => None,
161            });
162            if let Some(Some(v)) = value {
163                result.insert(name.clone(), v);
164            }
165        }
166        result
167    }
168}