Skip to main content

hermes_core/query/
prefix.rs

1//! Prefix query — matches all documents containing any term that starts with a
2//! given prefix. Materializes the union of matching posting lists into a sorted
3//! doc ID set, giving O(log N) seek via `SortedVecDocSet`. Score is always 1.0
4//! (filter-style, like `RangeQuery`).
5
6use std::sync::Arc;
7
8use crate::dsl::Field;
9use crate::segment::SegmentReader;
10use crate::structures::{BlockPostingList, TERMINATED};
11use crate::{DocId, Score};
12
13use super::docset::{DocSet, SortedVecDocSet};
14use super::traits::{CountFuture, EmptyScorer, Query, Scorer, ScorerFuture};
15
16/// Prefix query — matches documents containing any term starting with `prefix`.
17#[derive(Debug, Clone)]
18pub struct PrefixQuery {
19    pub field: Field,
20    pub prefix: Vec<u8>,
21}
22
23impl std::fmt::Display for PrefixQuery {
24    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25        write!(
26            f,
27            "Prefix({}:\"{}*\")",
28            self.field.0,
29            String::from_utf8_lossy(&self.prefix)
30        )
31    }
32}
33
34impl PrefixQuery {
35    /// Create from raw bytes.
36    pub fn new(field: Field, prefix: impl Into<Vec<u8>>) -> Self {
37        Self {
38            field,
39            prefix: prefix.into(),
40        }
41    }
42
43    /// Create from text — lowercased to match default tokenization.
44    pub fn text(field: Field, text: &str) -> Self {
45        Self {
46            field,
47            prefix: text.to_lowercase().into_bytes(),
48        }
49    }
50}
51
52impl Query for PrefixQuery {
53    fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
54        let field = self.field;
55        let prefix = self.prefix.clone();
56        Box::pin(async move {
57            let postings = reader.get_prefix_postings(field, &prefix).await?;
58            if postings.is_empty() {
59                return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
60            }
61            let docs = materialize_union(&postings);
62            if docs.is_empty() {
63                return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
64            }
65            Ok(Box::new(PrefixScorer::new(docs)) as Box<dyn Scorer>)
66        })
67    }
68
69    #[cfg(feature = "sync")]
70    fn scorer_sync<'a>(
71        &self,
72        reader: &'a SegmentReader,
73        _limit: usize,
74    ) -> crate::Result<Box<dyn Scorer + 'a>> {
75        let postings = reader.get_prefix_postings_sync(self.field, &self.prefix)?;
76        if postings.is_empty() {
77            return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
78        }
79        let docs = materialize_union(&postings);
80        if docs.is_empty() {
81            return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
82        }
83        Ok(Box::new(PrefixScorer::new(docs)) as Box<dyn Scorer>)
84    }
85
86    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
87        let field = self.field;
88        let prefix = self.prefix.clone();
89        Box::pin(async move {
90            let postings = reader.get_prefix_postings(field, &prefix).await?;
91            Ok(postings.iter().map(|p| p.doc_count()).sum())
92        })
93    }
94
95    fn is_filter(&self) -> bool {
96        true
97    }
98
99    #[cfg(feature = "sync")]
100    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
101        let bitset = self.as_doc_bitset(reader)?;
102        Some(Box::new(move |doc_id: DocId| bitset.contains(doc_id)))
103    }
104
105    #[cfg(feature = "sync")]
106    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
107        let postings = reader
108            .get_prefix_postings_sync(self.field, &self.prefix)
109            .ok()?;
110        let mut bitset = super::DocBitset::new(reader.num_docs());
111        for posting in &postings {
112            let mut iter = posting.iterator();
113            loop {
114                let d = iter.doc();
115                if d == TERMINATED {
116                    break;
117                }
118                bitset.set(d);
119                iter.advance();
120            }
121        }
122        Some(bitset)
123    }
124}
125
126// ── PrefixScorer ────────────────────────────────────────────────────────
127
128/// Scorer backed by a pre-materialized sorted doc ID set.
129struct PrefixScorer {
130    inner: SortedVecDocSet,
131}
132
133impl PrefixScorer {
134    fn new(docs: Vec<u32>) -> Self {
135        Self {
136            inner: SortedVecDocSet::new(Arc::new(docs)),
137        }
138    }
139}
140
141impl DocSet for PrefixScorer {
142    #[inline]
143    fn doc(&self) -> DocId {
144        self.inner.doc()
145    }
146
147    #[inline]
148    fn advance(&mut self) -> DocId {
149        self.inner.advance()
150    }
151
152    fn seek(&mut self, target: DocId) -> DocId {
153        self.inner.seek(target)
154    }
155
156    fn size_hint(&self) -> u32 {
157        self.inner.size_hint()
158    }
159}
160
161impl Scorer for PrefixScorer {
162    fn score(&self) -> Score {
163        1.0
164    }
165}
166
167// ── Helpers ─────────────────────────────────────────────────────────────
168
169/// Iterate all posting lists, collect doc IDs, sort, and deduplicate.
170fn materialize_union(postings: &[BlockPostingList]) -> Vec<u32> {
171    let total: usize = postings.iter().map(|p| p.doc_count() as usize).sum();
172    let mut docs = Vec::with_capacity(total);
173
174    for posting in postings {
175        let mut iter = posting.iterator();
176        loop {
177            let d = iter.doc();
178            if d == TERMINATED {
179                break;
180            }
181            docs.push(d);
182            iter.advance();
183        }
184    }
185
186    docs.sort_unstable();
187    docs.dedup();
188    docs
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// Scorer over a small fixed doc list shared by the scorer tests.
    fn sample_scorer() -> PrefixScorer {
        PrefixScorer::new(vec![1, 5, 10, 20])
    }

    #[test]
    fn test_materialize_union_empty() {
        // No posting lists means no documents.
        let merged = materialize_union(&[]);
        assert!(merged.is_empty());
    }

    #[test]
    fn test_prefix_scorer_basic() {
        let mut cursor = sample_scorer();
        // Starts on the first doc with the constant filter score.
        assert_eq!(cursor.doc(), 1);
        assert_eq!(cursor.score(), 1.0);
        // Stepping and exact seeks walk the list in order, then terminate.
        assert_eq!(cursor.advance(), 5);
        assert_eq!(cursor.seek(10), 10);
        assert_eq!(cursor.advance(), 20);
        assert_eq!(cursor.advance(), TERMINATED);
    }

    #[test]
    fn test_prefix_scorer_seek_past() {
        let mut cursor = sample_scorer();
        // A target between entries lands on the next larger doc.
        assert_eq!(cursor.seek(7), 10);
        // A target beyond the last entry exhausts the scorer.
        assert_eq!(cursor.seek(100), TERMINATED);
    }

    #[test]
    fn test_prefix_query_display() {
        let query = PrefixQuery::text(Field(0), "abc");
        assert_eq!(format!("{}", query), "Prefix(0:\"abc*\")");
    }

    #[test]
    fn test_prefix_query_is_filter() {
        let query = PrefixQuery::text(Field(0), "test");
        assert!(query.is_filter());
    }
}