hermes_core/query/
prefix.rs1use std::sync::Arc;
7
8use crate::dsl::Field;
9use crate::segment::SegmentReader;
10use crate::structures::{BlockPostingList, TERMINATED};
11use crate::{DocId, Score};
12
13use super::docset::{DocSet, SortedVecDocSet};
14use super::traits::{CountFuture, EmptyScorer, Query, Scorer, ScorerFuture};
15
16#[derive(Debug, Clone)]
18pub struct PrefixQuery {
19 pub field: Field,
20 pub prefix: Vec<u8>,
21}
22
23impl std::fmt::Display for PrefixQuery {
24 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25 write!(
26 f,
27 "Prefix({}:\"{}*\")",
28 self.field.0,
29 String::from_utf8_lossy(&self.prefix)
30 )
31 }
32}
33
34impl PrefixQuery {
35 pub fn new(field: Field, prefix: impl Into<Vec<u8>>) -> Self {
37 Self {
38 field,
39 prefix: prefix.into(),
40 }
41 }
42
43 pub fn text(field: Field, text: &str) -> Self {
45 Self {
46 field,
47 prefix: text.to_lowercase().into_bytes(),
48 }
49 }
50}
51
52impl Query for PrefixQuery {
53 fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
54 let field = self.field;
55 let prefix = self.prefix.clone();
56 Box::pin(async move {
57 let postings = reader.get_prefix_postings(field, &prefix).await?;
58 if postings.is_empty() {
59 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
60 }
61 let docs = materialize_union(&postings);
62 if docs.is_empty() {
63 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
64 }
65 Ok(Box::new(PrefixScorer::new(docs)) as Box<dyn Scorer>)
66 })
67 }
68
69 #[cfg(feature = "sync")]
70 fn scorer_sync<'a>(
71 &self,
72 reader: &'a SegmentReader,
73 _limit: usize,
74 ) -> crate::Result<Box<dyn Scorer + 'a>> {
75 let postings = reader.get_prefix_postings_sync(self.field, &self.prefix)?;
76 if postings.is_empty() {
77 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
78 }
79 let docs = materialize_union(&postings);
80 if docs.is_empty() {
81 return Ok(Box::new(EmptyScorer) as Box<dyn Scorer>);
82 }
83 Ok(Box::new(PrefixScorer::new(docs)) as Box<dyn Scorer>)
84 }
85
86 fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
87 let field = self.field;
88 let prefix = self.prefix.clone();
89 Box::pin(async move {
90 let postings = reader.get_prefix_postings(field, &prefix).await?;
91 Ok(postings.iter().map(|p| p.doc_count()).sum())
92 })
93 }
94
95 fn is_filter(&self) -> bool {
96 true
97 }
98
99 #[cfg(feature = "sync")]
100 fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
101 let bitset = self.as_doc_bitset(reader)?;
102 Some(Box::new(move |doc_id: DocId| bitset.contains(doc_id)))
103 }
104
105 #[cfg(feature = "sync")]
106 fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
107 let postings = reader
108 .get_prefix_postings_sync(self.field, &self.prefix)
109 .ok()?;
110 let mut bitset = super::DocBitset::new(reader.num_docs());
111 for posting in &postings {
112 let mut iter = posting.iterator();
113 loop {
114 let d = iter.doc();
115 if d == TERMINATED {
116 break;
117 }
118 bitset.set(d);
119 iter.advance();
120 }
121 }
122 Some(bitset)
123 }
124}
125
126struct PrefixScorer {
130 inner: SortedVecDocSet,
131}
132
133impl PrefixScorer {
134 fn new(docs: Vec<u32>) -> Self {
135 Self {
136 inner: SortedVecDocSet::new(Arc::new(docs)),
137 }
138 }
139}
140
141impl DocSet for PrefixScorer {
142 #[inline]
143 fn doc(&self) -> DocId {
144 self.inner.doc()
145 }
146
147 #[inline]
148 fn advance(&mut self) -> DocId {
149 self.inner.advance()
150 }
151
152 fn seek(&mut self, target: DocId) -> DocId {
153 self.inner.seek(target)
154 }
155
156 fn size_hint(&self) -> u32 {
157 self.inner.size_hint()
158 }
159}
160
161impl Scorer for PrefixScorer {
162 fn score(&self) -> Score {
163 1.0
164 }
165}
166
167fn materialize_union(postings: &[BlockPostingList]) -> Vec<u32> {
171 let total: usize = postings.iter().map(|p| p.doc_count() as usize).sum();
172 let mut docs = Vec::with_capacity(total);
173
174 for posting in postings {
175 let mut iter = posting.iterator();
176 loop {
177 let d = iter.doc();
178 if d == TERMINATED {
179 break;
180 }
181 docs.push(d);
182 iter.advance();
183 }
184 }
185
186 docs.sort_unstable();
187 docs.dedup();
188 docs
189}
190
#[cfg(test)]
mod tests {
    use super::*;

    /// No posting lists means no documents.
    #[test]
    fn test_materialize_union_empty() {
        assert!(materialize_union(&[]).is_empty());
    }

    /// Walks the scorer with a mix of `advance` and `seek` to the end.
    #[test]
    fn test_prefix_scorer_basic() {
        let ids = vec![1, 5, 10, 20];
        let mut scorer = PrefixScorer::new(ids);

        // Positioned on the first doc with the constant score.
        assert_eq!(scorer.doc(), 1);
        assert_eq!(scorer.score(), 1.0);

        // Step, jump to an existing id, step again, then run off the end.
        assert_eq!(scorer.advance(), 5);
        assert_eq!(scorer.seek(10), 10);
        assert_eq!(scorer.advance(), 20);
        assert_eq!(scorer.advance(), TERMINATED);
    }

    /// Seeking to an absent id lands on the next larger one; seeking beyond
    /// the last id terminates.
    #[test]
    fn test_prefix_scorer_seek_past() {
        let ids = vec![1, 5, 10, 20];
        let mut scorer = PrefixScorer::new(ids);

        assert_eq!(scorer.seek(7), 10);
        assert_eq!(scorer.seek(100), TERMINATED);
    }

    /// `Display` renders the field id and the prefix followed by `*`.
    #[test]
    fn test_prefix_query_display() {
        let rendered = PrefixQuery::text(Field(0), "abc").to_string();
        assert_eq!(rendered, "Prefix(0:\"abc*\")");
    }

    /// Prefix queries report themselves as filters.
    #[test]
    fn test_prefix_query_is_filter() {
        let query = PrefixQuery::text(Field(0), "test");
        assert!(query.is_filter());
    }
}