Skip to main content

hermes_core/query/
range.rs

1//! Range query for fast-field numeric filtering.
2//!
3//! `RangeQuery` produces a `RangeScorer` that scans a fast-field column and
4//! yields documents whose value falls within the specified bounds. Score is
5//! always 1.0 — this is a pure filter query.
6//!
7//! Supports u64, i64, and f64 fields. For i64/f64, bounds are encoded to the
8//! same sortable-u64 representation used by fast fields so that a single raw
9//! u64 comparison covers all types.
10//!
11//! When placed in a `BooleanQuery` MUST clause, the `BooleanScorer`'s
12//! seek-based intersection makes this efficient even on large segments.
13
14use crate::dsl::Field;
15use crate::segment::SegmentReader;
16use crate::structures::TERMINATED;
17use crate::structures::fast_field::{FAST_FIELD_MISSING, f64_to_sortable_u64, zigzag_encode};
18use crate::{DocId, Score};
19
20use super::docset::DocSet;
21use super::traits::{CountFuture, Query, Scorer, ScorerFuture};
22
23// ── Typed range bounds ───────────────────────────────────────────────────
24
25/// Inclusive range bounds in the user's type domain.
26#[derive(Debug, Clone)]
27pub enum RangeBound {
28    /// u64 range — stored raw
29    U64 { min: Option<u64>, max: Option<u64> },
30    /// i64 range — will be zigzag-encoded for comparison
31    I64 { min: Option<i64>, max: Option<i64> },
32    /// f64 range — will be sortable-encoded for comparison
33    F64 { min: Option<f64>, max: Option<f64> },
34}
35
36impl RangeBound {
37    /// Compile to raw u64 inclusive bounds suitable for direct fast-field comparison.
38    ///
39    /// Returns `(low, high)` where both are inclusive. Missing bounds become
40    /// 0 / u64::MAX-1 (reserving u64::MAX for FAST_FIELD_MISSING sentinel).
41    fn compile(&self) -> (u64, u64) {
42        match self {
43            RangeBound::U64 { min, max } => {
44                let lo = min.unwrap_or(0);
45                let hi = max.unwrap_or(u64::MAX - 1);
46                (lo, hi)
47            }
48            RangeBound::I64 { min, max } => {
49                // zigzag encoding preserves magnitude, not order.
50                // For correct range comparison on i64, we use sortable encoding
51                // (same as f64 but cast through bits). However, fast fields store
52                // i64 as zigzag. So we must decode per-doc and compare in i64 domain.
53                // We store the raw i64 bounds and handle comparison in the scorer.
54                //
55                // Sentinel: use a special marker to tell the scorer to use i64 path.
56                // We'll handle this in the scorer directly.
57                let lo = min.map(zigzag_encode).unwrap_or(0);
58                let hi = max.map(zigzag_encode).unwrap_or(u64::MAX - 1);
59                (lo, hi)
60            }
61            RangeBound::F64 { min, max } => {
62                let lo = min.map(f64_to_sortable_u64).unwrap_or(0);
63                let hi = max.map(f64_to_sortable_u64).unwrap_or(u64::MAX - 1);
64                (lo, hi)
65            }
66        }
67    }
68
69    /// Whether this bound requires per-doc i64 decoding (zigzag doesn't preserve order).
70    fn is_i64(&self) -> bool {
71        matches!(self, RangeBound::I64 { .. })
72    }
73
74    /// Get the raw i64 bounds for the i64 path.
75    fn i64_bounds(&self) -> (i64, i64) {
76        match self {
77            RangeBound::I64 { min, max } => (min.unwrap_or(i64::MIN), max.unwrap_or(i64::MAX)),
78            _ => (i64::MIN, i64::MAX),
79        }
80    }
81}
82
83// ── RangeQuery ───────────────────────────────────────────────────────────
84
85/// Fast-field range query.
86///
87/// Scans all documents in a segment and yields those whose fast-field value
88/// falls within `[min, max]` (inclusive). Score is always 1.0.
89#[derive(Debug, Clone)]
90pub struct RangeQuery {
91    pub field: Field,
92    pub bound: RangeBound,
93}
94
95impl std::fmt::Display for RangeQuery {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        match &self.bound {
98            RangeBound::U64 { min, max } => write!(
99                f,
100                "Range({}:[{} TO {}])",
101                self.field.0,
102                min.map_or("*".to_string(), |v| v.to_string()),
103                max.map_or("*".to_string(), |v| v.to_string()),
104            ),
105            RangeBound::I64 { min, max } => write!(
106                f,
107                "Range({}:[{} TO {}])",
108                self.field.0,
109                min.map_or("*".to_string(), |v| v.to_string()),
110                max.map_or("*".to_string(), |v| v.to_string()),
111            ),
112            RangeBound::F64 { min, max } => write!(
113                f,
114                "Range({}:[{} TO {}])",
115                self.field.0,
116                min.map_or("*".to_string(), |v| v.to_string()),
117                max.map_or("*".to_string(), |v| v.to_string()),
118            ),
119        }
120    }
121}
122
123impl RangeQuery {
124    pub fn new(field: Field, bound: RangeBound) -> Self {
125        Self { field, bound }
126    }
127
128    /// Convenience: u64 range
129    pub fn u64(field: Field, min: Option<u64>, max: Option<u64>) -> Self {
130        Self::new(field, RangeBound::U64 { min, max })
131    }
132
133    /// Convenience: i64 range
134    pub fn i64(field: Field, min: Option<i64>, max: Option<i64>) -> Self {
135        Self::new(field, RangeBound::I64 { min, max })
136    }
137
138    /// Convenience: f64 range
139    pub fn f64(field: Field, min: Option<f64>, max: Option<f64>) -> Self {
140        Self::new(field, RangeBound::F64 { min, max })
141    }
142}
143
144impl Query for RangeQuery {
145    fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
146        let field = self.field;
147        let bound = self.bound.clone();
148        Box::pin(async move {
149            match RangeScorer::new(reader, field, &bound) {
150                Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer>),
151                Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer>),
152            }
153        })
154    }
155
156    #[cfg(feature = "sync")]
157    fn scorer_sync<'a>(
158        &self,
159        reader: &'a SegmentReader,
160        _limit: usize,
161    ) -> crate::Result<Box<dyn Scorer + 'a>> {
162        match RangeScorer::new(reader, self.field, &self.bound) {
163            Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer + 'a>),
164            Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer + 'a>),
165        }
166    }
167
168    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
169        let num_docs = reader.num_docs();
170        // Rough estimate: half the segment (we don't know selectivity)
171        Box::pin(async move { Ok(num_docs / 2) })
172    }
173
174    fn is_filter(&self) -> bool {
175        true
176    }
177
178    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
179        let fast_field = reader.fast_field(self.field.0)?;
180        let (raw_lo, raw_hi) = self.bound.compile();
181        let use_i64 = self.bound.is_i64();
182        let (i64_lo, i64_hi) = self.bound.i64_bounds();
183
184        Some(Box::new(move |doc_id: DocId| -> bool {
185            let raw = fast_field.get_u64(doc_id);
186            if raw == FAST_FIELD_MISSING {
187                return false;
188            }
189            if use_i64 {
190                let val = crate::structures::fast_field::zigzag_decode(raw);
191                val >= i64_lo && val <= i64_hi
192            } else {
193                raw >= raw_lo && raw <= raw_hi
194            }
195        }))
196    }
197
198    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
199        // Build bitset from fast-field scan: O(N). Slower than posting-list-based
200        // bitsets but still faster than per-call predicate in BMP (~2ns lookup vs ~30ns).
201        let pred = self.as_doc_predicate(reader)?;
202        Some(super::DocBitset::from_predicate(reader.num_docs(), &*pred))
203    }
204}
205
206// ── RangeScorer ──────────────────────────────────────────────────────────
207
208/// Scorer that scans a fast-field column and yields matching docs.
209///
210/// For u64 and f64 fields, comparison is done in the raw u64 domain (both
211/// use order-preserving encodings). For i64 fields, zigzag encoding does NOT
212/// preserve order, so we decode each value and compare in i64 domain.
213struct RangeScorer<'a> {
214    /// Cached fast-field reader — avoids HashMap lookup per doc in matches()
215    fast_field: &'a crate::structures::fast_field::FastFieldReader,
216    /// For u64/f64: compiled raw bounds. For i64: unused.
217    raw_lo: u64,
218    raw_hi: u64,
219    /// For i64 only: decoded bounds.
220    i64_lo: i64,
221    i64_hi: i64,
222    /// Whether to use i64 comparison path.
223    use_i64: bool,
224    /// Current document position.
225    current: u32,
226    num_docs: u32,
227}
228
229/// Empty scorer returned when the field has no fast-field data.
230struct EmptyRangeScorer;
231
232impl<'a> RangeScorer<'a> {
233    fn new(
234        reader: &'a SegmentReader,
235        field: Field,
236        bound: &RangeBound,
237    ) -> Result<Self, EmptyRangeScorer> {
238        let fast_field = reader.fast_field(field.0).ok_or(EmptyRangeScorer)?;
239        let num_docs = reader.num_docs();
240        let (raw_lo, raw_hi) = bound.compile();
241        let use_i64 = bound.is_i64();
242        let (i64_lo, i64_hi) = bound.i64_bounds();
243
244        let mut scorer = Self {
245            fast_field,
246            raw_lo,
247            raw_hi,
248            i64_lo,
249            i64_hi,
250            use_i64,
251            current: 0,
252            num_docs,
253        };
254
255        // Position on first matching doc
256        if num_docs > 0 && !scorer.matches(0) {
257            scorer.scan_forward();
258        }
259        Ok(scorer)
260    }
261
262    #[inline]
263    fn matches(&self, doc_id: DocId) -> bool {
264        let raw = self.fast_field.get_u64(doc_id);
265        if raw == FAST_FIELD_MISSING {
266            return false;
267        }
268
269        if self.use_i64 {
270            let val = crate::structures::fast_field::zigzag_decode(raw);
271            val >= self.i64_lo && val <= self.i64_hi
272        } else {
273            raw >= self.raw_lo && raw <= self.raw_hi
274        }
275    }
276
277    /// Advance current past non-matching docs.
278    fn scan_forward(&mut self) {
279        loop {
280            self.current += 1;
281            if self.current >= self.num_docs {
282                self.current = self.num_docs;
283                return;
284            }
285            if self.matches(self.current) {
286                return;
287            }
288        }
289    }
290}
291
292impl DocSet for RangeScorer<'_> {
293    fn doc(&self) -> DocId {
294        if self.current >= self.num_docs {
295            TERMINATED
296        } else {
297            self.current
298        }
299    }
300
301    fn advance(&mut self) -> DocId {
302        self.scan_forward();
303        self.doc()
304    }
305
306    fn seek(&mut self, target: DocId) -> DocId {
307        if self.current >= self.num_docs {
308            return TERMINATED;
309        }
310        if target <= self.current {
311            return self.current;
312        }
313        // Position just before target so scan_forward starts at target
314        self.current = target - 1;
315        self.scan_forward();
316        self.doc()
317    }
318
319    fn size_hint(&self) -> u32 {
320        // Upper bound: remaining docs
321        self.num_docs.saturating_sub(self.current)
322    }
323}
324
325impl Scorer for RangeScorer<'_> {
326    fn score(&self) -> Score {
327        1.0
328    }
329}
330
331impl DocSet for EmptyRangeScorer {
332    fn doc(&self) -> DocId {
333        TERMINATED
334    }
335    fn advance(&mut self) -> DocId {
336        TERMINATED
337    }
338    fn seek(&mut self, _target: DocId) -> DocId {
339        TERMINATED
340    }
341    fn size_hint(&self) -> u32 {
342        0
343    }
344}
345
346impl Scorer for EmptyRangeScorer {
347    fn score(&self) -> Score {
348        0.0
349    }
350}
351
352#[cfg(test)]
353mod tests {
354    use super::*;
355
356    #[test]
357    fn test_range_bound_u64_compile() {
358        let b = RangeBound::U64 {
359            min: Some(10),
360            max: Some(100),
361        };
362        let (lo, hi) = b.compile();
363        assert_eq!(lo, 10);
364        assert_eq!(hi, 100);
365    }
366
367    #[test]
368    fn test_range_bound_f64_compile_preserves_order() {
369        let b1 = RangeBound::F64 {
370            min: Some(-1.0),
371            max: Some(1.0),
372        };
373        let (lo1, hi1) = b1.compile();
374        assert!(lo1 < hi1);
375
376        let b2 = RangeBound::F64 {
377            min: Some(0.0),
378            max: Some(100.0),
379        };
380        let (lo2, hi2) = b2.compile();
381        assert!(lo2 < hi2);
382    }
383
384    #[test]
385    fn test_range_bound_open_bounds() {
386        let b = RangeBound::U64 {
387            min: None,
388            max: None,
389        };
390        let (lo, hi) = b.compile();
391        assert_eq!(lo, 0);
392        assert_eq!(hi, u64::MAX - 1);
393    }
394
395    #[test]
396    fn test_range_query_constructors() {
397        let q = RangeQuery::u64(Field(0), Some(10), Some(100));
398        assert_eq!(q.field, Field(0));
399        assert!(matches!(
400            q.bound,
401            RangeBound::U64 {
402                min: Some(10),
403                max: Some(100)
404            }
405        ));
406
407        let q = RangeQuery::i64(Field(1), Some(-50), Some(50));
408        assert!(matches!(
409            q.bound,
410            RangeBound::I64 {
411                min: Some(-50),
412                max: Some(50)
413            }
414        ));
415
416        let q = RangeQuery::f64(Field(2), Some(0.5), Some(9.5));
417        assert!(matches!(q.bound, RangeBound::F64 { .. }));
418    }
419}