Skip to main content

hermes_core/query/
range.rs

1//! Range query for fast-field numeric filtering.
2//!
3//! `RangeQuery` produces a `RangeScorer` that scans a fast-field column and
4//! yields documents whose value falls within the specified bounds. Score is
5//! always 1.0 — this is a pure filter query.
6//!
7//! Supports u64, i64, and f64 fields. For i64/f64, bounds are encoded to the
8//! same sortable-u64 representation used by fast fields so that a single raw
9//! u64 comparison covers all types.
10//!
11//! When placed in a `BooleanQuery` MUST clause, the `BooleanScorer`'s
12//! seek-based intersection makes this efficient even on large segments.
13
14use crate::dsl::Field;
15use crate::segment::SegmentReader;
16use crate::structures::TERMINATED;
17use crate::structures::fast_field::{FAST_FIELD_MISSING, f64_to_sortable_u64, zigzag_encode};
18use crate::{DocId, Score};
19
20use super::docset::DocSet;
21use super::traits::{CountFuture, Query, Scorer, ScorerFuture};
22
23// ── Typed range bounds ───────────────────────────────────────────────────
24
25/// Inclusive range bounds in the user's type domain.
26#[derive(Debug, Clone)]
27pub enum RangeBound {
28    /// u64 range — stored raw
29    U64 { min: Option<u64>, max: Option<u64> },
30    /// i64 range — will be zigzag-encoded for comparison
31    I64 { min: Option<i64>, max: Option<i64> },
32    /// f64 range — will be sortable-encoded for comparison
33    F64 { min: Option<f64>, max: Option<f64> },
34}
35
36impl RangeBound {
37    /// Compile to raw u64 inclusive bounds suitable for direct fast-field comparison.
38    ///
39    /// Returns `(low, high)` where both are inclusive. Missing bounds become
40    /// 0 / u64::MAX-1 (reserving u64::MAX for FAST_FIELD_MISSING sentinel).
41    fn compile(&self) -> (u64, u64) {
42        match self {
43            RangeBound::U64 { min, max } => {
44                let lo = min.unwrap_or(0);
45                let hi = max.unwrap_or(u64::MAX - 1);
46                (lo, hi)
47            }
48            RangeBound::I64 { min, max } => {
49                // zigzag encoding preserves magnitude, not order.
50                // For correct range comparison on i64, we use sortable encoding
51                // (same as f64 but cast through bits). However, fast fields store
52                // i64 as zigzag. So we must decode per-doc and compare in i64 domain.
53                // We store the raw i64 bounds and handle comparison in the scorer.
54                //
55                // Sentinel: use a special marker to tell the scorer to use i64 path.
56                // We'll handle this in the scorer directly.
57                let lo = min.map(zigzag_encode).unwrap_or(0);
58                let hi = max.map(zigzag_encode).unwrap_or(u64::MAX - 1);
59                (lo, hi)
60            }
61            RangeBound::F64 { min, max } => {
62                let lo = min.map(f64_to_sortable_u64).unwrap_or(0);
63                let hi = max.map(f64_to_sortable_u64).unwrap_or(u64::MAX - 1);
64                (lo, hi)
65            }
66        }
67    }
68
69    /// Whether this bound requires per-doc i64 decoding (zigzag doesn't preserve order).
70    fn is_i64(&self) -> bool {
71        matches!(self, RangeBound::I64 { .. })
72    }
73
74    /// Get the raw i64 bounds for the i64 path.
75    fn i64_bounds(&self) -> (i64, i64) {
76        match self {
77            RangeBound::I64 { min, max } => (min.unwrap_or(i64::MIN), max.unwrap_or(i64::MAX)),
78            _ => (i64::MIN, i64::MAX),
79        }
80    }
81}
82
83// ── RangeQuery ───────────────────────────────────────────────────────────
84
85/// Fast-field range query.
86///
87/// Scans all documents in a segment and yields those whose fast-field value
88/// falls within `[min, max]` (inclusive). Score is always 1.0.
89#[derive(Debug, Clone)]
90pub struct RangeQuery {
91    pub field: Field,
92    pub bound: RangeBound,
93}
94
95impl std::fmt::Display for RangeQuery {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        match &self.bound {
98            RangeBound::U64 { min, max } => write!(
99                f,
100                "Range({}:[{} TO {}])",
101                self.field.0,
102                min.map_or("*".to_string(), |v| v.to_string()),
103                max.map_or("*".to_string(), |v| v.to_string()),
104            ),
105            RangeBound::I64 { min, max } => write!(
106                f,
107                "Range({}:[{} TO {}])",
108                self.field.0,
109                min.map_or("*".to_string(), |v| v.to_string()),
110                max.map_or("*".to_string(), |v| v.to_string()),
111            ),
112            RangeBound::F64 { min, max } => write!(
113                f,
114                "Range({}:[{} TO {}])",
115                self.field.0,
116                min.map_or("*".to_string(), |v| v.to_string()),
117                max.map_or("*".to_string(), |v| v.to_string()),
118            ),
119        }
120    }
121}
122
123impl RangeQuery {
124    pub fn new(field: Field, bound: RangeBound) -> Self {
125        Self { field, bound }
126    }
127
128    /// Convenience: u64 range
129    pub fn u64(field: Field, min: Option<u64>, max: Option<u64>) -> Self {
130        Self::new(field, RangeBound::U64 { min, max })
131    }
132
133    /// Convenience: i64 range
134    pub fn i64(field: Field, min: Option<i64>, max: Option<i64>) -> Self {
135        Self::new(field, RangeBound::I64 { min, max })
136    }
137
138    /// Convenience: f64 range
139    pub fn f64(field: Field, min: Option<f64>, max: Option<f64>) -> Self {
140        Self::new(field, RangeBound::F64 { min, max })
141    }
142}
143
144impl Query for RangeQuery {
145    fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
146        let field = self.field;
147        let bound = self.bound.clone();
148        Box::pin(async move {
149            match RangeScorer::new(reader, field, &bound) {
150                Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer>),
151                Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer>),
152            }
153        })
154    }
155
156    #[cfg(feature = "sync")]
157    fn scorer_sync<'a>(
158        &self,
159        reader: &'a SegmentReader,
160        _limit: usize,
161    ) -> crate::Result<Box<dyn Scorer + 'a>> {
162        match RangeScorer::new(reader, self.field, &self.bound) {
163            Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer + 'a>),
164            Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer + 'a>),
165        }
166    }
167
168    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
169        let num_docs = reader.num_docs();
170        // Rough estimate: half the segment (we don't know selectivity)
171        Box::pin(async move { Ok(num_docs / 2) })
172    }
173
174    fn is_filter(&self) -> bool {
175        true
176    }
177
178    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
179        let fast_field = reader.fast_field(self.field.0)?;
180        let (raw_lo, raw_hi) = self.bound.compile();
181        let use_i64 = self.bound.is_i64();
182        let (i64_lo, i64_hi) = self.bound.i64_bounds();
183
184        Some(Box::new(move |doc_id: DocId| -> bool {
185            let raw = fast_field.get_u64(doc_id);
186            if raw == FAST_FIELD_MISSING {
187                return false;
188            }
189            if use_i64 {
190                let val = crate::structures::fast_field::zigzag_decode(raw);
191                val >= i64_lo && val <= i64_hi
192            } else {
193                raw >= raw_lo && raw <= raw_hi
194            }
195        }))
196    }
197}
198
199// ── RangeScorer ──────────────────────────────────────────────────────────
200
201/// Scorer that scans a fast-field column and yields matching docs.
202///
203/// For u64 and f64 fields, comparison is done in the raw u64 domain (both
204/// use order-preserving encodings). For i64 fields, zigzag encoding does NOT
205/// preserve order, so we decode each value and compare in i64 domain.
206struct RangeScorer<'a> {
207    /// Cached fast-field reader — avoids HashMap lookup per doc in matches()
208    fast_field: &'a crate::structures::fast_field::FastFieldReader,
209    /// For u64/f64: compiled raw bounds. For i64: unused.
210    raw_lo: u64,
211    raw_hi: u64,
212    /// For i64 only: decoded bounds.
213    i64_lo: i64,
214    i64_hi: i64,
215    /// Whether to use i64 comparison path.
216    use_i64: bool,
217    /// Current document position.
218    current: u32,
219    num_docs: u32,
220}
221
222/// Empty scorer returned when the field has no fast-field data.
223struct EmptyRangeScorer;
224
225impl<'a> RangeScorer<'a> {
226    fn new(
227        reader: &'a SegmentReader,
228        field: Field,
229        bound: &RangeBound,
230    ) -> Result<Self, EmptyRangeScorer> {
231        let fast_field = reader.fast_field(field.0).ok_or(EmptyRangeScorer)?;
232        let num_docs = reader.num_docs();
233        let (raw_lo, raw_hi) = bound.compile();
234        let use_i64 = bound.is_i64();
235        let (i64_lo, i64_hi) = bound.i64_bounds();
236
237        let mut scorer = Self {
238            fast_field,
239            raw_lo,
240            raw_hi,
241            i64_lo,
242            i64_hi,
243            use_i64,
244            current: 0,
245            num_docs,
246        };
247
248        // Position on first matching doc
249        if num_docs > 0 && !scorer.matches(0) {
250            scorer.scan_forward();
251        }
252        Ok(scorer)
253    }
254
255    #[inline]
256    fn matches(&self, doc_id: DocId) -> bool {
257        let raw = self.fast_field.get_u64(doc_id);
258        if raw == FAST_FIELD_MISSING {
259            return false;
260        }
261
262        if self.use_i64 {
263            let val = crate::structures::fast_field::zigzag_decode(raw);
264            val >= self.i64_lo && val <= self.i64_hi
265        } else {
266            raw >= self.raw_lo && raw <= self.raw_hi
267        }
268    }
269
270    /// Advance current past non-matching docs.
271    fn scan_forward(&mut self) {
272        loop {
273            self.current += 1;
274            if self.current >= self.num_docs {
275                self.current = self.num_docs;
276                return;
277            }
278            if self.matches(self.current) {
279                return;
280            }
281        }
282    }
283}
284
285impl DocSet for RangeScorer<'_> {
286    fn doc(&self) -> DocId {
287        if self.current >= self.num_docs {
288            TERMINATED
289        } else {
290            self.current
291        }
292    }
293
294    fn advance(&mut self) -> DocId {
295        self.scan_forward();
296        self.doc()
297    }
298
299    fn seek(&mut self, target: DocId) -> DocId {
300        if self.current >= self.num_docs {
301            return TERMINATED;
302        }
303        if target <= self.current {
304            return self.current;
305        }
306        // Position just before target so scan_forward starts at target
307        self.current = target - 1;
308        self.scan_forward();
309        self.doc()
310    }
311
312    fn size_hint(&self) -> u32 {
313        // Upper bound: remaining docs
314        self.num_docs.saturating_sub(self.current)
315    }
316}
317
318impl Scorer for RangeScorer<'_> {
319    fn score(&self) -> Score {
320        1.0
321    }
322}
323
324impl DocSet for EmptyRangeScorer {
325    fn doc(&self) -> DocId {
326        TERMINATED
327    }
328    fn advance(&mut self) -> DocId {
329        TERMINATED
330    }
331    fn seek(&mut self, _target: DocId) -> DocId {
332        TERMINATED
333    }
334    fn size_hint(&self) -> u32 {
335        0
336    }
337}
338
339impl Scorer for EmptyRangeScorer {
340    fn score(&self) -> Score {
341        0.0
342    }
343}
344
345#[cfg(test)]
346mod tests {
347    use super::*;
348
349    #[test]
350    fn test_range_bound_u64_compile() {
351        let b = RangeBound::U64 {
352            min: Some(10),
353            max: Some(100),
354        };
355        let (lo, hi) = b.compile();
356        assert_eq!(lo, 10);
357        assert_eq!(hi, 100);
358    }
359
360    #[test]
361    fn test_range_bound_f64_compile_preserves_order() {
362        let b1 = RangeBound::F64 {
363            min: Some(-1.0),
364            max: Some(1.0),
365        };
366        let (lo1, hi1) = b1.compile();
367        assert!(lo1 < hi1);
368
369        let b2 = RangeBound::F64 {
370            min: Some(0.0),
371            max: Some(100.0),
372        };
373        let (lo2, hi2) = b2.compile();
374        assert!(lo2 < hi2);
375    }
376
377    #[test]
378    fn test_range_bound_open_bounds() {
379        let b = RangeBound::U64 {
380            min: None,
381            max: None,
382        };
383        let (lo, hi) = b.compile();
384        assert_eq!(lo, 0);
385        assert_eq!(hi, u64::MAX - 1);
386    }
387
388    #[test]
389    fn test_range_query_constructors() {
390        let q = RangeQuery::u64(Field(0), Some(10), Some(100));
391        assert_eq!(q.field, Field(0));
392        assert!(matches!(
393            q.bound,
394            RangeBound::U64 {
395                min: Some(10),
396                max: Some(100)
397            }
398        ));
399
400        let q = RangeQuery::i64(Field(1), Some(-50), Some(50));
401        assert!(matches!(
402            q.bound,
403            RangeBound::I64 {
404                min: Some(-50),
405                max: Some(50)
406            }
407        ));
408
409        let q = RangeQuery::f64(Field(2), Some(0.5), Some(9.5));
410        assert!(matches!(q.bound, RangeBound::F64 { .. }));
411    }
412}