Skip to main content

hermes_core/query/
range.rs

1//! Range query for fast-field numeric filtering.
2//!
3//! `RangeQuery` produces a `RangeScorer` that scans a fast-field column and
4//! yields documents whose value falls within the specified bounds. Score is
5//! always 1.0 — this is a pure filter query.
6//!
7//! Supports u64, i64, and f64 fields. For i64/f64, bounds are encoded to the
8//! same sortable-u64 representation used by fast fields so that a single raw
9//! u64 comparison covers all types.
10//!
11//! When placed in a `BooleanQuery` MUST clause, the `BooleanScorer`'s
12//! seek-based intersection makes this efficient even on large segments.
13
14use crate::dsl::Field;
15use crate::segment::SegmentReader;
16use crate::structures::TERMINATED;
17use crate::structures::fast_field::{FAST_FIELD_MISSING, f64_to_sortable_u64, zigzag_encode};
18use crate::{DocId, Score};
19
20use super::docset::DocSet;
21use super::traits::{CountFuture, Query, Scorer, ScorerFuture};
22
23// ── Typed range bounds ───────────────────────────────────────────────────
24
25/// Inclusive range bounds in the user's type domain.
26#[derive(Debug, Clone)]
27pub enum RangeBound {
28    /// u64 range — stored raw
29    U64 { min: Option<u64>, max: Option<u64> },
30    /// i64 range — will be zigzag-encoded for comparison
31    I64 { min: Option<i64>, max: Option<i64> },
32    /// f64 range — will be sortable-encoded for comparison
33    F64 { min: Option<f64>, max: Option<f64> },
34}
35
36impl RangeBound {
37    /// Compile to raw u64 inclusive bounds suitable for direct fast-field comparison.
38    ///
39    /// Returns `(low, high)` where both are inclusive. Missing bounds become
40    /// 0 / u64::MAX-1 (reserving u64::MAX for FAST_FIELD_MISSING sentinel).
41    fn compile(&self) -> (u64, u64) {
42        match self {
43            RangeBound::U64 { min, max } => {
44                let lo = min.unwrap_or(0);
45                let hi = max.unwrap_or(u64::MAX - 1);
46                (lo, hi)
47            }
48            RangeBound::I64 { min, max } => {
49                // zigzag encoding preserves magnitude, not order.
50                // For correct range comparison on i64, we use sortable encoding
51                // (same as f64 but cast through bits). However, fast fields store
52                // i64 as zigzag. So we must decode per-doc and compare in i64 domain.
53                // We store the raw i64 bounds and handle comparison in the scorer.
54                //
55                // Sentinel: use a special marker to tell the scorer to use i64 path.
56                // We'll handle this in the scorer directly.
57                let lo = min.map(zigzag_encode).unwrap_or(0);
58                let hi = max.map(zigzag_encode).unwrap_or(u64::MAX - 1);
59                (lo, hi)
60            }
61            RangeBound::F64 { min, max } => {
62                let lo = min.map(f64_to_sortable_u64).unwrap_or(0);
63                let hi = max.map(f64_to_sortable_u64).unwrap_or(u64::MAX - 1);
64                (lo, hi)
65            }
66        }
67    }
68
69    /// Whether this bound requires per-doc i64 decoding (zigzag doesn't preserve order).
70    fn is_i64(&self) -> bool {
71        matches!(self, RangeBound::I64 { .. })
72    }
73
74    /// Get the raw i64 bounds for the i64 path.
75    fn i64_bounds(&self) -> (i64, i64) {
76        match self {
77            RangeBound::I64 { min, max } => (min.unwrap_or(i64::MIN), max.unwrap_or(i64::MAX)),
78            _ => (i64::MIN, i64::MAX),
79        }
80    }
81}
82
83// ── RangeQuery ───────────────────────────────────────────────────────────
84
85/// Fast-field range query.
86///
87/// Scans all documents in a segment and yields those whose fast-field value
88/// falls within `[min, max]` (inclusive). Score is always 1.0.
89#[derive(Debug, Clone)]
90pub struct RangeQuery {
91    pub field: Field,
92    pub bound: RangeBound,
93}
94
95impl std::fmt::Display for RangeQuery {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        match &self.bound {
98            RangeBound::U64 { min, max } => write!(
99                f,
100                "Range({}:[{} TO {}])",
101                self.field.0,
102                min.map_or("*".to_string(), |v| v.to_string()),
103                max.map_or("*".to_string(), |v| v.to_string()),
104            ),
105            RangeBound::I64 { min, max } => write!(
106                f,
107                "Range({}:[{} TO {}])",
108                self.field.0,
109                min.map_or("*".to_string(), |v| v.to_string()),
110                max.map_or("*".to_string(), |v| v.to_string()),
111            ),
112            RangeBound::F64 { min, max } => write!(
113                f,
114                "Range({}:[{} TO {}])",
115                self.field.0,
116                min.map_or("*".to_string(), |v| v.to_string()),
117                max.map_or("*".to_string(), |v| v.to_string()),
118            ),
119        }
120    }
121}
122
123impl RangeQuery {
124    pub fn new(field: Field, bound: RangeBound) -> Self {
125        Self { field, bound }
126    }
127
128    /// Convenience: u64 range
129    pub fn u64_range(field: Field, min: Option<u64>, max: Option<u64>) -> Self {
130        Self::new(field, RangeBound::U64 { min, max })
131    }
132
133    /// Convenience: i64 range
134    pub fn i64_range(field: Field, min: Option<i64>, max: Option<i64>) -> Self {
135        Self::new(field, RangeBound::I64 { min, max })
136    }
137
138    /// Convenience: f64 range
139    pub fn f64_range(field: Field, min: Option<f64>, max: Option<f64>) -> Self {
140        Self::new(field, RangeBound::F64 { min, max })
141    }
142}
143
144impl Query for RangeQuery {
145    fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
146        let field = self.field;
147        let bound = self.bound.clone();
148        Box::pin(async move {
149            match RangeScorer::new(reader, field, &bound) {
150                Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer>),
151                Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer>),
152            }
153        })
154    }
155
156    #[cfg(feature = "sync")]
157    fn scorer_sync<'a>(
158        &self,
159        reader: &'a SegmentReader,
160        _limit: usize,
161    ) -> crate::Result<Box<dyn Scorer + 'a>> {
162        match RangeScorer::new(reader, self.field, &self.bound) {
163            Ok(scorer) => Ok(Box::new(scorer) as Box<dyn Scorer + 'a>),
164            Err(_) => Ok(Box::new(EmptyRangeScorer) as Box<dyn Scorer + 'a>),
165        }
166    }
167
168    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
169        let num_docs = reader.num_docs();
170        // Rough estimate: half the segment (we don't know selectivity)
171        Box::pin(async move { Ok(num_docs / 2) })
172    }
173}
174
175// ── RangeScorer ──────────────────────────────────────────────────────────
176
177/// Scorer that scans a fast-field column and yields matching docs.
178///
179/// For u64 and f64 fields, comparison is done in the raw u64 domain (both
180/// use order-preserving encodings). For i64 fields, zigzag encoding does NOT
181/// preserve order, so we decode each value and compare in i64 domain.
182struct RangeScorer<'a> {
183    /// Cached fast-field reader — avoids HashMap lookup per doc in matches()
184    fast_field: &'a crate::structures::fast_field::FastFieldReader,
185    /// For u64/f64: compiled raw bounds. For i64: unused.
186    raw_lo: u64,
187    raw_hi: u64,
188    /// For i64 only: decoded bounds.
189    i64_lo: i64,
190    i64_hi: i64,
191    /// Whether to use i64 comparison path.
192    use_i64: bool,
193    /// Current document position.
194    current: u32,
195    num_docs: u32,
196}
197
198/// Empty scorer returned when the field has no fast-field data.
199struct EmptyRangeScorer;
200
201impl<'a> RangeScorer<'a> {
202    fn new(
203        reader: &'a SegmentReader,
204        field: Field,
205        bound: &RangeBound,
206    ) -> Result<Self, EmptyRangeScorer> {
207        let fast_field = reader.fast_field(field.0).ok_or(EmptyRangeScorer)?;
208        let num_docs = reader.num_docs();
209        let (raw_lo, raw_hi) = bound.compile();
210        let use_i64 = bound.is_i64();
211        let (i64_lo, i64_hi) = bound.i64_bounds();
212
213        let mut scorer = Self {
214            fast_field,
215            raw_lo,
216            raw_hi,
217            i64_lo,
218            i64_hi,
219            use_i64,
220            current: 0,
221            num_docs,
222        };
223
224        // Position on first matching doc
225        if num_docs > 0 && !scorer.matches(0) {
226            scorer.scan_forward();
227        }
228        Ok(scorer)
229    }
230
231    #[inline]
232    fn matches(&self, doc_id: DocId) -> bool {
233        let raw = self.fast_field.get_u64(doc_id);
234        if raw == FAST_FIELD_MISSING {
235            return false;
236        }
237
238        if self.use_i64 {
239            let val = crate::structures::fast_field::zigzag_decode(raw);
240            val >= self.i64_lo && val <= self.i64_hi
241        } else {
242            raw >= self.raw_lo && raw <= self.raw_hi
243        }
244    }
245
246    /// Advance current past non-matching docs.
247    fn scan_forward(&mut self) {
248        loop {
249            self.current += 1;
250            if self.current >= self.num_docs {
251                self.current = self.num_docs;
252                return;
253            }
254            if self.matches(self.current) {
255                return;
256            }
257        }
258    }
259}
260
261impl DocSet for RangeScorer<'_> {
262    fn doc(&self) -> DocId {
263        if self.current >= self.num_docs {
264            TERMINATED
265        } else {
266            self.current
267        }
268    }
269
270    fn advance(&mut self) -> DocId {
271        self.scan_forward();
272        self.doc()
273    }
274
275    fn seek(&mut self, target: DocId) -> DocId {
276        if self.current >= self.num_docs {
277            return TERMINATED;
278        }
279        if target <= self.current {
280            return self.current;
281        }
282        // Position just before target so scan_forward starts at target
283        self.current = target - 1;
284        self.scan_forward();
285        self.doc()
286    }
287
288    fn size_hint(&self) -> u32 {
289        // Upper bound: remaining docs
290        self.num_docs.saturating_sub(self.current)
291    }
292}
293
294impl Scorer for RangeScorer<'_> {
295    fn score(&self) -> Score {
296        1.0
297    }
298}
299
300impl DocSet for EmptyRangeScorer {
301    fn doc(&self) -> DocId {
302        TERMINATED
303    }
304    fn advance(&mut self) -> DocId {
305        TERMINATED
306    }
307    fn seek(&mut self, _target: DocId) -> DocId {
308        TERMINATED
309    }
310    fn size_hint(&self) -> u32 {
311        0
312    }
313}
314
315impl Scorer for EmptyRangeScorer {
316    fn score(&self) -> Score {
317        0.0
318    }
319}
320
321#[cfg(test)]
322mod tests {
323    use super::*;
324
325    #[test]
326    fn test_range_bound_u64_compile() {
327        let b = RangeBound::U64 {
328            min: Some(10),
329            max: Some(100),
330        };
331        let (lo, hi) = b.compile();
332        assert_eq!(lo, 10);
333        assert_eq!(hi, 100);
334    }
335
336    #[test]
337    fn test_range_bound_f64_compile_preserves_order() {
338        let b1 = RangeBound::F64 {
339            min: Some(-1.0),
340            max: Some(1.0),
341        };
342        let (lo1, hi1) = b1.compile();
343        assert!(lo1 < hi1);
344
345        let b2 = RangeBound::F64 {
346            min: Some(0.0),
347            max: Some(100.0),
348        };
349        let (lo2, hi2) = b2.compile();
350        assert!(lo2 < hi2);
351    }
352
353    #[test]
354    fn test_range_bound_open_bounds() {
355        let b = RangeBound::U64 {
356            min: None,
357            max: None,
358        };
359        let (lo, hi) = b.compile();
360        assert_eq!(lo, 0);
361        assert_eq!(hi, u64::MAX - 1);
362    }
363
364    #[test]
365    fn test_range_query_constructors() {
366        let q = RangeQuery::u64_range(Field(0), Some(10), Some(100));
367        assert_eq!(q.field, Field(0));
368        assert!(matches!(
369            q.bound,
370            RangeBound::U64 {
371                min: Some(10),
372                max: Some(100)
373            }
374        ));
375
376        let q = RangeQuery::i64_range(Field(1), Some(-50), Some(50));
377        assert!(matches!(
378            q.bound,
379            RangeBound::I64 {
380                min: Some(-50),
381                max: Some(50)
382            }
383        ));
384
385        let q = RangeQuery::f64_range(Field(2), Some(0.5), Some(9.5));
386        assert!(matches!(q.bound, RangeBound::F64 { .. }));
387    }
388}