hermes-core 1.8.33

Core async search engine library with WASM support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
//! Range query for fast-field numeric filtering.
//!
//! `RangeQuery` produces a `RangeScorer` that scans a fast-field column and
//! yields documents whose value falls within the specified bounds. Score is
//! always 1.0 — this is a pure filter query.
//!
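//! Supports u64, i64, and f64 fields. u64 values are compared raw, and f64
//! bounds are encoded to the sortable-u64 representation used by fast fields,
//! so both are filtered with a plain raw u64 comparison. i64 fast fields are
//! zigzag-encoded, which does not preserve order, so each stored value is
//! decoded and compared in the i64 domain instead.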
//!
//! When placed in a `BooleanQuery` MUST clause, the `BooleanScorer`'s
//! seek-based intersection makes this efficient even on large segments.

use crate::dsl::Field;
use crate::segment::SegmentReader;
use crate::structures::TERMINATED;
use crate::structures::fast_field::{FAST_FIELD_MISSING, f64_to_sortable_u64, zigzag_encode};
use crate::{DocId, Score};

use super::docset::DocSet;
use super::traits::{CountFuture, Query, Scorer, ScorerFuture};

// ── Typed range bounds ───────────────────────────────────────────────────

/// Inclusive range bounds in the user's type domain.
///
/// A `None` on either side leaves that side of the range open.
#[derive(Debug, Clone)]
pub enum RangeBound {
    /// u64 range — stored raw
    U64 { min: Option<u64>, max: Option<u64> },
    /// i64 range — matched in the decoded i64 domain, because the zigzag
    /// encoding used for stored i64 values does not preserve ordering
    I64 { min: Option<i64>, max: Option<i64> },
    /// f64 range — will be sortable-encoded for comparison
    F64 { min: Option<f64>, max: Option<f64> },
}

impl RangeBound {
    /// Lower the bounds to an inclusive `(low, high)` pair of raw u64 values.
    ///
    /// An absent lower bound becomes 0. An absent upper bound becomes
    /// `u64::MAX - 1`, which keeps `u64::MAX` free for the FAST_FIELD_MISSING
    /// sentinel.
    ///
    /// For `I64` the pair holds the zigzag-encoded endpoints. Zigzag does not
    /// preserve ordering, so that pair is not suitable for an ordered
    /// comparison; i64 ranges are evaluated through `i64_bounds` instead.
    fn compile(&self) -> (u64, u64) {
        // First bring both endpoints into the raw u64 domain, keeping them
        // optional, then apply the shared open-bound defaults once.
        let (low, high) = match *self {
            RangeBound::U64 { min, max } => (min, max),
            RangeBound::I64 { min, max } => (min.map(zigzag_encode), max.map(zigzag_encode)),
            RangeBound::F64 { min, max } => (
                min.map(f64_to_sortable_u64),
                max.map(f64_to_sortable_u64),
            ),
        };
        (low.unwrap_or(0), high.unwrap_or(u64::MAX - 1))
    }

    /// True for `I64` ranges, which need every stored value decoded before
    /// comparison (zigzag doesn't preserve order).
    fn is_i64(&self) -> bool {
        matches!(self, RangeBound::I64 { .. })
    }

    /// Inclusive bounds in the i64 domain, for the decoded comparison path.
    ///
    /// Open sides fall back to `i64::MIN` / `i64::MAX`. Non-`I64` variants
    /// return the full i64 range.
    fn i64_bounds(&self) -> (i64, i64) {
        if let RangeBound::I64 { min, max } = self {
            (min.unwrap_or(i64::MIN), max.unwrap_or(i64::MAX))
        } else {
            (i64::MIN, i64::MAX)
        }
    }
}

// ── RangeQuery ───────────────────────────────────────────────────────────

/// Fast-field range query.
///
/// Scans all documents in a segment and yields those whose fast-field value
/// falls within `[min, max]` (inclusive). Score is always 1.0.
///
/// Documents whose stored value is the missing-value sentinel never match,
/// and a segment with no fast-field data for `field` yields no documents.
#[derive(Debug, Clone)]
pub struct RangeQuery {
    /// Field whose fast-field column is filtered.
    pub field: Field,
    /// Inclusive bounds a document's value must satisfy to match.
    pub bound: RangeBound,
}

impl std::fmt::Display for RangeQuery {
    /// Render the query as `Range(<field id>:[<min> TO <max>])`.
    ///
    /// An open bound is printed as `*`. Endpoints are written directly into
    /// the formatter, so no temporary `String` is allocated per call.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Write `<min> TO <max>`, substituting `*` for an absent endpoint.
        fn write_endpoints<T: std::fmt::Display>(
            f: &mut std::fmt::Formatter<'_>,
            min: &Option<T>,
            max: &Option<T>,
        ) -> std::fmt::Result {
            match min {
                Some(value) => write!(f, "{}", value)?,
                None => f.write_str("*")?,
            }
            f.write_str(" TO ")?;
            match max {
                Some(value) => write!(f, "{}", value),
                None => f.write_str("*"),
            }
        }

        write!(f, "Range({}:[", self.field.0)?;
        // All three variants share one layout; only the endpoint type differs.
        match &self.bound {
            RangeBound::U64 { min, max } => write_endpoints(f, min, max)?,
            RangeBound::I64 { min, max } => write_endpoints(f, min, max)?,
            RangeBound::F64 { min, max } => write_endpoints(f, min, max)?,
        }
        f.write_str("])")
    }
}

impl RangeQuery {
    pub fn new(field: Field, bound: RangeBound) -> Self {
        Self { field, bound }
    }

    /// Convenience: u64 range
    pub fn u64(field: Field, min: Option<u64>, max: Option<u64>) -> Self {
        Self::new(field, RangeBound::U64 { min, max })
    }

    /// Convenience: i64 range
    pub fn i64(field: Field, min: Option<i64>, max: Option<i64>) -> Self {
        Self::new(field, RangeBound::I64 { min, max })
    }

    /// Convenience: f64 range
    pub fn f64(field: Field, min: Option<f64>, max: Option<f64>) -> Self {
        Self::new(field, RangeBound::F64 { min, max })
    }
}

impl Query for RangeQuery {
    /// Build the per-segment scorer.
    ///
    /// When the segment has no fast-field data for the field, an
    /// `EmptyRangeScorer` is returned instead of an error.
    fn scorer<'a>(&self, reader: &'a SegmentReader, _limit: usize) -> ScorerFuture<'a> {
        // Copy what the future needs so it does not borrow `self`.
        let (field, bound) = (self.field, self.bound.clone());
        Box::pin(async move {
            Ok(match RangeScorer::new(reader, field, &bound) {
                Ok(scorer) => Box::new(scorer) as Box<dyn Scorer>,
                Err(empty) => Box::new(empty) as Box<dyn Scorer>,
            })
        })
    }

    /// Synchronous counterpart of `scorer`, with the same empty-scorer fallback.
    #[cfg(feature = "sync")]
    fn scorer_sync<'a>(
        &self,
        reader: &'a SegmentReader,
        _limit: usize,
    ) -> crate::Result<Box<dyn Scorer + 'a>> {
        Ok(match RangeScorer::new(reader, self.field, &self.bound) {
            Ok(scorer) => Box::new(scorer) as Box<dyn Scorer + 'a>,
            Err(empty) => Box::new(empty) as Box<dyn Scorer + 'a>,
        })
    }

    /// Estimate the number of matches as half of the segment's documents.
    fn count_estimate<'a>(&self, reader: &'a SegmentReader) -> CountFuture<'a> {
        // Selectivity is unknown, so guess that half the segment matches.
        let estimate = reader.num_docs() / 2;
        Box::pin(async move { Ok(estimate) })
    }

    /// Range queries only filter; they contribute no relevance signal.
    fn is_filter(&self) -> bool {
        true
    }

    /// Per-document membership test backed by the fast-field column.
    ///
    /// Returns `None` when the segment has no fast-field data for the field.
    fn as_doc_predicate<'a>(&self, reader: &'a SegmentReader) -> Option<super::DocPredicate<'a>> {
        let column = reader.fast_field(self.field.0)?;
        let use_i64 = self.bound.is_i64();
        let (i64_lo, i64_hi) = self.bound.i64_bounds();
        let (raw_lo, raw_hi) = self.bound.compile();

        Some(Box::new(move |doc_id: DocId| -> bool {
            let raw = column.get_u64(doc_id);
            if raw == FAST_FIELD_MISSING {
                // Documents without a value never match.
                false
            } else if use_i64 {
                // Zigzag is not order-preserving: compare the decoded value.
                let decoded = crate::structures::fast_field::zigzag_decode(raw);
                (i64_lo..=i64_hi).contains(&decoded)
            } else {
                (raw_lo..=raw_hi).contains(&raw)
            }
        }))
    }

    /// Materialize the predicate into a bitset over the whole segment.
    fn as_doc_bitset(&self, reader: &SegmentReader) -> Option<super::DocBitset> {
        // Build bitset from fast-field scan: O(N). Slower than posting-list-based
        // bitsets but still faster than per-call predicate in BMP (~2ns lookup vs ~30ns).
        let predicate = self.as_doc_predicate(reader)?;
        let bitset = super::DocBitset::from_predicate(reader.num_docs(), &*predicate);
        Some(bitset)
    }
}

// ── RangeScorer ──────────────────────────────────────────────────────────

/// Scorer that scans a fast-field column and yields matching docs.
///
/// For u64 and f64 fields, comparison is done in the raw u64 domain (both
/// use order-preserving encodings). For i64 fields, zigzag encoding does NOT
/// preserve order, so we decode each value and compare in i64 domain.
///
/// All bounds are inclusive. Iteration is exhausted once `current` reaches
/// `num_docs`.
struct RangeScorer<'a> {
    /// Cached fast-field reader — avoids HashMap lookup per doc in matches()
    fast_field: &'a crate::structures::fast_field::FastFieldReader,
    /// For u64/f64: compiled raw bounds. For i64: unused.
    raw_lo: u64,
    raw_hi: u64,
    /// For i64 only: decoded bounds.
    i64_lo: i64,
    i64_hi: i64,
    /// Whether to use i64 comparison path.
    use_i64: bool,
    /// Current document position.
    current: u32,
    /// Number of documents in the segment; also the exhausted value of `current`.
    num_docs: u32,
}

/// Empty scorer returned when the field has no fast-field data.
///
/// Also serves as the error value of `RangeScorer::new`, so callers can box
/// it directly as the fallback scorer.
struct EmptyRangeScorer;

impl<'a> RangeScorer<'a> {
    /// Create a scorer positioned on the first matching document.
    ///
    /// Fails with `EmptyRangeScorer` when the segment has no fast-field data
    /// for `field`. If no document matches, the scorer starts out exhausted.
    fn new(
        reader: &'a SegmentReader,
        field: Field,
        bound: &RangeBound,
    ) -> Result<Self, EmptyRangeScorer> {
        let fast_field = match reader.fast_field(field.0) {
            Some(column) => column,
            None => return Err(EmptyRangeScorer),
        };
        let (raw_lo, raw_hi) = bound.compile();
        let (i64_lo, i64_hi) = bound.i64_bounds();

        let mut scorer = Self {
            fast_field,
            raw_lo,
            raw_hi,
            i64_lo,
            i64_hi,
            use_i64: bound.is_i64(),
            current: 0,
            num_docs: reader.num_docs(),
        };

        // Doc 0 is the starting position; move on only if it is not a hit.
        let must_skip_first = scorer.num_docs > 0 && !scorer.matches(0);
        if must_skip_first {
            scorer.scan_forward();
        }
        Ok(scorer)
    }

    /// Whether `doc_id` has a value that lies inside the inclusive bounds.
    #[inline]
    fn matches(&self, doc_id: DocId) -> bool {
        let raw = self.fast_field.get_u64(doc_id);
        if raw == FAST_FIELD_MISSING {
            // Documents without a value never match.
            false
        } else if self.use_i64 {
            // Zigzag is not order-preserving: compare the decoded value.
            let decoded = crate::structures::fast_field::zigzag_decode(raw);
            (self.i64_lo..=self.i64_hi).contains(&decoded)
        } else {
            (self.raw_lo..=self.raw_hi).contains(&raw)
        }
    }

    /// Move `current` to the next matching doc strictly after it, or to
    /// `num_docs` when there is none.
    fn scan_forward(&mut self) {
        let start = self.current + 1;
        self.current = (start..self.num_docs)
            .find(|&doc_id| self.matches(doc_id))
            .unwrap_or(self.num_docs);
    }
}

impl DocSet for RangeScorer<'_> {
    /// Current document, or `TERMINATED` once the scan has run off the end.
    fn doc(&self) -> DocId {
        if self.current < self.num_docs {
            self.current
        } else {
            TERMINATED
        }
    }

    /// Step to the next matching document after the current one.
    fn advance(&mut self) -> DocId {
        self.scan_forward();
        self.doc()
    }

    /// Move to the first matching document at or after `target`.
    ///
    /// Never moves backwards: a target at or before the current position
    /// leaves the scorer where it is, and an exhausted scorer stays exhausted.
    fn seek(&mut self, target: DocId) -> DocId {
        let exhausted = self.current >= self.num_docs;
        if !exhausted && target > self.current {
            // Park one slot before the target; scan_forward pre-increments,
            // so its first candidate is `target` itself.
            self.current = target - 1;
            self.scan_forward();
        }
        self.doc()
    }

    /// Upper bound on remaining matches: every doc from `current` onwards.
    fn size_hint(&self) -> u32 {
        self.num_docs.saturating_sub(self.current)
    }
}

impl Scorer for RangeScorer<'_> {
    /// Constant score: a range match only filters, so every hit scores 1.0.
    fn score(&self) -> Score {
        1.0
    }
}

// A doc set with no documents: every positioning call reports TERMINATED.
impl DocSet for EmptyRangeScorer {
    /// Always `TERMINATED`; there is never a current document.
    fn doc(&self) -> DocId {
        TERMINATED
    }
    /// Always `TERMINATED`; there is nothing to advance to.
    fn advance(&mut self) -> DocId {
        TERMINATED
    }
    /// Always `TERMINATED`, whatever the target.
    fn seek(&mut self, _target: DocId) -> DocId {
        TERMINATED
    }
    /// No documents remain, so the hint is 0.
    fn size_hint(&self) -> u32 {
        0
    }
}

impl Scorer for EmptyRangeScorer {
    /// Constant 0.0; this scorer never positions on a document (`doc()` is
    /// always `TERMINATED`).
    fn score(&self) -> Score {
        0.0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Explicit u64 bounds pass through `compile` unchanged.
    #[test]
    fn test_range_bound_u64_compile() {
        let b = RangeBound::U64 {
            min: Some(10),
            max: Some(100),
        };
        let (lo, hi) = b.compile();
        assert_eq!(lo, 10);
        assert_eq!(hi, 100);
    }

    /// The sortable encoding keeps f64 bounds ordered, including across zero.
    #[test]
    fn test_range_bound_f64_compile_preserves_order() {
        let b1 = RangeBound::F64 {
            min: Some(-1.0),
            max: Some(1.0),
        };
        let (lo1, hi1) = b1.compile();
        assert!(lo1 < hi1);

        let b2 = RangeBound::F64 {
            min: Some(0.0),
            max: Some(100.0),
        };
        let (lo2, hi2) = b2.compile();
        assert!(lo2 < hi2);
    }

    /// Open bounds span everything except the reserved `u64::MAX` value.
    #[test]
    fn test_range_bound_open_bounds() {
        let b = RangeBound::U64 {
            min: None,
            max: None,
        };
        let (lo, hi) = b.compile();
        assert_eq!(lo, 0);
        assert_eq!(hi, u64::MAX - 1);
    }

    /// Only `I64` selects the decoded comparison path; its open sides fall
    /// back to the full i64 range, as do the bounds of other variants.
    #[test]
    fn test_range_bound_i64_path() {
        let b = RangeBound::I64 {
            min: Some(-5),
            max: None,
        };
        assert!(b.is_i64());
        assert_eq!(b.i64_bounds(), (-5, i64::MAX));

        let open = RangeBound::I64 {
            min: None,
            max: None,
        };
        assert!(open.is_i64());
        assert_eq!(open.i64_bounds(), (i64::MIN, i64::MAX));

        let other = RangeBound::F64 {
            min: Some(0.0),
            max: Some(1.0),
        };
        assert!(!other.is_i64());
        assert_eq!(other.i64_bounds(), (i64::MIN, i64::MAX));
    }

    /// The convenience constructors store the field and the typed bound.
    #[test]
    fn test_range_query_constructors() {
        let q = RangeQuery::u64(Field(0), Some(10), Some(100));
        assert_eq!(q.field, Field(0));
        assert!(matches!(
            q.bound,
            RangeBound::U64 {
                min: Some(10),
                max: Some(100)
            }
        ));

        let q = RangeQuery::i64(Field(1), Some(-50), Some(50));
        assert!(matches!(
            q.bound,
            RangeBound::I64 {
                min: Some(-50),
                max: Some(50)
            }
        ));

        let q = RangeQuery::f64(Field(2), Some(0.5), Some(9.5));
        assert!(matches!(q.bound, RangeBound::F64 { .. }));
    }

    /// `Display` prints the field id and both endpoints, with `*` for open sides.
    #[test]
    fn test_range_query_display() {
        assert_eq!(
            RangeQuery::u64(Field(0), Some(10), None).to_string(),
            "Range(0:[10 TO *])"
        );
        assert_eq!(
            RangeQuery::i64(Field(1), Some(-50), Some(50)).to_string(),
            "Range(1:[-50 TO 50])"
        );
        assert_eq!(
            RangeQuery::f64(Field(2), None, Some(9.5)).to_string(),
            "Range(2:[* TO 9.5])"
        );
    }
}