// lucisearch 0.8.1

// Embeddable, in-process search engine — the SQLite/DuckDB of search
// Documentation
//! Range query: match documents where a numeric field falls within a range.
//!
//! Supports gte (>=), gt (>), lte (<=), lt (<) bounds. Scans the columnar
//! store for matching doc IDs — no inverted index involved.
//!
//! See [[elasticsearch-parity]] and [[columnar-storage]].

use crate::core::{DocId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};

use crate::query::{BoundQuery, Query, ScorerSupplier};
use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;

/// Numeric range query.
///
/// Matches documents whose value in `field` satisfies every bound that is
/// set. Bounds left as `None` are not checked, so a query may be open-ended
/// on either side. Setting both the inclusive and exclusive variant of the
/// same side is allowed; both are then enforced.
///
/// `Default` yields an empty field name with no bounds, which makes
/// struct-update syntax convenient:
/// `RangeQuery { field: "price".into(), gte: Some(5.0), ..Default::default() }`.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct RangeQuery {
    /// Name of the numeric field to test.
    pub field: String,
    /// Inclusive lower bound (`value >= gte`).
    pub gte: Option<f64>,
    /// Exclusive lower bound (`value > gt`).
    pub gt: Option<f64>,
    /// Inclusive upper bound (`value <= lte`).
    pub lte: Option<f64>,
    /// Exclusive upper bound (`value < lt`).
    pub lt: Option<f64>,
}

impl Query for RangeQuery {
    /// Copies the field name and all four bounds into a `BoundRangeQuery`.
    ///
    /// Neither the searcher nor the score mode is consulted; the per-segment
    /// work happens later in the bound query's `scorer_supplier`.
    fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        let Self {
            field,
            gte,
            gt,
            lte,
            lt,
        } = self;

        let bound = BoundRangeQuery {
            field: field.clone(),
            gte: *gte,
            gt: *gt,
            lte: *lte,
            lt: *lt,
        };
        Ok(Box::new(bound))
    }
}

/// Bound form of the range query: the target field name plus the four
/// optional bounds, copied from the query at bind time.
struct BoundRangeQuery {
    /// Name of the numeric field to test.
    field: String,
    /// Inclusive lower bound.
    gte: Option<f64>,
    /// Exclusive lower bound.
    gt: Option<f64>,
    /// Inclusive upper bound.
    lte: Option<f64>,
    /// Exclusive upper bound.
    lt: Option<f64>,
}

impl BoundRangeQuery {
    /// Returns `true` when `value` satisfies every bound that is set.
    ///
    /// Unset bounds are vacuously satisfied, so with no bounds at all every
    /// value matches.
    ///
    /// Each check is phrased as "the value must compare as inside the bound"
    /// rather than "reject when it compares as outside". The difference
    /// matters for NaN, for which every ordered comparison is `false`: a NaN
    /// value never matches a bounded range, and a NaN bound matches nothing.
    fn matches(&self, value: f64) -> bool {
        self.gte.map_or(true, |bound| value >= bound)
            && self.gt.map_or(true, |bound| value > bound)
            && self.lte.map_or(true, |bound| value <= bound)
            && self.lt.map_or(true, |bound| value < bound)
    }
}

impl BoundQuery for BoundRangeQuery {
    /// Builds the scorer supplier for one segment.
    ///
    /// Returns `Ok(None)` when the segment cannot contribute any hit: the
    /// field is not in the segment header, the segment has no column for it,
    /// the column statistics rule the range out, or the scan finds nothing.
    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
        // Resolve the field name to this segment's field id.
        let resolved = reader
            .header()
            .fields
            .iter()
            .find_map(|f| (f.field_name == self.field).then(|| f.field_id));
        let field_id = if let Some(id) = resolved {
            id
        } else {
            return Ok(None);
        };

        let col = if let Some(column) = reader.column(field_id) {
            column
        } else {
            return Ok(None);
        };

        // Zonemap pruning: when column stats are available, skip the whole
        // segment if the requested range cannot overlap [min, max].
        if let Some(stats) = col.stats() {
            let (seg_min, seg_max) = (stats.min, stats.max);

            // The lower bound sits above everything stored in the segment.
            let starts_past_max = matches!(self.gte, Some(b) if b > seg_max)
                || matches!(self.gt, Some(b) if b >= seg_max);
            // The upper bound sits below everything stored in the segment.
            let ends_before_min = matches!(self.lte, Some(b) if b < seg_min)
                || matches!(self.lt, Some(b) if b <= seg_min);

            if starts_past_max || ends_before_min {
                return Ok(None);
            }
        }

        // Materialise the matching doc ids up front. Visiting ids in
        // ascending order keeps the vec sorted; the cost is one pass over the
        // column, but scoring then needs no further column reads.
        let matching_docs: Vec<u32> = (0..col.doc_count())
            .filter(|&doc| col.numeric_value(doc).map_or(false, |v| self.matches(v)))
            .collect();

        if matching_docs.is_empty() {
            Ok(None)
        } else {
            Ok(Some(Box::new(RangeScorerSupplier { matching_docs })))
        }
    }
}

/// Holds the pre-computed matching doc ids for one segment until a scorer is
/// requested.
struct RangeScorerSupplier {
    /// Doc ids that satisfied the range, in the order they were collected.
    matching_docs: Vec<u32>,
}

impl ScorerSupplier for RangeScorerSupplier {
    /// Cost estimate: the number of matching documents.
    fn cost(&self) -> u64 {
        let hits = self.matching_docs.len();
        hits as u64
    }

    /// Consumes the supplier and hands its doc-id list to a `RangeScorer`
    /// positioned on the first entry.
    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
        let RangeScorerSupplier { matching_docs } = *self;
        let scorer = RangeScorer {
            matching_docs,
            pos: 0,
        };
        Ok(Box::new(scorer))
    }
}

/// Cursor over a pre-computed list of matching doc ids.
struct RangeScorer {
    /// Matching doc ids; `advance` relies on them being in ascending order.
    matching_docs: Vec<u32>,
    /// Index of the current entry; at or past the end once exhausted.
    pos: usize,
}

impl Scorer for RangeScorer {
    /// Current doc id, or `NO_MORE_DOCS` once the cursor is past the end.
    fn doc_id(&self) -> DocId {
        match self.matching_docs.get(self.pos) {
            Some(&doc) => DocId::new(doc),
            None => NO_MORE_DOCS,
        }
    }

    /// Steps to the following entry and returns its doc id.
    fn next(&mut self) -> DocId {
        self.pos += 1;
        self.doc_id()
    }

    /// Moves forward to the first entry whose id is `>= target`.
    ///
    /// The cursor never moves backwards: if the current entry already
    /// satisfies the target it is returned unchanged.
    fn advance(&mut self, target: DocId) -> DocId {
        let wanted = target.as_u32();
        while let Some(&doc) = self.matching_docs.get(self.pos) {
            if doc >= wanted {
                break;
            }
            self.pos += 1;
        }
        self.doc_id()
    }

    /// Range queries are filter-context, so every hit scores a constant 1.0.
    fn score(&mut self) -> f32 {
        1.0
    }

    /// No two-phase iteration is offered.
    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::AnalyzerRegistry;
    use crate::columnar::writer::ColumnValue;
    use crate::core::SegmentId;
    use crate::mapping::{FieldType, Mapping};
    use crate::search::segment_store::SegmentStore;
    use crate::segment::builder::SegmentBuilder;

    /// Builds a segment with one document per entry in `values`, each
    /// carrying that value in the float column `price`.
    fn build_numeric_segment(values: &[f64]) -> SegmentReader {
        let mapping = Mapping::builder().field("price", FieldType::Float).build();
        let price_id = mapping.field_id("price").unwrap();
        let mut segment = SegmentBuilder::new(SegmentId::new(1), &mapping);

        for value in values.iter().copied() {
            segment.add_document(&[], b"{}");
            segment.add_column_value(price_id, ColumnValue::F64(value));
        }

        SegmentReader::open(segment.build()).unwrap()
    }

    /// Range query over the `price` field with the given bounds.
    fn price_range(
        gte: Option<f64>,
        gt: Option<f64>,
        lte: Option<f64>,
        lt: Option<f64>,
    ) -> RangeQuery {
        RangeQuery {
            field: "price".into(),
            gte,
            gt,
            lte,
            lt,
        }
    }

    /// Inclusive bounds on both sides keep the endpoints.
    #[test]
    fn range_gte_lte() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[1.0, 5.0, 10.0, 15.0, 20.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = price_range(Some(5.0), None, Some(15.0), None);

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 3); // 5.0, 10.0, 15.0
    }

    /// Exclusive bounds on both sides drop the endpoints.
    #[test]
    fn range_gt_lt() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[1.0, 5.0, 10.0, 15.0, 20.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = price_range(None, Some(5.0), None, Some(15.0));

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 1); // 10.0 only
    }

    /// A lower bound above every stored value yields nothing.
    #[test]
    fn range_no_matches() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[1.0, 2.0, 3.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = price_range(Some(100.0), None, None, None);

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 0);
    }

    /// A range wider than the data returns every document.
    #[test]
    fn range_all_match() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[5.0, 10.0, 15.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = price_range(Some(0.0), None, Some(100.0), None);

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 3);
    }

    /// Only `gte` set: there is no upper bound.
    #[test]
    fn range_open_ended() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[1.0, 5.0, 10.0, 15.0, 20.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = price_range(Some(10.0), None, None, None);

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 3); // 10.0, 15.0, 20.0
    }

    /// Querying a field the segment does not have matches nothing.
    #[test]
    fn range_missing_field() {
        let store = SegmentStore::new(
            vec![build_numeric_segment(&[1.0, 5.0])],
            AnalyzerRegistry::new(),
            None,
            None,
        );
        let searcher = Searcher::new(&store);
        let query = RangeQuery {
            field: "nonexistent".into(),
            ..price_range(Some(0.0), None, None, None)
        };

        let results = searcher.search_query(&query, 10, 0).unwrap();
        assert_eq!(results.total_hits.value, 0);
    }
}