lucisearch 0.8.1

Embeddable, in-process search engine — the SQLite/DuckDB of search
Documentation
use crate::core::DocId;

/// Iterates over matching documents and produces scores.
///
/// The `Scorer` owns its iteration state. There is no separate
/// `DocIterator` — `doc_id()`, `next()`, `advance()` are on this trait
/// directly. See [[architecture-query-execution#Core Traits]].
///
/// Object-safe: used as `Box<dyn Scorer>` in the query execution engine.
/// Vtable dispatch cost is amortized by block-of-128 posting processing.
///
/// `Send` but not `Sync`: scorers are mutable (iteration state) and used
/// by one thread at a time within a rayon segment task.
pub trait Scorer: Send {
    /// Current document ID. Valid after `next()` or `advance()` has been
    /// called. Before the first call, the value is unspecified.
    fn doc_id(&self) -> DocId;

    /// Advance to the next matching document.
    /// Returns the new doc ID, or `NO_MORE_DOCS` if exhausted.
    fn next(&mut self) -> DocId;

    /// Advance to the first matching document at or after `target`.
    /// Returns the new doc ID, or `NO_MORE_DOCS` if no match >= target.
    ///
    /// Contract: `target` must be > current `doc_id()`. Callers must not
    /// call `advance()` with a target <= the current position.
    fn advance(&mut self, target: DocId) -> DocId;

    /// Score of the current document.
    /// Only meaningful when `ScoreMode::needs_scores()` is true.
    fn score(&mut self) -> f32;

    /// Optional two-phase iteration support.
    ///
    /// If `Some`, the scorer's `next()`/`advance()` produce an approximate
    /// (superset) of true matches. The caller must invoke `matches()` on the
    /// returned [`TwoPhaseIterator`] to confirm each candidate.
    ///
    /// The return value is determined at construction time and cached —
    /// implementations return the same variant on every call. See
    /// [[architecture-query-execution#Two-Phase Iteration Lifecycle]].
    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator>;

    /// Upper bound on the score this scorer can produce for any document.
    /// Used by WAND/MaxScore to skip non-competitive documents.
    /// Default: `f32::MAX` (no bound known).
    fn max_score(&self) -> f32 {
        f32::MAX
    }

    /// Upper bound on score for documents in the block containing `doc`.
    /// Returns `max_score()` by default (no block-level information).
    /// Overridden by BlockMaxBm25Scorer for tighter per-block bounds.
    ///
    /// See [[architecture-query-execution#WAND / MaxScore Optimization]].
    fn block_max_score(&mut self, _doc: DocId) -> f32 {
        self.max_score()
    }

    /// Inform this scorer of the minimum competitive score. Documents scoring
    /// below this threshold may be skipped. Default: no-op.
    ///
    /// Used by TopDocsCollector to propagate the heap minimum back to
    /// WAND-aware scorers. See [[architecture-query-execution#WAND / MaxScore Optimization]].
    fn set_min_competitive_score(&mut self, _min_score: f32) {}
}

/// Two-phase verification for approximate scorers.
///
/// Used by phrase queries, geo distance queries, regex queries, and other
/// scorers where the fast approximation (posting list intersection) produces
/// a superset of true matches and each candidate needs expensive
/// verification. See [[architecture-query-execution#Core Traits]].
pub trait TwoPhaseIterator: Send {
    /// Verify that the current document (from the parent `Scorer`) truly
    /// matches. Called after the scorer's `next()`/`advance()` positions on
    /// a candidate.
    fn matches(&mut self) -> bool;

    /// Estimated cost of a single `matches()` call, relative to the cost
    /// of `advance()`. Used to order multiple two-phase scorers
    /// cheapest-first in conjunctions.
    fn match_cost(&self) -> f32;
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::NO_MORE_DOCS;

    /// Scorer that immediately reports no matches. Validates that the
    /// Scorer trait is object-safe (`Box<dyn Scorer>` compiles).
    struct EmptyScorer;

    impl Scorer for EmptyScorer {
        fn doc_id(&self) -> DocId {
            NO_MORE_DOCS
        }

        fn next(&mut self) -> DocId {
            NO_MORE_DOCS
        }

        fn advance(&mut self, _target: DocId) -> DocId {
            NO_MORE_DOCS
        }

        fn score(&mut self) -> f32 {
            0.0
        }

        fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
            None
        }
    }

    /// Scorer backed by a sorted Vec of DocIds. Validates the next()/advance()
    /// contract.
    struct VecScorer {
        docs: Vec<DocId>,
        pos: usize,
    }

    impl VecScorer {
        fn new(docs: Vec<DocId>) -> Self {
            Self { docs, pos: 0 }
        }

        fn current(&self) -> DocId {
            if self.pos < self.docs.len() {
                self.docs[self.pos]
            } else {
                NO_MORE_DOCS
            }
        }
    }

    impl Scorer for VecScorer {
        fn doc_id(&self) -> DocId {
            self.current()
        }

        fn next(&mut self) -> DocId {
            if self.pos < self.docs.len() {
                self.pos += 1;
            }
            self.current()
        }

        fn advance(&mut self, target: DocId) -> DocId {
            while self.pos < self.docs.len() && self.docs[self.pos] < target {
                self.pos += 1;
            }
            self.current()
        }

        fn score(&mut self) -> f32 {
            1.0
        }

        fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
            None
        }
    }

    #[test]
    fn empty_scorer_is_object_safe() {
        let scorer: Box<dyn Scorer> = Box::new(EmptyScorer);
        assert_eq!(scorer.doc_id(), NO_MORE_DOCS);
    }

    #[test]
    fn empty_scorer_returns_no_more_docs() {
        let mut scorer = EmptyScorer;
        assert_eq!(scorer.next(), NO_MORE_DOCS);
        assert_eq!(scorer.advance(DocId(0)), NO_MORE_DOCS);
    }

    #[test]
    fn vec_scorer_iterates_in_order() {
        let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10)]);
        assert_eq!(scorer.doc_id(), DocId(1));
        assert_eq!(scorer.next(), DocId(5));
        assert_eq!(scorer.next(), DocId(10));
        assert_eq!(scorer.next(), NO_MORE_DOCS);
    }

    #[test]
    fn vec_scorer_advance_skips() {
        let mut scorer = VecScorer::new(vec![DocId(1), DocId(5), DocId(10), DocId(20)]);
        assert_eq!(scorer.advance(DocId(5)), DocId(5));
        assert_eq!(scorer.advance(DocId(15)), DocId(20));
        assert_eq!(scorer.advance(DocId(21)), NO_MORE_DOCS);
    }

    #[test]
    fn vec_scorer_advance_past_end() {
        let mut scorer = VecScorer::new(vec![DocId(1), DocId(2)]);
        assert_eq!(scorer.advance(DocId(100)), NO_MORE_DOCS);
    }

    #[test]
    fn vec_scorer_default_max_score() {
        let scorer = VecScorer::new(vec![]);
        assert_eq!(scorer.max_score(), f32::MAX);
    }
}