lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
//! Query DSL: AST types, JSON parser, and execution framework.
//!
//! Implements the `Query → BoundQuery → ScorerSupplier → Scorer` pipeline
//! from [[architecture-query-execution]]. Queries are parsed from JSON, bound to
//! index-level statistics, then per-segment scorers are built.
//!
//! See [[query-dsl]] and [[architecture-query-execution|milestone-2]].

pub mod ast;
pub mod boolean;
pub mod boost;
pub mod boosting;
pub mod constant_score;
pub mod convert;
pub mod dis_max;
pub mod exists;
pub mod function_score;
pub mod fuzzy;
pub mod match_query;
pub mod multi_term;
pub mod nested;
pub mod parser;
pub mod phrase;
pub mod prefix;
pub mod range;
pub mod regex_automaton;
pub mod regexp;
pub mod script_score;
pub mod span;
pub mod term;
pub mod wildcard;

use crate::core::{Result, ScoreMode, Scorer};

use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;

/// A parsed query that can be bound to index statistics for execution.
///
/// This is the first stage of the
/// `Query → BoundQuery → ScorerSupplier → Scorer` pipeline.
///
/// Corresponds to `Query` in [[architecture-query-execution#Core Traits]].
/// Object-safe — used as `Box<dyn Query>` in the execution engine.
/// The `Send + Sync` supertraits let query trait objects be moved to and
/// shared between threads.
pub(crate) trait Query: Send + Sync {
    /// Bind this query to index-level statistics, producing a BoundQuery.
    ///
    /// * `searcher` — the searcher the query is bound against; presumably
    ///   the source of the index-level statistics (confirm against
    ///   implementations).
    /// * `score_mode` — the scoring mode requested by the caller.
    ///   NOTE(review): how each mode influences binding is decided by the
    ///   implementation and is not visible from this trait.
    ///
    /// Returns the bound query as a boxed `BoundQuery` trait object.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation reports while binding.
    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>>;
}

/// Segment-independent query state bound to index statistics. Created
/// once per query, then asked for per-segment scorer suppliers.
///
/// Corresponds to `BoundQuery` in [[architecture-query-execution#Core Traits]].
pub(crate) trait BoundQuery: Send + Sync {
    /// Create a scorer supplier for a segment, or `None` if this weight
    /// cannot match any documents in the segment.
    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>>;

    /// Returns true if this weight matches all documents in every segment.
    /// Used to bypass scorer iteration in agg-only paths.
    fn is_match_all(&self) -> bool {
        false
    }

    /// Optional: score all matching documents directly into a collector,
    /// bypassing the doc-at-a-time Scorer interface. Returns total hits
    /// if supported, None to fall back to doc-at-a-time.
    fn bulk_score(
        &self,
        _reader: &SegmentReader,
        _collector: &mut crate::search::collector::TopDocsCollector,
        _segment_id: crate::core::SegmentId,
    ) -> Result<Option<u64>> {
        Ok(None)
    }

    /// Explain the score for a specific document.
    ///
    /// Default: builds a scorer, advances to the doc, returns a generic
    /// explanation. Override for detailed BM25/boolean breakdowns.
    /// See [[feature-search-explain]].
    fn explain(
        &self,
        reader: &SegmentReader,
        doc: crate::core::DocId,
    ) -> Result<crate::search::Explanation> {
        let supplier = match self.scorer_supplier(reader)? {
            Some(s) => s,
            None => {
                return Ok(crate::search::Explanation::no_match(
                    "no matching docs in segment".into(),
                ));
            }
        };
        let mut scorer = supplier.scorer()?;
        let found = scorer.advance(doc);
        if found != doc {
            return Ok(crate::search::Explanation::no_match(format!(
                "doc {} not matched",
                doc.as_u32()
            )));
        }
        let score = scorer.score();
        Ok(crate::search::Explanation::leaf(
            score,
            format!("score(doc={})", doc.as_u32()),
        ))
    }
}

/// Knows the estimated cost of scoring before a scorer is built.
///
/// This enables cost-based clause ordering in boolean conjunctions:
/// the cheapest supplier becomes the lead iterator.
///
/// Corresponds to `ScorerSupplier` in [[architecture-query-execution#Core Traits]].
pub trait ScorerSupplier: Send {
    /// Estimated number of matching documents in this segment.
    fn cost(&self) -> u64;

    /// Build the actual scorer.
    ///
    /// Takes `self: Box<Self>`, so the boxed supplier is consumed by the
    /// call and can produce at most one scorer.
    ///
    /// NOTE(review): this comment used to describe a `lead_cost` argument
    /// (the cost of the lead iterator in a conjunction, which
    /// implementations could use to choose between algorithms). The
    /// signature takes no such parameter; confirm whether it was removed
    /// on purpose or is still planned.
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation reports while
    /// constructing the scorer.
    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>>;
}

/// A query whose scorers expose position-level Spans, making it
/// composable under span operators (``SpanFirst``, ``SpanNot``).
///
/// This is the runtime-layer counterpart to the AST's
/// ``SpanExpression`` enum. ``Query`` is declared as a supertrait
/// (``SpanQuery: Query``), and trait upcasting (Rust ≥ 1.86) lets a
/// ``Box<dyn SpanQuery>`` be coerced to a ``Box<dyn Query>``, so it can
/// be used wherever a ``Box<dyn Query>`` is expected. Only the four
/// concrete span types implement it: ``SpanTermQuery``,
/// ``SpanNearQuery``, ``SpanNotQuery``, ``SpanFirstQuery``.
///
/// ``SpanFirstQuery.inner`` and ``SpanNotQuery.include/exclude`` hold
/// ``Box<dyn SpanQuery>`` (not ``Box<dyn Query>``), so the type
/// system prevents wrapping a non-span query in a span operator.
pub(crate) trait SpanQuery: Query {
    /// Bind to index stats, returning a span-typed ``BoundSpanQuery``
    /// so span composition preserves typing through the pipeline.
    ///
    /// Takes the same ``searcher`` and ``score_mode`` arguments as
    /// ``Query::bind``; only the return type differs
    /// (``Box<dyn BoundSpanQuery>`` instead of ``Box<dyn BoundQuery>``).
    ///
    /// # Errors
    ///
    /// Returns whatever error the implementation reports while binding.
    fn bind_span(
        &self,
        searcher: &Searcher,
        score_mode: ScoreMode,
    ) -> Result<Box<dyn BoundSpanQuery>>;
}

/// Segment-independent span query. ``BoundSpanQuery: BoundQuery`` so
/// it flows through the general pipeline, but adds the required
/// ``span_scorer_supplier`` method used by SpanFirst to apply an end
/// constraint via the ``FilterSpans`` wrapper.
///
/// Implemented only by the bound span types
/// (``BoundSpanTermQuery`` etc.), produced by ``SpanQuery::bind_span``.
pub(crate) trait BoundSpanQuery: BoundQuery {
    /// Build a scorer supplier that additionally filters emitted
    /// spans by end position. SpanFirst uses this to apply its
    /// ``end`` constraint without downcasting. Each span type wires
    /// its concrete ``Spans`` iterator through a ``FilterSpans``
    /// wrapper so the constraint reaches the position level.
    ///
    /// * ``reader`` — the segment to build the supplier for.
    /// * ``max_end`` — the end-position bound applied to emitted spans.
    ///   NOTE(review): whether the bound is inclusive or exclusive is
    ///   not visible from this trait; confirm against ``FilterSpans``.
    ///
    /// The return shape mirrors ``BoundQuery::scorer_supplier``
    /// (``Result<Option<Box<dyn ScorerSupplier>>>``); presumably
    /// ``Ok(None)`` likewise means nothing in the segment can match —
    /// TODO confirm.
    fn span_scorer_supplier(
        &self,
        reader: &SegmentReader,
        max_end: u32,
    ) -> Result<Option<Box<dyn ScorerSupplier>>>;
}

#[cfg(test)]
mod tests {
    use super::*;

    // These are compile-time checks. Naming `dyn Trait` in a binding's
    // type is rejected by the compiler unless the trait is object-safe,
    // so each test passes simply by building.

    #[test]
    fn query_is_object_safe() {
        let _probe: Option<&dyn Query> = None;
    }

    #[test]
    fn bound_query_is_object_safe() {
        let _probe: Option<&dyn BoundQuery> = None;
    }

    #[test]
    fn scorer_supplier_is_object_safe() {
        let _probe: Option<Box<dyn ScorerSupplier>> = None;
    }
}