lucisearch 0.8.0

Embeddable, in-process search engine — the SQLite/DuckDB of Elasticsearch
Documentation
//! Nested query: search within nested object arrays with cross-object isolation.
//!
//! Runs an inner query against nested (hidden) documents, then joins matches
//! back to parent documents. This prevents the cross-object matching problem.
//!
//! See [[nested-documents]] and [[architecture-overview|milestone-7]].

use crate::core::{DocId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};

use crate::query::{BoundQuery, Query, ScorerSupplier};
use crate::search::searcher::Searcher;
use crate::segment::reader::SegmentReader;

/// Query within nested documents at a given path.
pub struct NestedQuery {
    pub path: String,
    pub(crate) inner: Box<dyn Query>,
    pub inner_hits: Option<crate::query::ast::InnerHitsConfig>,
}

impl Query for NestedQuery {
    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
        let inner_weight = self.inner.bind(searcher, score_mode)?;
        // If inner_hits requested, create a second weight for resolution
        let inner_hits_weight = if self.inner_hits.is_some() {
            Some(self.inner.bind(searcher, score_mode)?)
        } else {
            None
        };
        Ok(Box::new(BoundNestedQuery {
            _path: self.path.clone(),
            inner: inner_weight,
            _inner_hits_config: self.inner_hits.clone(),
            _inner_hits_weight: inner_hits_weight,
        }))
    }
}

/// Metadata for resolving inner hits after search completes.
pub struct InnerHitSpec {
    pub name: String,
    pub path: String,
    pub config: crate::query::ast::InnerHitsConfig,
    pub(crate) weight: Box<dyn BoundQuery>,
}

struct BoundNestedQuery {
    _path: String,
    inner: Box<dyn BoundQuery>,
    _inner_hits_config: Option<crate::query::ast::InnerHitsConfig>,
    _inner_hits_weight: Option<Box<dyn BoundQuery>>,
}

impl BoundQuery for BoundNestedQuery {
    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
        // Get the parent bitset from the segment
        let parent_bitset = match reader.parent_bitset() {
            Some(b) => b,
            None => return Ok(None), // no nested docs in this segment
        };

        // Get the inner scorer
        let inner_supplier = match self.inner.scorer_supplier(reader)? {
            Some(s) => s,
            None => return Ok(None),
        };

        let inner_scorer = inner_supplier.scorer()?;

        // Pre-materialize: iterate inner scorer, map each nested doc to its parent
        let mut parent_matches: Vec<(u32, f32)> = Vec::new();
        let mut scorer = inner_scorer;
        let mut last_parent = u32::MAX;

        while scorer.doc_id() != NO_MORE_DOCS {
            let nested_doc = scorer.doc_id().as_u32();
            let score = scorer.score();

            // Find parent: scan backward in parent_bitset
            let parent_doc = find_parent(&parent_bitset, nested_doc);

            if parent_doc != last_parent {
                parent_matches.push((parent_doc, score));
                last_parent = parent_doc;
            } else if let Some(last) = parent_matches.last_mut() {
                // Same parent — take max score (simplified score_mode)
                if score > last.1 {
                    last.1 = score;
                }
            }

            scorer.next();
        }

        if parent_matches.is_empty() {
            return Ok(None);
        }

        // Deduplicate parent matches (inner query might match multiple nested docs per parent)
        parent_matches.dedup_by_key(|m| m.0);

        Ok(Some(Box::new(NestedScorerSupplier {
            matches: parent_matches,
        })))
    }
}

/// Find the parent doc_id for a nested doc_id by scanning the parent bitset.
fn find_parent(parent_bitset: &[bool], nested_doc: u32) -> u32 {
    let mut i = nested_doc as usize;
    // Nested docs are always after their parent, so scan backward
    while i > 0 {
        if i < parent_bitset.len() && parent_bitset[i] {
            return i as u32;
        }
        i -= 1;
    }
    // If we reach 0 and it's a parent, return 0
    if !parent_bitset.is_empty() && parent_bitset[0] {
        return 0;
    }
    0
}

struct NestedScorerSupplier {
    matches: Vec<(u32, f32)>,
}

impl ScorerSupplier for NestedScorerSupplier {
    fn cost(&self) -> u64 {
        self.matches.len() as u64
    }

    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
        Ok(Box::new(NestedScorer {
            matches: self.matches,
            pos: 0,
        }))
    }
}

struct NestedScorer {
    matches: Vec<(u32, f32)>,
    pos: usize,
}

impl Scorer for NestedScorer {
    fn doc_id(&self) -> DocId {
        if self.pos < self.matches.len() {
            DocId::new(self.matches[self.pos].0)
        } else {
            NO_MORE_DOCS
        }
    }
    fn next(&mut self) -> DocId {
        if self.pos < self.matches.len() {
            self.pos += 1;
        }
        self.doc_id()
    }
    fn advance(&mut self, target: DocId) -> DocId {
        while self.pos < self.matches.len() && self.matches[self.pos].0 < target.as_u32() {
            self.pos += 1;
        }
        self.doc_id()
    }
    fn score(&mut self) -> f32 {
        if self.pos < self.matches.len() {
            self.matches[self.pos].1
        } else {
            0.0
        }
    }
    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
        None
    }
}