Skip to main content

luci/query/
nested.rs

1//! Nested query: search within nested object arrays with cross-object isolation.
2//!
3//! Runs an inner query against nested (hidden) documents, then joins matches
4//! back to parent documents. This prevents the cross-object matching problem.
5//!
6//! See [[nested-documents]] and [[architecture-overview|milestone-7]].
7
8use crate::core::{DocId, NO_MORE_DOCS, Result, ScoreMode, Scorer, TwoPhaseIterator};
9
10use crate::query::{BoundQuery, Query, ScorerSupplier};
11use crate::search::searcher::Searcher;
12use crate::segment::reader::SegmentReader;
13
14/// Query within nested documents at a given path.
15pub struct NestedQuery {
16    pub path: String,
17    pub(crate) inner: Box<dyn Query>,
18    pub inner_hits: Option<crate::query::ast::InnerHitsConfig>,
19}
20
21impl Query for NestedQuery {
22    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
23        let inner_weight = self.inner.bind(searcher, score_mode)?;
24        // If inner_hits requested, create a second weight for resolution
25        let inner_hits_weight = if self.inner_hits.is_some() {
26            Some(self.inner.bind(searcher, score_mode)?)
27        } else {
28            None
29        };
30        Ok(Box::new(BoundNestedQuery {
31            _path: self.path.clone(),
32            inner: inner_weight,
33            _inner_hits_config: self.inner_hits.clone(),
34            _inner_hits_weight: inner_hits_weight,
35        }))
36    }
37}
38
39/// Metadata for resolving inner hits after search completes.
40pub struct InnerHitSpec {
41    pub name: String,
42    pub path: String,
43    pub config: crate::query::ast::InnerHitsConfig,
44    pub(crate) weight: Box<dyn BoundQuery>,
45}
46
47struct BoundNestedQuery {
48    _path: String,
49    inner: Box<dyn BoundQuery>,
50    _inner_hits_config: Option<crate::query::ast::InnerHitsConfig>,
51    _inner_hits_weight: Option<Box<dyn BoundQuery>>,
52}
53
54impl BoundQuery for BoundNestedQuery {
55    fn scorer_supplier(&self, reader: &SegmentReader) -> Result<Option<Box<dyn ScorerSupplier>>> {
56        // Get the parent bitset from the segment
57        let parent_bitset = match reader.parent_bitset() {
58            Some(b) => b,
59            None => return Ok(None), // no nested docs in this segment
60        };
61
62        // Get the inner scorer
63        let inner_supplier = match self.inner.scorer_supplier(reader)? {
64            Some(s) => s,
65            None => return Ok(None),
66        };
67
68        let inner_scorer = inner_supplier.scorer()?;
69
70        // Pre-materialize: iterate inner scorer, map each nested doc to its parent
71        let mut parent_matches: Vec<(u32, f32)> = Vec::new();
72        let mut scorer = inner_scorer;
73        let mut last_parent = u32::MAX;
74
75        while scorer.doc_id() != NO_MORE_DOCS {
76            let nested_doc = scorer.doc_id().as_u32();
77            let score = scorer.score();
78
79            // Find parent: scan backward in parent_bitset
80            let parent_doc = find_parent(&parent_bitset, nested_doc);
81
82            if parent_doc != last_parent {
83                parent_matches.push((parent_doc, score));
84                last_parent = parent_doc;
85            } else if let Some(last) = parent_matches.last_mut() {
86                // Same parent — take max score (simplified score_mode)
87                if score > last.1 {
88                    last.1 = score;
89                }
90            }
91
92            scorer.next();
93        }
94
95        if parent_matches.is_empty() {
96            return Ok(None);
97        }
98
99        // Deduplicate parent matches (inner query might match multiple nested docs per parent)
100        parent_matches.dedup_by_key(|m| m.0);
101
102        Ok(Some(Box::new(NestedScorerSupplier {
103            matches: parent_matches,
104        })))
105    }
106}
107
108/// Find the parent doc_id for a nested doc_id by scanning the parent bitset.
109fn find_parent(parent_bitset: &[bool], nested_doc: u32) -> u32 {
110    let mut i = nested_doc as usize;
111    // Nested docs are always after their parent, so scan backward
112    while i > 0 {
113        if i < parent_bitset.len() && parent_bitset[i] {
114            return i as u32;
115        }
116        i -= 1;
117    }
118    // If we reach 0 and it's a parent, return 0
119    if !parent_bitset.is_empty() && parent_bitset[0] {
120        return 0;
121    }
122    0
123}
124
125struct NestedScorerSupplier {
126    matches: Vec<(u32, f32)>,
127}
128
129impl ScorerSupplier for NestedScorerSupplier {
130    fn cost(&self) -> u64 {
131        self.matches.len() as u64
132    }
133
134    fn scorer(self: Box<Self>) -> Result<Box<dyn Scorer>> {
135        Ok(Box::new(NestedScorer {
136            matches: self.matches,
137            pos: 0,
138        }))
139    }
140}
141
142struct NestedScorer {
143    matches: Vec<(u32, f32)>,
144    pos: usize,
145}
146
147impl Scorer for NestedScorer {
148    fn doc_id(&self) -> DocId {
149        if self.pos < self.matches.len() {
150            DocId::new(self.matches[self.pos].0)
151        } else {
152            NO_MORE_DOCS
153        }
154    }
155    fn next(&mut self) -> DocId {
156        if self.pos < self.matches.len() {
157            self.pos += 1;
158        }
159        self.doc_id()
160    }
161    fn advance(&mut self, target: DocId) -> DocId {
162        while self.pos < self.matches.len() && self.matches[self.pos].0 < target.as_u32() {
163            self.pos += 1;
164        }
165        self.doc_id()
166    }
167    fn score(&mut self) -> f32 {
168        if self.pos < self.matches.len() {
169            self.matches[self.pos].1
170        } else {
171            0.0
172        }
173    }
174    fn two_phase(&mut self) -> Option<&mut dyn TwoPhaseIterator> {
175        None
176    }
177}