Skip to main content

luci/query/
match_query.rs

//! MatchQuery: analyze query text, build bool of term queries.
//!
//! Analyzes the query string through the field's search analyzer, then:
//! - 0 tokens → match-nothing query
//! - 1 token → TermQuery
//! - N tokens → BoolQuery { should: [TermQuery * N] }
//!
//! See [[query-dsl#Full-Text Queries]] and [[architecture-query-execution#Step 8]].
8
9use crate::core::{Result, ScoreMode};
10
11use crate::query::boolean::BoolQuery;
12use crate::query::prefix::PrefixQuery;
13use crate::query::term::TermQuery;
14use crate::query::{BoundQuery, Query};
15use crate::search::searcher::Searcher;
16
/// Full-text match query: analyze and search.
///
/// The query text is not analyzed at construction time; analysis happens
/// when the query is bound against a searcher.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchQuery {
    /// Name of the field to search.
    pub field: String,
    /// Raw, un-analyzed query text.
    pub query_text: String,
    /// Optional per-query analyzer name; `None` leaves the choice to the
    /// searcher's analyzer resolution.
    pub analyzer: Option<String>,
}
23
24impl Query for MatchQuery {
25    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
26        // Resolve search-time analyzer via field mapping resolution chain.
27        // See [[feature-analysis-pipeline#Analyzer Resolution Order]].
28        let analyzer_name = searcher.resolve_search_analyzer(&self.field, self.analyzer.as_deref());
29        let analyzers = searcher.analyzers();
30        let analyzer = analyzers.get(analyzer_name);
31
32        // Analyze query text
33        let tokens = analyzer.analyze(&self.query_text);
34
35        if tokens.is_empty() {
36            // No terms after analysis → match nothing
37            return MatchNoneQuery.bind(searcher, score_mode);
38        }
39
40        if tokens.len() == 1 {
41            // Single term → rewrite to TermQuery
42            let term_query = TermQuery {
43                field: self.field.clone(),
44                value: tokens[0].text.clone(),
45            };
46            return term_query.bind(searcher, score_mode);
47        }
48
49        // Multiple terms → bool { should: [TermQuery * N] }
50        let should: Vec<Box<dyn Query>> = tokens
51            .iter()
52            .map(|t| -> Box<dyn Query> {
53                Box::new(TermQuery {
54                    field: self.field.clone(),
55                    value: t.text.clone(),
56                })
57            })
58            .collect();
59
60        let bool_query = BoolQuery {
61            must: vec![],
62            should,
63            must_not: vec![],
64            filter: vec![],
65            minimum_should_match: None,
66        };
67        bool_query.bind(searcher, score_mode)
68    }
69}
70
/// match_bool_prefix: analyze text, all tokens except last become term
/// queries, last becomes prefix query, wrapped in bool/should.
///
/// Defers analysis to `bind()` so the searcher's analyzer registry
/// (including custom and field-configured analyzers) is used.
/// See [[investigation-20260405-06-match-bool-prefix-analyzer]].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchBoolPrefixQuery {
    /// Name of the field to search.
    pub field: String,
    /// Raw, un-analyzed query text; its final token is treated as a prefix.
    pub query_text: String,
    /// Optional per-query analyzer name; `None` leaves the choice to the
    /// searcher's analyzer resolution.
    pub analyzer: Option<String>,
}
82
83impl Query for MatchBoolPrefixQuery {
84    fn bind(&self, searcher: &Searcher, score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
85        let analyzer_name = searcher.resolve_search_analyzer(&self.field, self.analyzer.as_deref());
86        let analyzers = searcher.analyzers();
87        let analyzer = analyzers.get(analyzer_name);
88        let tokens = analyzer.analyze(&self.query_text);
89
90        if tokens.is_empty() {
91            return MatchNoneQuery.bind(searcher, score_mode);
92        }
93
94        let last_idx = tokens.len() - 1;
95        let mut should: Vec<Box<dyn Query>> = Vec::with_capacity(tokens.len());
96        for (i, token) in tokens.iter().enumerate() {
97            if i == last_idx {
98                should.push(Box::new(PrefixQuery {
99                    field: self.field.clone(),
100                    value: token.text.clone(),
101                }));
102            } else {
103                should.push(Box::new(TermQuery {
104                    field: self.field.clone(),
105                    value: token.text.clone(),
106                }));
107            }
108        }
109
110        BoolQuery {
111            must: vec![],
112            should,
113            must_not: vec![],
114            filter: vec![],
115            minimum_should_match: None,
116        }
117        .bind(searcher, score_mode)
118    }
119}
120
121/// Match-nothing query.
122struct MatchNoneQuery;
123
124impl Query for MatchNoneQuery {
125    fn bind(&self, _searcher: &Searcher, _score_mode: ScoreMode) -> Result<Box<dyn BoundQuery>> {
126        Ok(Box::new(BoundMatchNoneQuery))
127    }
128}
129
130struct BoundMatchNoneQuery;
131
132impl BoundQuery for BoundMatchNoneQuery {
133    fn scorer_supplier(
134        &self,
135        _reader: &crate::segment::reader::SegmentReader,
136    ) -> Result<Option<Box<dyn crate::query::ScorerSupplier>>> {
137        Ok(None)
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::AnalyzerRegistry;
    use crate::analysis::Token;
    use crate::core::{FieldId, NO_MORE_DOCS, SegmentId};
    use crate::mapping::{FieldType, Mapping};
    use crate::search::segment_store::SegmentStore;
    use crate::segment::builder::SegmentBuilder;
    use crate::segment::reader::SegmentReader;

    /// Builds one token per term, using the term's index as the last
    /// `Token::new` argument and `0..len` as the two middle arguments.
    fn make_tokens(terms: &[&str]) -> Vec<Token> {
        let mut tokens = Vec::with_capacity(terms.len());
        for (position, term) in terms.iter().enumerate() {
            tokens.push(Token::new(*term, 0, term.len(), position as u32));
        }
        tokens
    }

    /// Builds a single-segment store with three documents in the `body`
    /// text field:
    /// - doc 0: the quick brown fox
    /// - doc 1: the lazy dog
    /// - doc 2: quick search engine
    fn build_test_store() -> SegmentStore {
        let schema = Mapping::builder().field("body", FieldType::Text).build();
        let mut builder = SegmentBuilder::new(SegmentId::new(1), &schema);

        let documents: [&[&str]; 3] = [
            &["the", "quick", "brown", "fox"],
            &["the", "lazy", "dog"],
            &["quick", "search", "engine"],
        ];
        for terms in documents {
            builder.add_document(&[(FieldId::new(0), make_tokens(terms))], b"{}");
        }

        let reader = SegmentReader::open(builder.build()).unwrap();
        SegmentStore::new(vec![reader], AnalyzerRegistry::new(), None, None)
    }

    /// Drains `scorer` from its current position until `NO_MORE_DOCS`,
    /// returning every doc id visited.
    fn collect_doc_ids(scorer: &mut dyn crate::core::Scorer) -> Vec<u32> {
        let mut ids = Vec::new();
        loop {
            let current = scorer.doc_id();
            if current == NO_MORE_DOCS {
                break;
            }
            ids.push(current.as_u32());
            scorer.next();
        }
        ids
    }

    /// Runs a `MatchQuery` on `body` against the first segment of the test
    /// store. Returns `None` when the bound query yields no scorer supplier,
    /// otherwise the matching doc ids in scorer order.
    fn run_match(query_text: &str) -> Option<Vec<u32>> {
        let store = build_test_store();
        let searcher = Searcher::new(&store);
        let query = MatchQuery {
            field: "body".into(),
            query_text: query_text.into(),
            analyzer: None,
        };

        let weight = query.bind(&searcher, ScoreMode::Complete).unwrap();
        let supplier = weight.scorer_supplier(&searcher.segments()[0]).unwrap()?;
        let mut scorer = supplier.scorer().unwrap();

        let ids = collect_doc_ids(scorer.as_mut());
        Some(ids)
    }

    #[test]
    fn match_single_term() {
        // Standard analyzer lowercases "Fox"; "fox" only occurs in doc 0.
        assert_eq!(run_match("Fox"), Some(vec![0]));
    }

    #[test]
    fn match_multi_term() {
        // "quick" in docs 0,2 and "lazy" in doc 1 → should match all three.
        assert_eq!(run_match("quick lazy"), Some(vec![0, 1, 2]));
    }

    #[test]
    fn match_empty_query() {
        // No terms → no scorer supplier → no matches.
        assert!(run_match("").is_none());
    }

    #[test]
    fn match_case_normalization() {
        // "the" occurs in docs 0 and 1.
        assert_eq!(run_match("THE"), Some(vec![0, 1]));
    }
}