summavy/query/boolean_query/
boolean_query.rs

1use async_trait::async_trait;
2
3use super::boolean_weight::BooleanWeight;
4use crate::query::{EnableScoring, Occur, Query, SumWithCoordsCombiner, TermQuery, Weight};
5use crate::schema::{IndexRecordOption, Term};
6
7/// The boolean query returns a set of documents
8/// that matches the Boolean combination of constituent subqueries.
9///
10/// The documents matched by the boolean query are
11/// those which
12/// * match all of the sub queries associated with the
13/// `Must` occurrence
14/// * match none of the sub queries associated with the
15/// `MustNot` occurrence.
16/// * match at least one of the subqueries that is not
17/// a `MustNot` occurrence.
18///
19///
20/// You can combine other query types, each tagged with its own `Occur`, into one `BooleanQuery`:
21///
22/// ```rust
23/// use tantivy::collector::Count;
24/// use tantivy::doc;
25/// use tantivy::query::{BooleanQuery, Occur, PhraseQuery, Query, TermQuery};
26/// use tantivy::schema::{IndexRecordOption, Schema, TEXT};
27/// use tantivy::Term;
28/// use tantivy::Index;
29///
30/// fn main() -> tantivy::Result<()> {
31///    let mut schema_builder = Schema::builder();
32///    let title = schema_builder.add_text_field("title", TEXT);
33///    let body = schema_builder.add_text_field("body", TEXT);
34///    let schema = schema_builder.build();
35///    let index = Index::create_in_ram(schema);
36///    {
37///        let mut index_writer = index.writer(3_000_000)?;
38///        index_writer.add_document(doc!(
39///            title => "The Name of the Wind",
40///        ))?;
41///        index_writer.add_document(doc!(
42///            title => "The Diary of Muadib",
43///        ))?;
44///        index_writer.add_document(doc!(
45///            title => "A Dairy Cow",
46///            body => "hidden",
47///        ))?;
48///        index_writer.add_document(doc!(
49///            title => "A Dairy Cow",
50///            body => "found",
51///        ))?;
52///        index_writer.add_document(doc!(
53///            title => "The Diary of a Young Girl",
54///        ))?;
55///        index_writer.commit()?;
56///    }
57///
58///    let reader = index.reader()?;
59///    let searcher = reader.searcher();
60///
61///    // Make TermQuery's for "girl" and "diary" in the title
62///    let girl_term_query: Box<dyn Query> = Box::new(TermQuery::new(
63///        Term::from_field_text(title, "girl"),
64///        IndexRecordOption::Basic,
65///    ));
66///    let diary_term_query: Box<dyn Query> = Box::new(TermQuery::new(
67///        Term::from_field_text(title, "diary"),
68///        IndexRecordOption::Basic,
69///    ));
70///    // A TermQuery with "found" in the body
71///    let body_term_query: Box<dyn Query> = Box::new(TermQuery::new(
72///        Term::from_field_text(body, "found"),
73///        IndexRecordOption::Basic,
74///    ));
75///    // TermQuery "diary" must and "girl" must not be present
76///    let queries_with_occurs1 = vec![
77///        (Occur::Must, diary_term_query.box_clone()),
78///        (Occur::MustNot, girl_term_query),
79///    ];
80///    // Make a BooleanQuery equivalent to
81///    // title:+diary title:-girl
82///    let diary_must_and_girl_mustnot = BooleanQuery::new(queries_with_occurs1);
83///    let count1 = searcher.search(&diary_must_and_girl_mustnot, &Count)?;
84///    assert_eq!(count1, 1);
85///
86///    // TermQuery for "cow" in the title
87///    let cow_term_query: Box<dyn Query> = Box::new(TermQuery::new(
88///        Term::from_field_text(title, "cow"),
89///        IndexRecordOption::Basic,
90///    ));
91///    // "title:diary OR title:cow"
92///    let title_diary_or_cow = BooleanQuery::new(vec![
93///        (Occur::Should, diary_term_query.box_clone()),
94///        (Occur::Should, cow_term_query),
95///    ]);
96///    let count2 = searcher.search(&title_diary_or_cow, &Count)?;
97///    assert_eq!(count2, 4);
98///
99///    // Make a `PhraseQuery` from a vector of `Term`s
100///    let phrase_query: Box<dyn Query> = Box::new(PhraseQuery::new(vec![
101///        Term::from_field_text(title, "dairy"),
102///        Term::from_field_text(title, "cow"),
103///    ]));
104///    // You can combine subqueries of different types into 1 BooleanQuery:
105///    // `TermQuery` and `PhraseQuery`
106///    // title:diary OR title:"dairy cow"
107///    let term_of_phrase_query = BooleanQuery::new(vec![
108///        (Occur::Should, diary_term_query.box_clone()),
109///        (Occur::Should, phrase_query.box_clone()),
110///    ]);
111///    let count3 = searcher.search(&term_of_phrase_query, &Count)?;
112///    assert_eq!(count3, 4);
113///
114///    // You can nest one BooleanQuery inside another
115///    // body:found AND (title:diary OR title:"dairy cow")
116///    let nested_query = BooleanQuery::new(vec![
117///        (Occur::Must, body_term_query),
118///        (Occur::Must, Box::new(term_of_phrase_query))
119///    ]);
120///    let count4 = searcher.search(&nested_query, &Count)?;
121///    assert_eq!(count4, 1);
122///    Ok(())
123/// }
124/// ```
125#[derive(Debug)]
126pub struct BooleanQuery {
    // Clauses exactly as handed to the constructor; each entry pairs an `Occur`
    // with the subquery it applies to.
127    subqueries: Vec<(Occur, Box<dyn Query>)>,
128}
129
130impl Clone for BooleanQuery {
131    fn clone(&self) -> Self {
132        self.subqueries
133            .iter()
134            .map(|(occur, subquery)| (*occur, subquery.box_clone()))
135            .collect::<Vec<_>>()
136            .into()
137    }
138}
139
140impl From<Vec<(Occur, Box<dyn Query>)>> for BooleanQuery {
141    fn from(subqueries: Vec<(Occur, Box<dyn Query>)>) -> BooleanQuery {
142        BooleanQuery::new(subqueries)
143    }
144}
145
146#[async_trait]
147impl Query for BooleanQuery {
148    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
149        let sub_weights = match enable_scoring {
150            EnableScoring::Enabled(searcher) => searcher.index().search_executor().map(
151                |&(ref occur, ref subquery)| Ok((*occur, subquery.weight(enable_scoring)?)),
152                self.subqueries.iter(),
153            )?,
154            EnableScoring::Disabled { .. } => self
155                .subqueries
156                .iter()
157                .map(|&(ref occur, ref subquery)| Ok((*occur, subquery.weight(enable_scoring)?)))
158                .collect::<crate::Result<_>>()?,
159        };
160        Ok(Box::new(BooleanWeight::new(
161            sub_weights,
162            enable_scoring.is_scoring_enabled(),
163            Box::new(SumWithCoordsCombiner::default),
164        )))
165    }
166
167    #[cfg(feature = "quickwit")]
168    async fn weight_async(
169        &self,
170        enable_scoring: EnableScoring<'_>,
171    ) -> crate::Result<Box<dyn Weight>> {
172        let sub_weights = futures::future::join_all(self.subqueries.iter().map(
173            |&(ref occur, ref subquery)| async move {
174                Ok((*occur, subquery.weight_async(enable_scoring).await?))
175            },
176        ))
177        .await
178        .into_iter()
179        .collect::<crate::Result<Vec<_>>>()?;
180        Ok(Box::new(BooleanWeight::new(
181            sub_weights,
182            enable_scoring.is_scoring_enabled(),
183            Box::new(SumWithCoordsCombiner::default),
184        )))
185    }
186
187    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
188        for (_occur, subquery) in &self.subqueries {
189            subquery.query_terms(visitor);
190        }
191    }
192}
193
194impl BooleanQuery {
195    /// Creates a new boolean query.
196    pub fn new(subqueries: Vec<(Occur, Box<dyn Query>)>) -> BooleanQuery {
197        BooleanQuery { subqueries }
198    }
199
200    /// Returns the intersection of the queries.
201    pub fn intersection(queries: Vec<Box<dyn Query>>) -> BooleanQuery {
202        let subqueries = queries.into_iter().map(|s| (Occur::Must, s)).collect();
203        BooleanQuery::new(subqueries)
204    }
205
206    /// Returns the union of the queries.
207    pub fn union(queries: Vec<Box<dyn Query>>) -> BooleanQuery {
208        let subqueries = queries.into_iter().map(|s| (Occur::Should, s)).collect();
209        BooleanQuery::new(subqueries)
210    }
211
212    /// Helper method to create a boolean query matching a given list of terms.
213    /// The resulting query is a disjunction of the terms.
214    pub fn new_multiterms_query(terms: Vec<Term>) -> BooleanQuery {
215        let occur_term_queries: Vec<(Occur, Box<dyn Query>)> = terms
216            .into_iter()
217            .map(|term| {
218                let term_query: Box<dyn Query> =
219                    Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs));
220                (Occur::Should, term_query)
221            })
222            .collect();
223        BooleanQuery::new(occur_term_queries)
224    }
225
226    /// Deconstructed view of the clauses making up this query.
227    pub fn clauses(&self) -> &[(Occur, Box<dyn Query>)] {
228        &self.subqueries[..]
229    }
230}
231
#[cfg(test)]
mod tests {
    use super::BooleanQuery;
    use crate::collector::{Count, DocSetCollector};
    use crate::query::{QueryClone, QueryParser, TermQuery};
    use crate::schema::{IndexRecordOption, Schema, TEXT};
    use crate::{DocAddress, Index, Term};

    /// Builds an in-RAM index with a single `text` field holding four documents,
    /// added in this order: "b c", "a c", "a b", "a d".
    fn create_test_index() -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();
        let text = schema_builder.add_text_field("text", TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        let mut writer = index.writer_for_tests()?;
        for content in ["b c", "a c", "a b", "a d"] {
            writer.add_document(doc!(text=>content))?;
        }
        writer.commit()?;
        Ok(index)
    }

    #[test]
    fn test_union() -> crate::Result<()> {
        let index = create_test_index()?;
        let searcher = index.reader()?.searcher();
        let text = index.schema().get_field("text").unwrap();
        let basic_term = |token: &str| {
            TermQuery::new(Term::from_field_text(text, token), IndexRecordOption::Basic)
        };
        let term_a = basic_term("a");
        let term_d = basic_term("d");
        let union_ad = BooleanQuery::union(vec![term_a.box_clone(), term_d.box_clone()]);
        let matched = searcher.search(&union_ad, &DocSetCollector)?;
        // Every document containing "a" or "d": docs 1, 2 and 3 of the only segment.
        assert_eq!(
            matched,
            vec![1u32, 2u32, 3u32]
                .into_iter()
                .map(|doc_id| DocAddress::new(0u32, doc_id))
                .collect()
        );
        Ok(())
    }

    #[test]
    fn test_intersection() -> crate::Result<()> {
        let index = create_test_index()?;
        let searcher = index.reader()?.searcher();
        let text = index.schema().get_field("text").unwrap();
        let basic_term = |token: &str| {
            TermQuery::new(Term::from_field_text(text, token), IndexRecordOption::Basic)
        };
        let term_a = basic_term("a");
        let term_b = basic_term("b");
        let term_c = basic_term("c");
        // Each pairwise intersection is expected to match exactly one document.
        let cases = [
            (
                BooleanQuery::intersection(vec![term_a.box_clone(), term_b.box_clone()]),
                2u32,
            ),
            (
                BooleanQuery::intersection(vec![term_a.box_clone(), term_c.box_clone()]),
                1u32,
            ),
            (
                BooleanQuery::intersection(vec![term_b.box_clone(), term_c.box_clone()]),
                0u32,
            ),
        ];
        for (query, expected_doc) in &cases {
            let matched = searcher.search(query, &DocSetCollector)?;
            assert_eq!(
                matched,
                vec![DocAddress::new(0u32, *expected_doc)]
                    .into_iter()
                    .collect()
            );
        }
        Ok(())
    }

    #[test]
    pub fn test_json_array_pitfall_bag_of_terms() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let json_field = schema_builder.add_json_field("json", TEXT);
        let index = Index::create_in_ram(schema_builder.build());
        {
            let mut index_writer = index.writer_for_tests()?;
            index_writer.add_document(doc!(json_field=>json!({
                "cart": [
                    {"product_type": "sneakers", "attributes": {"color": "white"}},
                    {"product_type": "t-shirt", "attributes": {"color": "red"}},
                    {"product_type": "cd", "attributes": {"genre": "blues"}},
                ]
            })))?;
            index_writer.commit()?;
        }
        let searcher = index.reader()?.searcher();
        let query_parser = QueryParser::for_index(&index, vec![json_field]);
        // True when the query matches exactly one document.
        let doc_matches = |query: &str| {
            let parsed_query = query_parser.parse_query(query).unwrap();
            let hit_count = searcher.search(&parsed_query, &Count).unwrap();
            hit_count == 1
        };
        // Both terms come from the same cart entry: matches, as expected.
        assert!(doc_matches(
            r#"cart.product_type:sneakers AND cart.attributes.color:white"#
        ));
        // The terms come from different cart entries, yet the document still matches,
        // because array elements do not act as nested documents.
        assert!(doc_matches(
            r#"cart.product_type:sneakers AND cart.attributes.color:red"#
        ));
        // However, obviously this works: "blues" is never indexed under
        // `cart.attributes.color`, so there is no match.
        assert!(!doc_matches(
            r#"cart.product_type:sneakers AND cart.attributes.color:blues"#
        ));
        Ok(())
    }
}