izihawa_tantivy/query/term_query/
term_query.rs

1use std::fmt;
2
3use async_trait::async_trait;
4
5use super::term_weight::TermWeight;
6use crate::query::bm25::Bm25Weight;
7use crate::query::{EnableScoring, Explanation, Query, Weight};
8use crate::schema::IndexRecordOption;
9use crate::Term;
10
11/// A Term query matches all of the documents
12/// containing a specific term.
13///
14/// The score associated is defined as
15/// `idf` *  sqrt(`term_freq` / `field norm`)
16/// in which :
17/// * `idf`        - inverse document frequency.
18/// * `term_freq`  - number of occurrences of the term in the field
19/// * `field norm` - number of tokens in the field.
20///
21/// ```rust
22/// use tantivy::collector::{Count, TopDocs};
23/// use tantivy::query::TermQuery;
24/// use tantivy::schema::{Schema, TEXT, IndexRecordOption};
25/// use tantivy::{doc, Index, IndexWriter, Term};
26/// # fn test() -> tantivy::Result<()> {
27/// let mut schema_builder = Schema::builder();
28/// let title = schema_builder.add_text_field("title", TEXT);
29/// let schema = schema_builder.build();
30/// let index = Index::create_in_ram(schema);
31/// {
32///     let mut index_writer: IndexWriter = index.writer(15_000_000)?;
33///     index_writer.add_document(doc!(
34///         title => "The Name of the Wind",
35///     ))?;
36///     index_writer.add_document(doc!(
37///         title => "The Diary of Muadib",
38///     ))?;
39///     index_writer.add_document(doc!(
40///         title => "A Dairy Cow",
41///     ))?;
42///     index_writer.add_document(doc!(
43///         title => "The Diary of a Young Girl",
44///     ))?;
45///     index_writer.commit()?;
46/// }
47/// let reader = index.reader()?;
48/// let searcher = reader.searcher();
49/// let query = TermQuery::new(
50///     Term::from_field_text(title, "diary"),
51///     IndexRecordOption::Basic,
52/// );
53/// let (top_docs, count) = searcher.search(&query, &(TopDocs::with_limit(2), Count))?;
54/// assert_eq!(count, 2);
55/// Ok(())
56/// # }
57/// # assert!(test().is_ok());
58/// ```
59#[derive(Clone)]
60pub struct TermQuery {
61    term: Term,
62    index_record_option: IndexRecordOption,
63}
64
65impl fmt::Debug for TermQuery {
66    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
67        write!(f, "TermQuery({:?})", self.term)
68    }
69}
70
71impl TermQuery {
72    /// Creates a new term query.
73    pub fn new(term: Term, segment_postings_options: IndexRecordOption) -> TermQuery {
74        TermQuery {
75            term,
76            index_record_option: segment_postings_options,
77        }
78    }
79
80    /// The `Term` this query is built out of.
81    pub fn term(&self) -> &Term {
82        &self.term
83    }
84
85    fn check_field(&self, enable_scoring: EnableScoring<'_>) -> Result<(), crate::TantivyError> {
86        let schema = enable_scoring.schema();
87        let field_entry = schema.get_field_entry(self.term.field());
88        if !field_entry.is_indexed() {
89            let error_msg = format!("Field {:?} is not indexed.", field_entry.name());
90            return Err(crate::TantivyError::SchemaError(error_msg));
91        }
92        Ok(())
93    }
94
95    fn create_term_weight(
96        &self,
97        enable_scoring: EnableScoring<'_>,
98        bm25_weight: Bm25Weight,
99    ) -> TermWeight {
100        let scoring_enabled = enable_scoring.is_scoring_enabled();
101        let index_record_option = if scoring_enabled {
102            self.index_record_option
103        } else {
104            IndexRecordOption::Basic
105        };
106        TermWeight::new(
107            self.term.clone(),
108            index_record_option,
109            bm25_weight,
110            enable_scoring.is_fieldnorms_enabled(),
111        )
112    }
113
114    /// Returns a weight object.
115    ///
116    /// While `.weight(...)` returns a boxed trait object,
117    /// this method return a specific implementation.
118    /// This is useful for optimization purpose.
119    pub fn specialized_weight(
120        &self,
121        enable_scoring: EnableScoring<'_>,
122    ) -> crate::Result<TermWeight> {
123        self.check_field(enable_scoring)?;
124        let bm25_weight = match enable_scoring {
125            EnableScoring::Enabled {
126                statistics_provider,
127                ..
128            } => Bm25Weight::for_terms(statistics_provider, &[self.term.clone()])?,
129            EnableScoring::Disabled { .. } => {
130                Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
131            }
132        };
133        Ok(self.create_term_weight(enable_scoring, bm25_weight))
134    }
135
136    /// Returns a weight object asynchronously.
137    ///
138    /// See [`Self::specialized_weight()`]
139    #[cfg(feature = "quickwit")]
140    pub async fn specialized_weight_async(
141        &self,
142        enable_scoring: EnableScoring<'_>,
143    ) -> crate::Result<TermWeight> {
144        self.check_field(enable_scoring)?;
145        let bm25_weight = match enable_scoring {
146            EnableScoring::Enabled {
147                statistics_provider,
148                ..
149            } => Bm25Weight::for_terms_async(statistics_provider, &[self.term.clone()]).await?,
150            EnableScoring::Disabled { .. } => {
151                Bm25Weight::new(Explanation::new("<no score>", 1.0f32), 1.0f32)
152            }
153        };
154        Ok(self.create_term_weight(enable_scoring, bm25_weight))
155    }
156}
157
158#[async_trait]
159impl Query for TermQuery {
160    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
161        Ok(Box::new(self.specialized_weight(enable_scoring)?))
162    }
163
164    #[cfg(feature = "quickwit")]
165    async fn weight_async(
166        &self,
167        enable_scoring: EnableScoring<'_>,
168    ) -> crate::Result<Box<dyn Weight>> {
169        Ok(Box::new(
170            self.specialized_weight_async(enable_scoring).await?,
171        ))
172    }
173
174    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
175        visitor(&self.term, false);
176    }
177}
178
#[cfg(test)]
mod tests {
    use std::net::{IpAddr, Ipv6Addr};
    use std::str::FromStr;

    use columnar::MonotonicallyMappableToU128;

    use crate::collector::{Count, TopDocs};
    use crate::query::{Query, QueryParser, TermQuery};
    use crate::schema::{IndexRecordOption, IntoIpv6Addr, Schema, INDEXED, STORED};
    use crate::{Index, IndexWriter, Term};

    /// Indexes two documents holding distinct IP addresses and checks that
    /// term queries and parsed text queries each match exactly one of them.
    #[test]
    fn search_ip_test() {
        let mut schema_builder = Schema::builder();
        let ip_field = schema_builder.add_ip_addr_field("ip", INDEXED | STORED);
        let index = Index::create_in_ram(schema_builder.build());
        let ip_addr_1 = IpAddr::from_str("127.0.0.1").unwrap().into_ipv6_addr();
        let ip_addr_2 = Ipv6Addr::from_u128(10);

        {
            // One document per address, committed before the reader is opened.
            let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
            for ip_addr in [ip_addr_1, ip_addr_2] {
                index_writer
                    .add_document(doc!(
                        ip_field => ip_addr
                    ))
                    .unwrap();
            }
            index_writer.commit().unwrap();
        }
        let reader = index.reader().unwrap();
        let searcher = reader.searcher();

        let count_hits = |query: Box<dyn Query>| -> usize {
            let (_top_docs, count) = searcher
                .search(&query, &(TopDocs::with_limit(2), Count))
                .unwrap();
            count
        };

        let query_parser = QueryParser::for_index(&index, vec![ip_field]);
        let query_from_text =
            |text: &str| -> Box<dyn Query> { query_parser.parse_query(text).unwrap() };

        let query_from_ip = |ip_addr: Ipv6Addr| -> Box<dyn Query> {
            let term = Term::from_field_ip_addr(ip_field, ip_addr);
            Box::new(TermQuery::new(term, IndexRecordOption::Basic))
        };

        let queries: Vec<Box<dyn Query>> = vec![
            query_from_ip(ip_addr_1),
            query_from_ip(ip_addr_2),
            query_from_text("127.0.0.1"),
            query_from_text("\"127.0.0.1\""),
            query_from_text(&format!("\"{ip_addr_1}\"")),
            query_from_text(&format!("\"{ip_addr_2}\"")),
        ];
        for query in queries {
            assert_eq!(count_hits(query), 1);
        }
    }
}