summa_core/components/query_parser/
summa_ql.rs

1use std::collections::Bound;
2use std::ops::Bound::{Included, Unbounded};
3use std::ops::Deref;
4use std::str::FromStr;
5
6use base64::engine::general_purpose::STANDARD as BASE64;
7use base64::Engine;
8use pest::iterators::{Pair, Pairs};
9use pest::Parser;
10use pest_derive::Parser;
11use tantivy::query::{BooleanQuery, BoostQuery, DisjunctionMaxQuery, EmptyQuery, PhraseQuery, Query, QueryClone, RangeQuery, RegexQuery, TermQuery};
12use tantivy::schema::{Facet, FacetParseError, Field, FieldEntry, FieldType, IndexRecordOption, Schema, TextFieldIndexing, Type};
13use tantivy::tokenizer::{TextAnalyzer, TokenizerManager};
14use tantivy::{Index, Term};
15use tantivy_query_grammar::Occur;
16
17use crate::components::queries::ExistsQuery;
18use crate::components::query_parser::morphology::MorphologyManager;
19use crate::components::query_parser::proto_query_parser::QueryParserDefaultMode;
20use crate::components::query_parser::term_field_mappers::TermFieldMappersManager;
21use crate::components::query_parser::utils::cast_field_to_term;
22use crate::configs::core::QueryParserConfig;
23use crate::errors::SummaResult;
24use crate::utils::transpose;
25use crate::validators;
26
/// Parser for the Summa query language.
///
/// The implementation and the accompanying `Rule` enum are generated by
/// `pest_derive` from the grammar file referenced below.
#[derive(Parser)]
#[grammar = "src/components/query_parser/summa_ql.pest"] // relative to src
struct SummaQlParser;
30
/// Turns Summa query language strings into tantivy queries for one schema.
pub struct QueryParser {
    /// Schema used to resolve field names and to inspect field types.
    schema: Schema,
    /// Source of the text analyzers used to tokenize query terms.
    tokenizer_manager: TokenizerManager,
    /// Optional per-language morphology used to expand single-word terms.
    morphology_manager: MorphologyManager,
    /// Built from the schema and the tokenizers in `new`.
    term_field_mappers_manager: TermFieldMappersManager,
    /// Parser settings: default fields and mode, field aliases, field boosts,
    /// excluded fields, morphology configs and query language.
    query_parser_config: QueryParserConfig,
}
38
/// Errors that may occur while parsing a query.
#[derive(thiserror::Error, Debug, PartialEq)]
pub enum QueryParserError {
    /// The query is syntactically invalid.
    #[error("syntax_error: {0}")]
    Syntax(String),
    /// The query is well-formed but not supported.
    #[error("unsupported_query_error: {0}")]
    UnsupportedQuery(String),
    /// The query references a field that is not in the schema.
    #[error("field_doest_not_exist_error: '{0}'")]
    FieldDoesNotExist(String),
    /// The query contains a term for a `u64` or `i64` field, but the value
    /// cannot be parsed as an integer.
    #[error("expected_int_error: '{0:?}'")]
    ExpectedInt(#[from] std::num::ParseIntError),
    /// The query contains a term for a bytes field, but the value is not valid
    /// base64.
    #[error("expected_base64_error: '{0:?}'")]
    ExpectedBase64(#[from] base64::DecodeError),
    /// The query contains a term for an `f64` field, but the value
    /// is not an `f64`.
    #[error("expected_float_error: {0}")]
    ExpectedFloat(#[from] std::num::ParseFloatError),
    /// The query contains a term for a bool field, but the value
    /// is not a bool.
    #[error("exptected_bool: '{0:?}'")]
    ExpectedBool(#[from] std::str::ParseBoolError),
    /// Queries that are only "excluding" are forbidden (e.g. `-title:pop`).
    #[error("all_but_query_forbidden_error")]
    AllButQueryForbidden,
    /// If no default field is declared, running a query without any
    /// field specified is forbidden.
    #[error("No default field declared and no field specified in query")]
    NoDefaultFieldDeclared,
    /// The field searched for is not declared
    /// as indexed in the schema.
    #[error("field_not_indexed_error: {0}")]
    FieldNotIndexed(String),
    /// The field is declared as JSON but the query does not contain an object path.
    #[error("json_field_without_path_error: {0}")]
    JsonFieldWithoutPath(String),
    /// The field is not declared as JSON but the query does contain an object path.
    #[error("non_json_field_with_path_error: {0}")]
    NonJsonFieldWithPath(String),
    /// A phrase query was requested for a field that does not
    /// have any positions indexed.
    #[error("field_does_not_have_positions_indexed_error: {0}")]
    FieldDoesNotHavePositionsIndexed(String),
    /// The tokenizer configured for the given field is unknown.
    /// Carries the tokenizer name and the field name.
    #[error("unknown_tokenizer_error: '{tokenizer:?}' for the field '{field:?}'")]
    UnknownTokenizer {
        /// The name of the tokenizer
        tokenizer: String,
        /// The field name
        field: String,
    },
    /// The query contains a range query with a phrase as one of the bounds.
    /// Only terms can be used as bounds.
    #[error("range_must_not_have_phrase")]
    RangeMustNotHavePhrase,
    /// The format for the date field is not RFC 3339 compliant.
    #[error("date_format_error: {0}")]
    DateFormat(#[from] time::error::Parse),
    /// The format for the facet field is invalid.
    #[error("facet_parse_error: {0}")]
    FacetFormat(#[from] FacetParseError),
    /// The format for the ip field is invalid.
    #[error("ip_format_error: {0}")]
    IpFormat(#[from] std::net::AddrParseError),
    /// The pest parser failed to parse the query string.
    #[error("pest_error: {0}")]
    Pest(#[from] Box<pest::error::Error<Rule>>),
}
114
115pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
116    match typ {
117        Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
118        Type::IpAddr => true,
119        Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
120    }
121}
122
123fn boost_query(query: Box<dyn Query>, boost: Option<f32>) -> Box<dyn Query> {
124    if let Some(boost) = boost {
125        return Box::new(BoostQuery::new(query, boost)) as Box<dyn Query>;
126    }
127    query
128}
129
/// Combines two optional boost factors.
///
/// When both are present the result is their product; when only one is present
/// it is returned as is; when neither is present the result is `None`.
fn multiply_boosts(a: Option<f32>, b: Option<f32>) -> Option<f32> {
    // `zip` is `Some` only when both factors exist; otherwise fall back to
    // whichever single factor is set (if any).
    a.zip(b).map(|(left, right)| left * right).or(a).or(b)
}
138
139fn reduce_should_clause(query: Box<dyn Query>) -> Box<dyn Query> {
140    if let Some(boolean_query) = query.deref().as_any().downcast_ref::<BooleanQuery>() {
141        let mut subqueries = vec![];
142        for (occur, nested_query) in boolean_query.clauses() {
143            let nested_query = reduce_should_clause(nested_query.box_clone());
144            match occur {
145                Occur::Must | Occur::MustNot => subqueries.push((*occur, nested_query)),
146                Occur::Should => {
147                    if let Some(nested_boolean_query) = nested_query.deref().as_any().downcast_ref::<BooleanQuery>() {
148                        subqueries.extend(nested_boolean_query.clauses().iter().map(|(o, q)| (*o, reduce_should_clause(q.box_clone()))))
149                    } else {
150                        subqueries.push((*occur, nested_query))
151                    }
152                }
153            }
154        }
155        if subqueries.len() == 1 && subqueries[0].0 == Occur::Should {
156            return subqueries.into_iter().next().expect("impossible").1;
157        }
158        return Box::new(BooleanQuery::new(subqueries)) as Box<dyn Query>;
159    }
160    query
161}
162
163fn reduce_empty_queries(query: Box<dyn Query>) -> Box<dyn Query> {
164    if let Some(boolean_query) = query.deref().as_any().downcast_ref::<BooleanQuery>() {
165        let subqueries: Vec<_> = boolean_query
166            .clauses()
167            .iter()
168            .filter_map(|(occur, nested_query)| {
169                if nested_query.deref().as_any().downcast_ref::<EmptyQuery>().is_some() {
170                    None
171                } else {
172                    Some((*occur, reduce_empty_queries(nested_query.box_clone())))
173                }
174            })
175            .collect();
176        if subqueries.is_empty() {
177            return Box::new(EmptyQuery {}) as Box<dyn Query>;
178        }
179        return Box::new(BooleanQuery::new(subqueries)) as Box<dyn Query>;
180    }
181    query
182}
183
/// A list of boolean clauses: each sub-query paired with its `Occur` modifier.
type Subqueries = Vec<(Occur, Box<dyn Query>)>;
185
186impl QueryParser {
187    pub fn new(
188        schema: Schema,
189        query_parser_config: QueryParserConfig,
190        morphology_manager: &MorphologyManager,
191        tokenizer_manager: &TokenizerManager,
192    ) -> SummaResult<QueryParser> {
193        validators::parse_fields(&schema, &query_parser_config.0.default_fields, &[])?;
194        Ok(QueryParser {
195            term_field_mappers_manager: TermFieldMappersManager::new(&schema, tokenizer_manager),
196            morphology_manager: morphology_manager.clone(),
197            tokenizer_manager: tokenizer_manager.clone(),
198            query_parser_config,
199            schema,
200        })
201    }
202
203    pub fn for_index(index: &Index, query_parser_config: QueryParserConfig, morphology_manager: &MorphologyManager) -> SummaResult<QueryParser> {
204        QueryParser::new(index.schema(), query_parser_config, morphology_manager, index.tokenizers())
205    }
206
207    pub fn resolve_field_name<'a>(&'a self, field_name: &'a str) -> &str {
208        self.query_parser_config
209            .0
210            .field_aliases
211            .get(field_name)
212            .map(|s| s.as_str())
213            .unwrap_or(field_name)
214    }
215
216    fn get_text_analyzer(&self, field_entry: &FieldEntry, option: &TextFieldIndexing) -> Result<TextAnalyzer, QueryParserError> {
217        self.tokenizer_manager
218            .get(option.tokenizer())
219            .ok_or_else(|| QueryParserError::UnknownTokenizer {
220                field: field_entry.name().to_string(),
221                tokenizer: option.tokenizer().to_string(),
222            })
223    }
224
225    fn default_field_queries(&self, term: Pair<Rule>, boost: Option<f32>) -> Result<Box<dyn Query>, QueryParserError> {
226        let (occur, term) = match term.as_rule() {
227            Rule::field_name => (Occur::Should, term),
228            _ => {
229                let term = term.into_inner().next().expect("grammar failure");
230                let occur = self.parse_occur(&term);
231                let pre_term = term.into_inner().next().expect("grammar failure");
232                (occur, pre_term.clone())
233            }
234        };
235
236        let default_field_queries = self
237            .query_parser_config
238            .0
239            .default_fields
240            .iter()
241            .map(|field| {
242                let (field, full_path) = self.schema.find_field(field).expect("inconsistent state");
243                self.parse_pre_term(&field, full_path, term.clone(), boost, true)
244            })
245            .collect::<Result<Vec<_>, _>>()?;
246
247        Ok(match occur {
248            Occur::Should => {
249                let default_field_queries = default_field_queries.into_iter().flatten();
250                match QueryParserDefaultMode::from(self.query_parser_config.0.default_mode.clone()) {
251                    QueryParserDefaultMode::Boolean => Box::new(BooleanQuery::new(default_field_queries.map(|q| (occur, q)).collect())) as Box<dyn Query>,
252                    QueryParserDefaultMode::DisjuctionMax { tie_breaker } => {
253                        Box::new(DisjunctionMaxQuery::with_tie_breaker(default_field_queries.collect(), tie_breaker)) as Box<dyn Query>
254                    }
255                }
256            }
257            Occur::MustNot => Box::new(BooleanQuery::new(default_field_queries.into_iter().flatten().map(|q| (occur, q)).collect())) as Box<dyn Query>,
258            Occur::Must => {
259                if self.query_parser_config.0.default_fields.len() == 1 {
260                    Box::new(BooleanQuery::new(
261                        default_field_queries.into_iter().flatten().map(|q| (Occur::Must, q)).collect(),
262                    )) as Box<dyn Query>
263                } else {
264                    let transposed_default_field_queries = transpose(default_field_queries);
265                    Box::new(BooleanQuery::new(
266                        transposed_default_field_queries
267                            .into_iter()
268                            .map(|queries| {
269                                (
270                                    Occur::Must,
271                                    Box::new(BooleanQuery::new(queries.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>,
272                                )
273                            })
274                            .collect(),
275                    )) as Box<dyn Query>
276                }
277            }
278        })
279    }
280
281    fn parse_range(&self, pre_term: Pair<Rule>, field: &Field) -> Result<RangeQuery, QueryParserError> {
282        let mut range_pairs = pre_term.into_inner();
283        let field_entry = self.schema.get_field_entry(*field);
284        if !field_entry.field_type().is_indexed() && !field_entry.field_type().is_fast() {
285            return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
286        }
287        let left = self.parse_boundary_word(*field, range_pairs.next().expect("grammar failure"))?;
288        let right = self.parse_boundary_word(*field, range_pairs.next().expect("grammar failure"))?;
289
290        Ok(RangeQuery::new(left, right))
291    }
292
293    pub fn parse_words(&self, field: Field, full_path: &str, option: &TextFieldIndexing, words: &str) -> Result<Vec<(usize, Term)>, QueryParserError> {
294        let field_entry = self.schema.get_field_entry(field);
295        let mut text_analyzer = self.get_text_analyzer(field_entry, option)?;
296        let mut token_stream = text_analyzer.token_stream(words);
297        let mut terms = Vec::new();
298        token_stream.process(&mut |token| {
299            let term = cast_field_to_term(&field, full_path, field_entry.field_type(), &token.text, true);
300            terms.push((token.position, term));
301        });
302        Ok(terms)
303    }
304
305    fn parse_pre_term(
306        &self,
307        field: &Field,
308        full_path: &str,
309        pre_term: Pair<Rule>,
310        boost: Option<f32>,
311        ignore_phrase_for_non_position_field: bool,
312    ) -> Result<Vec<Box<dyn Query>>, QueryParserError> {
313        let field_entry = self.schema.get_field_entry(*field);
314        let field_type = field_entry.field_type();
315
316        if field_type.value_type() == Type::Json && full_path.is_empty() {
317            return Err(QueryParserError::JsonFieldWithoutPath(field_entry.name().to_string()));
318        }
319
320        if field_type.value_type() != Type::Json && !full_path.is_empty() {
321            return Err(QueryParserError::NonJsonFieldWithPath(format!("{}.{}", field_entry.name(), full_path)));
322        }
323
324        if !(field_type.is_indexed() || matches!(pre_term.as_rule(), Rule::range) && field_type.is_fast()) {
325            return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
326        }
327
328        let boost = multiply_boosts(self.query_parser_config.0.field_boosts.get(field_entry.name()).copied(), boost);
329
330        if matches!(pre_term.as_rule(), Rule::range) {
331            return Ok(vec![boost_query(Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>, boost)]);
332        }
333
334        return match *field_type {
335            FieldType::Bytes(_) => match pre_term.as_rule() {
336                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
337                Rule::phrase | Rule::word => {
338                    let val = &BASE64.decode(pre_term.as_str())?;
339                    let query = Box::new(TermQuery::new(Term::from_field_bytes(*field, val), IndexRecordOption::Basic)) as Box<dyn Query>;
340                    Ok(vec![boost_query(query, boost)])
341                }
342                _ => unreachable!(),
343            },
344            FieldType::U64(_) => match pre_term.as_rule() {
345                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
346                Rule::phrase | Rule::word => {
347                    let val: u64 = u64::from_str(pre_term.as_str())?;
348                    let query = Box::new(TermQuery::new(Term::from_field_u64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
349                    Ok(vec![boost_query(query, boost)])
350                }
351                _ => unreachable!(),
352            },
353            FieldType::I64(_) => match pre_term.as_rule() {
354                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
355                Rule::phrase | Rule::word => {
356                    let val: i64 = i64::from_str(pre_term.as_str())?;
357                    let query = Box::new(TermQuery::new(Term::from_field_i64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
358                    Ok(vec![boost_query(query, boost)])
359                }
360                _ => unreachable!(),
361            },
362            FieldType::Facet(_) => match pre_term.as_rule() {
363                Rule::phrase | Rule::word => {
364                    let val = Facet::from_text(pre_term.as_str())?;
365                    let query = Box::new(TermQuery::new(Term::from_facet(*field, &val), IndexRecordOption::Basic)) as Box<dyn Query>;
366                    Ok(vec![boost_query(query, boost)])
367                }
368                _ => unreachable!(),
369            },
370            FieldType::F64(_) => match pre_term.as_rule() {
371                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
372                Rule::phrase | Rule::word => {
373                    if !field_type.is_indexed() {
374                        return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
375                    }
376                    let val: f64 = f64::from_str(pre_term.as_str())?;
377                    let query = Box::new(TermQuery::new(Term::from_field_f64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
378                    Ok(vec![boost_query(query, boost)])
379                }
380                _ => unreachable!(),
381            },
382            FieldType::Bool(_) => match pre_term.as_rule() {
383                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
384                Rule::phrase | Rule::word => {
385                    let val: bool = bool::from_str(pre_term.as_str())?;
386                    let query = Box::new(TermQuery::new(Term::from_field_bool(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
387                    Ok(vec![boost_query(query, boost)])
388                }
389                _ => unreachable!(),
390            },
391            FieldType::Str(_) | FieldType::JsonObject(_) => {
392                let indexing = if let FieldType::Str(ref text_options) = field_type {
393                    text_options.get_indexing_options().expect("unreachable")
394                } else if let FieldType::JsonObject(ref json_options) = field_type {
395                    json_options.get_text_indexing_options().expect("unreachable")
396                } else {
397                    unreachable!()
398                };
399
400                match pre_term.as_rule() {
401                    Rule::word | Rule::field_name => {
402                        let mut text_analyzer = self.get_text_analyzer(field_entry, indexing)?;
403                        let mut token_stream = text_analyzer.token_stream(pre_term.as_str());
404                        let mut queries = Vec::new();
405                        token_stream.process(&mut |token| {
406                            let morphology_config = self
407                                .query_parser_config
408                                .0
409                                .morphology_configs
410                                .get(field_entry.name())
411                                .cloned()
412                                .unwrap_or_default();
413                            let query = if let Some(morphology) = self.morphology_manager.get(self.query_parser_config.0.query_language()) {
414                                // ToDo: Change heuristic
415                                if pre_term.as_str().len() < 24 {
416                                    morphology.derive_query(morphology_config, field, full_path, field_type, &token.text)
417                                } else {
418                                    let term = cast_field_to_term(field, full_path, field_type, &token.text, false);
419                                    Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>
420                                }
421                            } else {
422                                let term = cast_field_to_term(field, full_path, field_type, &token.text, false);
423                                Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>
424                            };
425                            queries.push(boost_query(query, boost))
426                        });
427                        Ok(queries)
428                    }
429                    Rule::phrase => {
430                        let mut phrase_pairs = pre_term.into_inner();
431                        let words = match phrase_pairs.next() {
432                            None => return Ok(vec![]),
433                            Some(words) => words,
434                        };
435
436                        let slop = phrase_pairs
437                            .next()
438                            .map(|v| match v.as_str() {
439                                "" => 0,
440                                _ => u32::from_str(v.as_str()).expect("cannot parse"),
441                            })
442                            .unwrap_or(0);
443                        let terms = self.parse_words(*field, full_path, indexing, words.as_str())?;
444                        if terms.len() <= 1 {
445                            return Ok(terms
446                                .into_iter()
447                                .map(|(_, term)| {
448                                    let query = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>;
449                                    boost_query(query, boost)
450                                })
451                                .collect());
452                        }
453                        return if indexing.index_option().has_positions() {
454                            let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, slop)) as Box<dyn Query>;
455                            Ok(vec![boost_query(query, boost)])
456                        } else if ignore_phrase_for_non_position_field {
457                            Ok(vec![])
458                        } else {
459                            Err(QueryParserError::FieldDoesNotHavePositionsIndexed(field_entry.name().to_string()))
460                        };
461                    }
462                    Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
463                    Rule::regex => {
464                        let query = Box::new(
465                            RegexQuery::from_pattern(pre_term.clone().into_inner().next().expect("grammar failure").as_str(), *field)
466                                .map_err(|_| QueryParserError::Syntax(pre_term.as_str().to_string()))?,
467                        ) as Box<dyn Query>;
468                        return Ok(vec![boost_query(query, boost)]);
469                    }
470                    _ => unreachable!(),
471                }
472            }
473            _ => unreachable!(),
474        };
475    }
476
477    fn parse_occur(&self, occur: &Pair<Rule>) -> Occur {
478        match occur.as_rule() {
479            Rule::positive_term | Rule::positive_grouping => Occur::Must,
480            Rule::negative_term | Rule::negative_grouping => Occur::MustNot,
481            Rule::default_term | Rule::default_grouping => Occur::Should,
482            _ => unreachable!(),
483        }
484    }
485
486    fn parse_term(&self, term: Pair<Rule>, field: &Field, full_path: &str, boost: Option<f32>) -> Result<Box<dyn Query>, QueryParserError> {
487        let term = term.into_inner().next().expect("grammar failure");
488        let occur = self.parse_occur(&term);
489        let pre_term = term.into_inner().next().expect("grammar failure");
490        Ok(Box::new(BooleanQuery::new(
491            self.parse_pre_term(field, full_path, pre_term, boost, false)?
492                .into_iter()
493                .map(|q| (occur, q))
494                .collect(),
495        )))
496    }
497
498    fn compute_boundary_term(&self, field: Field, phrase: &str) -> Result<Term, QueryParserError> {
499        let field_entry = self.schema.get_field_entry(field);
500        let field_type = field_entry.field_type();
501        let field_supports_ff_range_queries = field_type.is_fast() && is_type_valid_for_fastfield_range_query(field_type.value_type());
502
503        if !field_type.is_indexed() && !field_supports_ff_range_queries {
504            return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
505        }
506
507        match *field_type {
508            FieldType::U64(_) => {
509                let val: u64 = u64::from_str(phrase)?;
510                Ok(Term::from_field_u64(field, val))
511            }
512            FieldType::I64(_) => {
513                let val: i64 = i64::from_str(phrase)?;
514                Ok(Term::from_field_i64(field, val))
515            }
516            FieldType::F64(_) => {
517                let val: f64 = f64::from_str(phrase)?;
518                Ok(Term::from_field_f64(field, val))
519            }
520            FieldType::Bool(_) => {
521                let val: bool = bool::from_str(phrase)?;
522                Ok(Term::from_field_bool(field, val))
523            }
524            FieldType::Bytes(_) => {
525                let val = &BASE64.decode(phrase)?;
526                Ok(Term::from_field_bytes(field, val))
527            }
528            FieldType::Str(ref str_options) => {
529                let option = str_options.get_indexing_options().ok_or_else(|| {
530                    // This should have been seen earlier really.
531                    QueryParserError::FieldNotIndexed(field_entry.name().to_string())
532                })?;
533                let mut terms: Vec<Term> = Vec::new();
534                let mut text_analyzer = self
535                    .tokenizer_manager
536                    .get(option.tokenizer())
537                    .ok_or_else(|| QueryParserError::UnknownTokenizer {
538                        field: field_entry.name().to_string(),
539                        tokenizer: option.tokenizer().to_string(),
540                    })?;
541                let mut token_stream = text_analyzer.token_stream(phrase);
542                token_stream.process(&mut |token| {
543                    let term = Term::from_field_text(field, &token.text);
544                    terms.push(term);
545                });
546                if terms.len() != 1 {
547                    return Err(QueryParserError::UnsupportedQuery(format!(
548                        "Range query boundary cannot have multiple tokens: {phrase:?}."
549                    )));
550                }
551                Ok(terms.into_iter().next().expect("grammar failure"))
552            }
553            _ => unreachable!(),
554        }
555    }
556
557    fn parse_boundary_word(&self, field: Field, boundary_word: Pair<Rule>) -> Result<Bound<Term>, QueryParserError> {
558        Ok(match boundary_word.as_rule() {
559            Rule::star => Unbounded,
560            Rule::signed_word => Included(self.compute_boundary_term(field, boundary_word.as_str())?),
561            _ => unreachable!(),
562        })
563    }
564
565    fn extract_top_level_phrase(&self, pairs: Pairs<Rule>) -> Option<String> {
566        let mut terms = vec![];
567        for pair in pairs {
568            let mut statement_pairs = pair.into_inner();
569            let search_group_or_term = statement_pairs.next().expect("grammar failure");
570            let boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure"));
571            match (search_group_or_term.as_rule(), boost) {
572                (Rule::term, None) => {
573                    let term = search_group_or_term.into_inner().next().expect("grammar_failure");
574                    let occur = self.parse_occur(&term);
575                    let pre_term = term.into_inner().next().expect("grammar failure");
576                    if occur == Occur::Should && matches!(pre_term.as_rule(), Rule::word) {
577                        terms.push(pre_term.as_str())
578                    }
579                }
580                _ => return None,
581            }
582        }
583        (!terms.is_empty()).then(|| terms.join(" "))
584    }
585
586    fn parse_statement(&self, pair: Pair<Rule>) -> Result<Box<dyn Query>, QueryParserError> {
587        let mut statement_pairs = pair.into_inner();
588        let isbn_doi_or_search_group_or_grouping_or_term = statement_pairs.next().expect("grammar failure");
589        let statement_boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure"));
590        let statement_result = match isbn_doi_or_search_group_or_grouping_or_term.as_rule() {
591            Rule::search_group => {
592                let mut search_group = isbn_doi_or_search_group_or_grouping_or_term.into_inner();
593                let field_name = search_group.next().expect("grammar failure");
594                let grouping_or_term = search_group.next().expect("grammar failure");
595                match grouping_or_term.as_rule() {
596                    Rule::grouping => {
597                        let grouping = grouping_or_term.into_inner().next().expect("grammar failure");
598                        let occur = self.parse_occur(&grouping);
599                        let mut intermediate_results = vec![];
600                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
601                        match self.schema.find_field(resolved_field_name) {
602                            Some((field, full_path)) => {
603                                for term in grouping.into_inner() {
604                                    intermediate_results.push(self.parse_term(term, &field, full_path, statement_boost)?);
605                                }
606                            }
607                            None => {
608                                if self.query_parser_config.0.excluded_fields.iter().any(|x| x == field_name.as_str()) {
609                                    return Ok(Box::new(EmptyQuery {}));
610                                }
611                                intermediate_results.push(self.default_field_queries(field_name, statement_boost)?);
612                                for term in grouping.into_inner() {
613                                    intermediate_results.push(self.default_field_queries(term, statement_boost)?)
614                                }
615                            }
616                        }
617                        let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>;
618                        match occur {
619                            Occur::Should => Ok(group_query),
620                            Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box<dyn Query>),
621                            Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box<dyn Query>),
622                        }
623                    }
624                    Rule::term => {
625                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
626                        match self.schema.find_field(resolved_field_name) {
627                            Some((field, full_path)) => self.parse_term(grouping_or_term, &field, full_path, statement_boost),
628                            None => {
629                                if self
630                                    .query_parser_config
631                                    .0
632                                    .excluded_fields
633                                    .iter()
634                                    .any(|x| x == field_name.as_str() || Some(x.as_str()) == field_name.as_str().split('.').next())
635                                {
636                                    Ok(Box::new(EmptyQuery {}) as Box<dyn Query>)
637                                } else {
638                                    Ok(Box::new(BooleanQuery::new(vec![
639                                        (Occur::Should, self.default_field_queries(field_name, statement_boost)?),
640                                        (Occur::Should, self.default_field_queries(grouping_or_term, statement_boost)?),
641                                    ])) as Box<dyn Query>)
642                                }
643                            }
644                        }
645                    }
646                    Rule::star => {
647                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
648                        match self.schema.find_field(resolved_field_name) {
649                            Some((field, full_path)) => Ok(Box::new(ExistsQuery::new(field, full_path)) as Box<dyn Query>),
650                            None => {
651                                if self
652                                    .query_parser_config
653                                    .0
654                                    .excluded_fields
655                                    .iter()
656                                    .any(|x| x == field_name.as_str() || Some(x.as_str()) == field_name.as_str().split('.').next())
657                                {
658                                    Ok(Box::new(EmptyQuery {}) as Box<dyn Query>)
659                                } else {
660                                    Ok(self.default_field_queries(field_name, statement_boost)?)
661                                }
662                            }
663                        }
664                    }
665                    _ => unreachable!(),
666                }
667            }
668            Rule::doi => {
669                let mut queries = vec![];
670
671                for term_field_mapper_name in ["doi", "doi_isbn"] {
672                    if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
673                        if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
674                            if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
675                            {
676                                queries.push((Occur::Should, query));
677                            }
678                        }
679                    }
680                }
681
682                Ok(Box::new(BooleanQuery::new(queries)) as Box<dyn Query>)
683            }
684            Rule::isbn => {
685                let mut queries = vec![];
686
687                for term_field_mapper_name in ["isbn"] {
688                    if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
689                        if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
690                            if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
691                            {
692                                queries.push((Occur::Should, query));
693                            }
694                        }
695                    }
696                }
697
698                Ok(Box::new(BooleanQuery::new(queries)) as Box<dyn Query>)
699            }
700            Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_grouping_or_term, statement_boost),
701            Rule::grouping => {
702                let grouping = isbn_doi_or_search_group_or_grouping_or_term.into_inner().next().expect("grammar failure");
703                let occur = self.parse_occur(&grouping);
704                let mut intermediate_results = vec![];
705                for term in grouping.into_inner() {
706                    intermediate_results.push(self.default_field_queries(term, statement_boost)?)
707                }
708                let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>;
709                match occur {
710                    Occur::Should => Ok(group_query),
711                    Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box<dyn Query>),
712                    Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box<dyn Query>),
713                }
714            }
715            e => panic!("{e:?}"),
716        }?;
717        Ok(statement_result)
718    }
719
720    fn parse_statements(&self, pairs: Pairs<Rule>) -> Result<Box<dyn Query>, QueryParserError> {
721        let mut subqueries = Subqueries::new();
722
723        for pair in pairs.clone() {
724            let parsed_queries = self.parse_statement(pair)?;
725            subqueries.push((Occur::Should, parsed_queries));
726        }
727
728        if let Some(top_level_phrase) = self.extract_top_level_phrase(pairs) {
729            if let Some(exact_matches_promoter) = &self.query_parser_config.0.exact_matches_promoter {
730                let fields = if exact_matches_promoter.fields.is_empty() {
731                    self.query_parser_config.0.default_fields.iter()
732                } else {
733                    exact_matches_promoter.fields.iter()
734                };
735                subqueries.extend(
736                    fields
737                        .filter_map(|field| {
738                            let (field, full_path) = self.schema.find_field(self.resolve_field_name(field)).expect("no field");
739                            let field_entry = self.schema.get_field_entry(field);
740                            let field_boost = self.query_parser_config.0.field_boosts.get(field_entry.name()).copied();
741                            match field_entry.field_type() {
742                                FieldType::Str(ref str_option) => {
743                                    let option = str_option.get_indexing_options()?;
744                                    let terms = match self.parse_words(field, full_path, option, &top_level_phrase) {
745                                        Ok(terms) => terms,
746                                        Err(err) => return Some(Err(err)),
747                                    };
748                                    (terms.len() > 1 && option.index_option().has_positions()).then(|| {
749                                        let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, exact_matches_promoter.slop)) as Box<dyn Query>;
750                                        Ok(boost_query(query, multiply_boosts(exact_matches_promoter.boost, field_boost)))
751                                    })
752                                }
753                                FieldType::JsonObject(ref json_option) => {
754                                    let option = json_option.get_text_indexing_options()?;
755                                    let terms = match self.parse_words(field, full_path, option, &top_level_phrase) {
756                                        Ok(terms) => terms,
757                                        Err(err) => return Some(Err(err)),
758                                    };
759                                    (terms.len() > 1 && option.index_option().has_positions()).then(|| {
760                                        let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, exact_matches_promoter.slop)) as Box<dyn Query>;
761                                        Ok(boost_query(query, multiply_boosts(exact_matches_promoter.boost, field_boost)))
762                                    })
763                                }
764                                _ => None,
765                            }
766                        })
767                        .collect::<Result<Vec<_>, _>>()?
768                        .into_iter()
769                        .map(|q| (Occur::Should, q)),
770                )
771            }
772        }
773        Ok(Box::new(BooleanQuery::new(subqueries.into_iter().take(self.query_parser_config.term_limit()).collect())) as Box<dyn Query>)
774    }
775
776    pub fn parse_query(&self, query: &str) -> Result<Box<dyn Query>, QueryParserError> {
777        let pairs = SummaQlParser::parse(Rule::main, query).map_err(Box::new)?;
778        Ok(reduce_empty_queries(reduce_should_clause(self.parse_statements(pairs)?)))
779    }
780}
781
782#[cfg(test)]
783mod tests {
784    use std::collections::HashMap;
785
786    use summa_proto::proto;
787    use tantivy::schema::{TextOptions, INDEXED, STRING, TEXT};
788    use tantivy::tokenizer::{LowerCaser, RemoveLongFilter};
789
790    use super::*;
791    use crate::components::tokenizers::Tokenizer;
792
793    fn create_query_parser() -> QueryParser {
794        let tokenizer_manager = TokenizerManager::default();
795        let morphology_manager = MorphologyManager::default();
796        let mut schema_builder = Schema::builder();
797        schema_builder.add_text_field("title", TEXT);
798        schema_builder.add_text_field("body", TEXT);
799        schema_builder.add_i64_field("timestamp", INDEXED);
800        schema_builder.add_text_field("doi", STRING);
801        schema_builder.add_text_field("isbns", STRING);
802        schema_builder.add_json_field("metadata", TEXT);
803        let schema = schema_builder.build();
804        let query_parser_config = QueryParserConfig(proto::QueryParserConfig {
805            default_fields: vec!["title".to_string()],
806            ..Default::default()
807        });
808        QueryParser::new(schema, query_parser_config, &morphology_manager, &tokenizer_manager).expect("cannot create parser")
809    }
810
811    fn create_complex_query_parser() -> QueryParser {
812        let tokenizer_manager = TokenizerManager::default();
813        tokenizer_manager.register(
814            "summa",
815            TextAnalyzer::builder(Tokenizer).filter(RemoveLongFilter::limit(100)).filter(LowerCaser).build(),
816        );
817        let morphology_manager = MorphologyManager::default();
818        let mut schema_builder = Schema::builder();
819        let text_options = TextOptions::default().set_indexing_options(
820            TextFieldIndexing::default()
821                .set_tokenizer("summa")
822                .set_index_option(IndexRecordOption::WithFreqsAndPositions),
823        );
824        schema_builder.add_text_field("title", text_options);
825        schema_builder.add_text_field("body", TEXT);
826        schema_builder.add_text_field("authors", TEXT);
827        schema_builder.add_text_field("language", TEXT);
828        schema_builder.add_i64_field("timestamp", INDEXED);
829        schema_builder.add_text_field("doi", STRING);
830        schema_builder.add_text_field("isbns", STRING);
831        schema_builder.add_json_field("metadata", TEXT);
832        let schema = schema_builder.build();
833        let query_parser_config = QueryParserConfig(proto::QueryParserConfig {
834            default_fields: vec!["title".to_string(), "body".to_string()],
835            ..Default::default()
836        });
837        QueryParser::new(schema, query_parser_config, &morphology_manager, &tokenizer_manager).expect("cannot create parser")
838    }
839
840    #[test]
841    pub fn test_parser_base() {
842        let query_parser = create_complex_query_parser();
843        let query = query_parser.parse_query("search engine");
844        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\"))), (Should, TermQuery(Term(field=1, type=Str, \"engine\")))] })");
845        let query = query_parser.parse_query("'search engine'");
846        assert_eq!(
847            format!("{:?}", query),
848            "Ok(BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engine\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })] })"
849        );
850        let query = query_parser.parse_query("+'I sette messaggeri'");
851        assert_eq!(
852            format!("{:?}", query),
853            "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"i\")), (1, Term(field=0, type=Str, \"sette\")), (2, Term(field=0, type=Str, \"messaggeri\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"i\")), (1, Term(field=1, type=Str, \"sette\")), (2, Term(field=1, type=Str, \"messaggeri\"))], slop: 0 })] })] })"
854        );
855    }
856
857    #[test]
858    pub fn test_parser_slop() {
859        let query_parser = create_query_parser();
860        let query = query_parser.parse_query("body:'search engine'~4");
861        assert_eq!(
862            format!("{:?}", query),
863            "Ok(PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 4 })"
864        );
865    }
866
867    #[test]
868    pub fn test_parser_fields() {
869        let mut query_parser = create_query_parser();
870        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
871            "doi".to_string(),
872            proto::TermFieldMapperConfig {
873                fields: vec!["doi".to_string()],
874            },
875        );
876        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
877            "doi_isbn".to_string(),
878            proto::TermFieldMapperConfig {
879                fields: vec!["metadata.isbns".to_string()],
880            },
881        );
882        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
883            "isbn".to_string(),
884            proto::TermFieldMapperConfig {
885                fields: vec!["metadata.isbns".to_string()],
886            },
887        );
888        assert_eq!(
889            format!("{:?}", query_parser.parse_query("body:'search engine'")),
890            "Ok(PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })"
891        );
892        assert_eq!(
893            format!("{:?}", query_parser.parse_query("timestamp:10")),
894            "Ok(TermQuery(Term(field=2, type=I64, 10)))"
895        );
896        assert_eq!(
897            format!("{:?}", query_parser.parse_query("title:search engine")),
898            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
899        );
900        assert_eq!(
901            format!("{:?}", query_parser.parse_query("not_field:search engine")),
902            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"not\"))), (Should, TermQuery(Term(field=0, type=Str, \"field\"))), (Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
903        );
904        assert_eq!(
905            format!("{:?}", query_parser.parse_query("doi:10.0000/abcd.0123 ")),
906            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
907        );
908        assert_eq!(
909            format!("{:?}", query_parser.parse_query("doi:https://doi.org/10.0000/abcd.0123")),
910            "Ok(TermQuery(Term(field=3, type=Str, \"https://doi.org/10.0000/abcd.0123\")))"
911        );
912        assert_eq!(
913            format!("{:?}", query_parser.parse_query("doi:doi.org/10.0000/abcd.0123")),
914            "Ok(TermQuery(Term(field=3, type=Str, \"doi.org/10.0000/abcd.0123\")))"
915        );
916        assert_eq!(
917            format!("{:?}", query_parser.parse_query("10.0000/978123")),
918            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/978123\")))"
919        );
920        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/9781234567890")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/9781234567890\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
921        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/978-12345-6789-0")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/978-12345-6789-0\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
922        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/978-12345-6789-0.ch11")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/978-12345-6789-0.ch11\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
923        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/cbo978-12345-6789-0.ch11")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/cbo978-12345-6789-0.ch11\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
924        assert_eq!(
925            format!("{:?}", query_parser.parse_query("978-12345-6789-0")),
926            "Ok(TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))"
927        );
928        assert_eq!(
929            format!("{:?}", query_parser.parse_query("9781234567890")),
930            "Ok(TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))"
931        );
932        assert_eq!(format!("{:?}", query_parser.parse_query("97812-34-5678-909")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"97812\"))), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"5678\"))), (Should, TermQuery(Term(field=0, type=Str, \"909\")))] })");
933        assert_eq!(
934            format!("{:?}", query_parser.parse_query("metadata.isbns:97812-34-5678-90")),
935            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 97812))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 34))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 5678))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 90)))] })"
936        );
937        assert_eq!(format!("{:?}", query_parser.parse_query("123 97812-34-5678-909")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"123\"))), (Should, TermQuery(Term(field=0, type=Str, \"97812\"))), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"5678\"))), (Should, TermQuery(Term(field=0, type=Str, \"909\")))] })");
938        assert_eq!(
939            format!("{:?}", query_parser.parse_query("10.0000/cbo123")),
940            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/cbo123\")))"
941        );
942        assert_eq!(
943            format!("{:?}", query_parser.parse_query("10.1515/12-23")),
944            "Ok(TermQuery(Term(field=3, type=Str, \"10.1515/12-23\")))"
945        );
946        assert_eq!(
947            format!("{:?}", query_parser.parse_query("doi.org/10.0000/abcd.0123")),
948            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
949        );
950        assert_eq!(
951            format!("{:?}", query_parser.parse_query("10.0000/abcd.0123")),
952            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
953        );
954        assert_eq!(
955            format!("{:?}", query_parser.parse_query("https://doi.org/10.0000/abcd.0123")),
956            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
957        );
958    }
959
960    #[test]
961    pub fn test_free_queries() {
962        let query_parser = create_query_parser();
963        assert_eq!(
964            format!("{:?}", query_parser.parse_query("Search Engines: The Ultimate, Only Guide!")),
965            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engines\"))), (Should, TermQuery(Term(field=0, type=Str, \"the\"))), (Should, TermQuery(Term(field=0, type=Str, \"ultimate\"))), (Should, TermQuery(Term(field=0, type=Str, \"only\"))), (Should, TermQuery(Term(field=0, type=Str, \"guide\")))] })"
966        );
967        assert_eq!(
968            format!("{:?}", query_parser.parse_query("!! HI !! (SEARCH! ENGINES!")),
969            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"hi\"))), (Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engines\")))] })"
970        );
971        assert_eq!(
972            format!("{:?}", query_parser.parse_query("`non closed")),
973            "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"non\")), (1, Term(field=0, type=Str, \"closed\"))], slop: 0 })"
974        );
975        assert_eq!(
976            format!("{:?}", query_parser.parse_query("\"non closed")),
977            "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"non\")), (1, Term(field=0, type=Str, \"closed\"))], slop: 0 })"
978        );
979        assert_eq!(
980            format!("{:?}", query_parser.parse_query("non closed`")),
981            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"non\"))), (Should, TermQuery(Term(field=0, type=Str, \"closed\")))] })"
982        );
983        assert_eq!(
984            format!("{:?}", query_parser.parse_query("non closed\"")),
985            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"non\"))), (Should, TermQuery(Term(field=0, type=Str, \"closed\")))] })"
986        );
987        assert_eq!(
988            format!("{:?}", query_parser.parse_query("title:(search ")),
989            "Ok(TermQuery(Term(field=0, type=Str, \"title\")))"
990        );
991        assert_eq!(
992            format!("{:?}", query_parser.parse_query("title:(search -")),
993            "Ok(TermQuery(Term(field=0, type=Str, \"title\")))"
994        );
995        assert_eq!(format!("{:?}", query_parser.parse_query("``")), "Ok(EmptyQuery)");
996        assert_eq!(format!("{:?}", query_parser.parse_query("```")), "Ok(EmptyQuery)");
997        assert_eq!(format!("{:?}", query_parser.parse_query(")(")), "Ok(EmptyQuery)");
998        assert_eq!(
999            format!("{:?}", query_parser.parse_query("(a)(b)`")),
1000            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"a\"))), (Should, TermQuery(Term(field=0, type=Str, \"b\")))] })"
1001        );
1002        assert_eq!(
1003            format!("{:?}", query_parser.parse_query("doi:'10.1182/blood.v53.1.19.bloodjournal53119'")),
1004            "Ok(TermQuery(Term(field=3, type=Str, \"10.1182/blood.v53.1.19.bloodjournal53119\")))"
1005        );
1006        assert_eq!(
1007            format!("{:?}", query_parser.parse_query("doi:10.1182/blood.v53.1.19.bloodjournal53119")),
1008            "Ok(TermQuery(Term(field=3, type=Str, \"10.1182/blood.v53.1.19.bloodjournal53119\")))"
1009        );
1010        assert_eq!(
1011            format!("{:?}", query_parser.parse_query("10.10 10/10")),
1012            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\")))] })"
1013        );
1014        let query_parser = create_complex_query_parser();
1015        assert_eq!(format!("{:?}", query_parser.parse_query("\"search engines\"")), "Ok(BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engines\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engines\"))], slop: 0 })] })");
1016    }
1017
1018    #[test]
1019    pub fn test_non_ascii() {
1020        let query_parser = create_query_parser();
1021        assert_eq!(
1022            format!("{:?}", query_parser.parse_query("body:поисковые системы")),
1023            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
1024        );
1025        assert_eq!(
1026            format!("{:?}", query_parser.parse_query("(поисковые системы)")),
1027            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
1028        );
1029        assert_eq!(
1030            format!("{:?}", query_parser.parse_query("поисковые: системы")),
1031            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
1032        );
1033        assert_eq!(
1034            format!("{:?}", query_parser.parse_query("healthcare cyber–physical system")),
1035            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"healthcare\"))), (Should, TermQuery(Term(field=0, type=Str, \"cyber\"))), (Should, TermQuery(Term(field=0, type=Str, \"physical\"))), (Should, TermQuery(Term(field=0, type=Str, \"system\")))] })"
1036        );
1037    }
1038
1039    #[test]
1040    pub fn test_json() {
1041        let query_parser = create_query_parser();
1042        assert_eq!(
1043            format!("{:?}", query_parser.parse_query("metadata.a:1")),
1044            "Ok(TermQuery(Term(field=5, type=Json, path=a, type=I64, 1)))"
1045        );
1046        assert_eq!(
1047            format!("{:?}", query_parser.parse_query("metadata.a:\"1\"")),
1048            "Ok(TermQuery(Term(field=5, type=Json, path=a, type=Str, \"1\")))"
1049        );
1050        assert_eq!(
1051            format!("{:?}", query_parser.parse_query("metadata.a:\"1 2 3\"")),
1052            "Ok(PhraseQuery { field: Field(5), phrase_terms: [(0, Term(field=5, type=Json, path=a, type=Str, \"1\")), (1, Term(field=5, type=Json, path=a, type=Str, \"2\")), (2, Term(field=5, type=Json, path=a, type=Str, \"3\"))], slop: 0 })"
1053        );
1054    }
1055
1056    #[test]
1057    pub fn test_grouping() {
1058        let query_parser = create_query_parser();
1059        assert_eq!(
1060            format!("{:?}", query_parser.parse_query("body:+(a b)")),
1061            "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"a\"))), (Should, TermQuery(Term(field=1, type=Str, \"b\")))], minimum_number_should_match: 1 })], minimum_number_should_match: 0 })"
1062        );
1063        assert_eq!(
1064            format!("{:?}", query_parser.parse_query("body:-(a b)")),
1065            "Ok(BooleanQuery { subqueries: [(MustNot, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"a\"))), (Should, TermQuery(Term(field=1, type=Str, \"b\")))], minimum_number_should_match: 1 })], minimum_number_should_match: 0 })"
1066        );
1067    }
1068
1069    #[test]
1070    pub fn test_punct() {
1071        let query_parser = create_query_parser();
1072        assert_eq!(
1073            format!("{:?}", query_parser.parse_query("a + b - c")),
1074            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"a\"))), (Should, TermQuery(Term(field=0, type=Str, \"b\"))), (Should, TermQuery(Term(field=0, type=Str, \"c\")))] })"
1075        );
1076    }
1077
1078    #[test]
1079    pub fn test_plus_minus() {
1080        let query_parser = create_query_parser();
1081        assert_eq!(
1082            format!("{:?}", query_parser.parse_query("body:+search -engine")),
1083            "Ok(BooleanQuery { subqueries: [(Must, TermQuery(Term(field=1, type=Str, \"search\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
1084        );
1085        assert_eq!(
1086            format!("{:?}", query_parser.parse_query("body:+'search engine'")),
1087            "Ok(BooleanQuery { subqueries: [(Must, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })] })"
1088        );
1089        assert_eq!(
1090            format!("{:?}", query_parser.parse_query("+search +engine")),
1091            "Ok(BooleanQuery { subqueries: [(Must, TermQuery(Term(field=0, type=Str, \"search\"))), (Must, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
1092        );
1093        let query_parser = create_complex_query_parser();
1094        assert_eq!(format!("{:?}", query_parser.parse_query("+search +engine")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\")))] }), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"engine\"))), (Should, TermQuery(Term(field=1, type=Str, \"engine\")))] })] })");
1095        assert_eq!(format!("{:?}", query_parser.parse_query("+search language:+ru")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\")))] }), (Must, TermQuery(Term(field=3, type=Str, \"ru\")))] })");
1096        assert_eq!(format!("{:?}", query_parser.parse_query("+c++ language:+ru")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"c++\"))), (Should, TermQuery(Term(field=1, type=Str, \"c\")))] }), (Must, TermQuery(Term(field=3, type=Str, \"ru\")))] })");
1097    }
1098
1099    #[test]
1100    pub fn test_quotes() {
1101        let query_parser = create_query_parser();
1102        assert_eq!(
1103            format!("{:?}", query_parser.parse_query("Don't duck with my family")),
1104            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"don\"))), (Should, TermQuery(Term(field=0, type=Str, \"t\"))), (Should, TermQuery(Term(field=0, type=Str, \"duck\"))), (Should, TermQuery(Term(field=0, type=Str, \"with\"))), (Should, TermQuery(Term(field=0, type=Str, \"my\"))), (Should, TermQuery(Term(field=0, type=Str, \"family\")))] })"
1105        );
1106        assert_eq!(
1107            format!("{:?}", query_parser.parse_query("\"I Don't Want to be Me\"")),
1108            "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"i\")), (1, Term(field=0, type=Str, \"don\")), (2, Term(field=0, type=Str, \"t\")), (3, Term(field=0, type=Str, \"want\")), (4, Term(field=0, type=Str, \"to\")), (5, Term(field=0, type=Str, \"be\")), (6, Term(field=0, type=Str, \"me\"))], slop: 0 })"
1109        );
1110    }
1111
1112    #[test]
1113    pub fn test_parser_boostings() {
1114        let query_parser = create_query_parser();
1115        let query = query_parser.parse_query("search^2.0");
1116        assert_eq!(
1117            format!("{:?}", query),
1118            "Ok(Boost(query=TermQuery(Term(field=0, type=Str, \"search\")), boost=2))"
1119        );
1120        let query = query_parser.parse_query("'search engine'~3^2.0");
1121        assert_eq!(
1122            format!("{:?}", query),
1123            "Ok(Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engine\"))], slop: 3 }, boost=2))"
1124        );
1125        let query = query_parser.parse_query("search engine^2.0");
1126        assert_eq!(
1127            format!("{:?}", query),
1128            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, Boost(query=TermQuery(Term(field=0, type=Str, \"engine\")), boost=2))] })"
1129        );
1130        let query = query_parser.parse_query("body:title^2.0");
1131        assert_eq!(
1132            format!("{:?}", query),
1133            "Ok(Boost(query=TermQuery(Term(field=1, type=Str, \"title\")), boost=2))"
1134        );
1135        let query = query_parser.parse_query("body:'title'^2.0");
1136        assert_eq!(
1137            format!("{:?}", query),
1138            "Ok(Boost(query=TermQuery(Term(field=1, type=Str, \"title\")), boost=2))"
1139        );
1140    }
1141
1142    #[test]
1143    pub fn test_range_queries() {
1144        let query_parser = create_query_parser();
1145        let query = query_parser.parse_query("body:[aaa TO ccc]");
1146        assert_eq!(
1147            format!("{:?}", query),
1148            "Ok(RangeQuery { field: \"body\", value_type: Str, lower_bound: Included([97, 97, 97]), upper_bound: Included([99, 99, 99]), limit: None })"
1149        );
1150        let query = query_parser.parse_query("body:[ a to  * ]");
1151        assert_eq!(
1152            format!("{:?}", query),
1153            "Ok(RangeQuery { field: \"body\", value_type: Str, lower_bound: Included([97]), upper_bound: Unbounded, limit: None })"
1154        );
1155        let query = query_parser.parse_query("timestamp:[ 1000 to 2000 ]");
1156        assert_eq!(format!("{:?}", query), "Ok(RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 3, 232]), upper_bound: Included([128, 0, 0, 0, 0, 0, 7, 208]), limit: None })");
1157        let query = query_parser.parse_query("timestamp:(-[1100 to 1200] [ 1000 to 2000 ] -1500 +3000)");
1158        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(MustNot, RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 4, 76]), upper_bound: Included([128, 0, 0, 0, 0, 0, 4, 176]), limit: None }), (Should, RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 3, 232]), upper_bound: Included([128, 0, 0, 0, 0, 0, 7, 208]), limit: None }), (MustNot, TermQuery(Term(field=2, type=I64, 1500))), (Must, TermQuery(Term(field=2, type=I64, 3000)))] })");
1159    }
1160
1161    #[test]
1162    pub fn test_exact_phrase_promoter() {
1163        let mut query_parser = create_query_parser();
1164        query_parser.query_parser_config.0.exact_matches_promoter = Some(proto::ExactMatchesPromoter {
1165            slop: 3,
1166            boost: Some(2.0),
1167            fields: vec![],
1168        });
1169        let query = query_parser.parse_query("old school holy-wood");
1170        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"old\"))), (Should, TermQuery(Term(field=0, type=Str, \"school\"))), (Should, TermQuery(Term(field=0, type=Str, \"holy\"))), (Should, TermQuery(Term(field=0, type=Str, \"wood\"))), (Should, Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"old\")), (1, Term(field=0, type=Str, \"school\")), (2, Term(field=0, type=Str, \"holy\")), (3, Term(field=0, type=Str, \"wood\"))], slop: 3 }, boost=2))], minimum_number_should_match: 1 })");
1171        let query = query_parser.parse_query("old^2.0 school");
1172        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, Boost(query=TermQuery(Term(field=0, type=Str, \"old\")), boost=2)), (Should, TermQuery(Term(field=0, type=Str, \"school\")))], minimum_number_should_match: 1 })");
1173        query_parser.query_parser_config.0.field_boosts = HashMap::from_iter(vec![("title".to_string(), 3.0)]);
1174        let query = query_parser.parse_query("old school");
1175        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, Boost(query=TermQuery(Term(field=0, type=Str, \"old\")), boost=3)), (Should, Boost(query=TermQuery(Term(field=0, type=Str, \"school\")), boost=3)), (Should, Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"old\")), (1, Term(field=0, type=Str, \"school\"))], slop: 3 }, boost=6))], minimum_number_should_match: 1 })");
1176    }
1177
1178    #[test]
1179    pub fn test_inflection() {
1180        let mut query_parser = create_query_parser();
1181        let mut morphology_configs = HashMap::new();
1182        morphology_configs.insert(
1183            "title".to_string(),
1184            proto::MorphologyConfig {
1185                derive_tenses_coefficient: Some(0.3),
1186            },
1187        );
1188        query_parser.query_parser_config.0.morphology_configs = morphology_configs;
1189        query_parser.query_parser_config.0.query_language = Some("en".to_string());
1190        let query = query_parser.parse_query("red1 search engine going");
1191        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"red1\"))), (Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"search\")), TermQuery(Term(field=0, type=Str, \"searches\"))], tie_breaker: 0.3 }), (Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"engine\")), TermQuery(Term(field=0, type=Str, \"engines\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"going\")))] })");
1192        let query = query_parser.parse_query("iso 34-1:2022");
1193        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"iso\")), TermQuery(Term(field=0, type=Str, \"isos\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"1\")))] })");
1194    }
1195
1196    #[test]
1197    pub fn test_root_grouping() {
1198        let query_parser = create_query_parser();
1199        let query = query_parser.parse_query("(test1 test2) -(test3) +(test4 test5)");
1200        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test1\"))), (Should, TermQuery(Term(field=0, type=Str, \"test2\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"test3\"))), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test4\"))), (Should, TermQuery(Term(field=0, type=Str, \"test5\")))] })] })");
1201    }
1202}