1use std::collections::Bound;
2use std::ops::Bound::{Included, Unbounded};
3use std::ops::Deref;
4use std::str::FromStr;
5
6use base64::engine::general_purpose::STANDARD as BASE64;
7use base64::Engine;
8use pest::iterators::{Pair, Pairs};
9use pest::Parser;
10use pest_derive::Parser;
11use tantivy::query::{BooleanQuery, BoostQuery, DisjunctionMaxQuery, EmptyQuery, PhraseQuery, Query, QueryClone, RangeQuery, RegexQuery, TermQuery};
12use tantivy::schema::{Facet, FacetParseError, Field, FieldEntry, FieldType, IndexRecordOption, Schema, TextFieldIndexing, Type};
13use tantivy::tokenizer::{TextAnalyzer, TokenizerManager};
14use tantivy::{Index, Term};
15use tantivy_query_grammar::Occur;
16
17use crate::components::queries::ExistsQuery;
18use crate::components::query_parser::morphology::MorphologyManager;
19use crate::components::query_parser::proto_query_parser::QueryParserDefaultMode;
20use crate::components::query_parser::term_field_mappers::TermFieldMappersManager;
21use crate::components::query_parser::utils::cast_field_to_term;
22use crate::configs::core::QueryParserConfig;
23use crate::errors::SummaResult;
24use crate::utils::transpose;
25use crate::validators;
26
/// Pest parser generated at compile time from the Summa QL grammar file.
/// The `Rule` enum referenced throughout this module is emitted by this derive.
#[derive(Parser)]
#[grammar = "src/components/query_parser/summa_ql.pest"] struct SummaQlParser;
30
/// Parses Summa QL strings into tantivy [`Query`] trees using the index schema,
/// tokenizers, morphology and the per-index parser configuration.
pub struct QueryParser {
    // Index schema used to resolve field names and types.
    schema: Schema,
    // Tokenizers used to analyze words/phrases for text fields.
    tokenizer_manager: TokenizerManager,
    // Language-dependent query derivation (e.g. stemming-like expansion).
    morphology_manager: MorphologyManager,
    // Mappers that route recognized literals (DOI/ISBN) to dedicated fields.
    term_field_mappers_manager: TermFieldMappersManager,
    // Parser behavior: default fields, boosts, aliases, exclusions, etc.
    query_parser_config: QueryParserConfig,
}
38
39#[derive(thiserror::Error, Debug, PartialEq)]
41pub enum QueryParserError {
42 #[error("syntax_error: {0}")]
44 Syntax(String),
45 #[error("unsupported_query_error: {0}")]
47 UnsupportedQuery(String),
48 #[error("field_doest_not_exist_error: '{0}'")]
50 FieldDoesNotExist(String),
51 #[error("expected_int_error: '{0:?}'")]
54 ExpectedInt(#[from] std::num::ParseIntError),
55 #[error("expected_base64_error: '{0:?}'")]
58 ExpectedBase64(#[from] base64::DecodeError),
59 #[error("expected_float_error: {0}")]
62 ExpectedFloat(#[from] std::num::ParseFloatError),
63 #[error("exptected_bool: '{0:?}'")]
66 ExpectedBool(#[from] std::str::ParseBoolError),
67 #[error("all_but_query_forbidden_error")]
69 AllButQueryForbidden,
70 #[error("No default field declared and no field specified in query")]
73 NoDefaultFieldDeclared,
74 #[error("field_not_indexed_error: {0}")]
77 FieldNotIndexed(String),
78 #[error("json_field_without_path_error: {0}")]
80 JsonFieldWithoutPath(String),
81 #[error("non_json_field_with_path_error: {0}")]
83 NonJsonFieldWithPath(String),
84 #[error("field_does_not_have_positions_indexed_error: {0}")]
87 FieldDoesNotHavePositionsIndexed(String),
88 #[error("unknown_tokenizer_error: '{tokenizer:?}' for the field '{field:?}'")]
91 UnknownTokenizer {
92 tokenizer: String,
94 field: String,
96 },
97 #[error("range_must_not_have_phrase")]
100 RangeMustNotHavePhrase,
101 #[error("date_format_error: {0}")]
103 DateFormat(#[from] time::error::Parse),
104 #[error("facet_parse_error: {0}")]
106 FacetFormat(#[from] FacetParseError),
107 #[error("ip_format_error: {0}")]
109 IpFormat(#[from] std::net::AddrParseError),
110 #[error("pest_error: {0}")]
112 Pest(#[from] Box<pest::error::Error<Rule>>),
113}
114
115pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool {
116 match typ {
117 Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true,
118 Type::IpAddr => true,
119 Type::Str | Type::Facet | Type::Bytes | Type::Json => false,
120 }
121}
122
123fn boost_query(query: Box<dyn Query>, boost: Option<f32>) -> Box<dyn Query> {
124 if let Some(boost) = boost {
125 return Box::new(BoostQuery::new(query, boost)) as Box<dyn Query>;
126 }
127 query
128}
129
/// Combines two optional boost factors: multiplies them when both are present,
/// otherwise keeps whichever one is set (or `None` when neither is).
fn multiply_boosts(a: Option<f32>, b: Option<f32>) -> Option<f32> {
    match (a, b) {
        (Some(lhs), Some(rhs)) => Some(lhs * rhs),
        (lhs, rhs) => lhs.or(rhs),
    }
}
138
/// Recursively normalizes boolean queries: `Should`-wrapped nested boolean
/// queries are flattened into their parent (hoisting the nested clauses with
/// their own occurs), and a boolean query that reduces to a single `Should`
/// clause is replaced by that clause directly. Non-boolean queries pass through
/// untouched.
fn reduce_should_clause(query: Box<dyn Query>) -> Box<dyn Query> {
    if let Some(boolean_query) = query.deref().as_any().downcast_ref::<BooleanQuery>() {
        let mut subqueries = vec![];
        for (occur, nested_query) in boolean_query.clauses() {
            // Normalize the child first, then decide whether it can be inlined.
            let nested_query = reduce_should_clause(nested_query.box_clone());
            match occur {
                // Must / MustNot clauses keep their own boolean boundary.
                Occur::Must | Occur::MustNot => subqueries.push((*occur, nested_query)),
                Occur::Should => {
                    // A Should clause that is itself a boolean query is splice-merged
                    // into the parent, preserving the inner clauses' occurs.
                    if let Some(nested_boolean_query) = nested_query.deref().as_any().downcast_ref::<BooleanQuery>() {
                        subqueries.extend(nested_boolean_query.clauses().iter().map(|(o, q)| (*o, reduce_should_clause(q.box_clone()))))
                    } else {
                        subqueries.push((*occur, nested_query))
                    }
                }
            }
        }
        // A lone Should clause needs no boolean wrapper at all.
        if subqueries.len() == 1 && subqueries[0].0 == Occur::Should {
            return subqueries.into_iter().next().expect("impossible").1;
        }
        return Box::new(BooleanQuery::new(subqueries)) as Box<dyn Query>;
    }
    query
}
162
163fn reduce_empty_queries(query: Box<dyn Query>) -> Box<dyn Query> {
164 if let Some(boolean_query) = query.deref().as_any().downcast_ref::<BooleanQuery>() {
165 let subqueries: Vec<_> = boolean_query
166 .clauses()
167 .iter()
168 .filter_map(|(occur, nested_query)| {
169 if nested_query.deref().as_any().downcast_ref::<EmptyQuery>().is_some() {
170 None
171 } else {
172 Some((*occur, reduce_empty_queries(nested_query.box_clone())))
173 }
174 })
175 .collect();
176 if subqueries.is_empty() {
177 return Box::new(EmptyQuery {}) as Box<dyn Query>;
178 }
179 return Box::new(BooleanQuery::new(subqueries)) as Box<dyn Query>;
180 }
181 query
182}
183
184type Subqueries = Vec<(Occur, Box<dyn Query>)>;
185
186impl QueryParser {
    /// Creates a new `QueryParser` for `schema`.
    ///
    /// Validates up-front that every configured default field exists in the
    /// schema, so later lookups via `find_field` cannot fail for default fields.
    ///
    /// # Errors
    /// Returns an error when a configured default field is not present in `schema`.
    pub fn new(
        schema: Schema,
        query_parser_config: QueryParserConfig,
        morphology_manager: &MorphologyManager,
        tokenizer_manager: &TokenizerManager,
    ) -> SummaResult<QueryParser> {
        validators::parse_fields(&schema, &query_parser_config.0.default_fields, &[])?;
        Ok(QueryParser {
            term_field_mappers_manager: TermFieldMappersManager::new(&schema, tokenizer_manager),
            morphology_manager: morphology_manager.clone(),
            tokenizer_manager: tokenizer_manager.clone(),
            query_parser_config,
            schema,
        })
    }
202
    /// Convenience constructor that borrows the schema and tokenizers from an
    /// existing tantivy [`Index`].
    pub fn for_index(index: &Index, query_parser_config: QueryParserConfig, morphology_manager: &MorphologyManager) -> SummaResult<QueryParser> {
        QueryParser::new(index.schema(), query_parser_config, morphology_manager, index.tokenizers())
    }
206
207 pub fn resolve_field_name<'a>(&'a self, field_name: &'a str) -> &str {
208 self.query_parser_config
209 .0
210 .field_aliases
211 .get(field_name)
212 .map(|s| s.as_str())
213 .unwrap_or(field_name)
214 }
215
216 fn get_text_analyzer(&self, field_entry: &FieldEntry, option: &TextFieldIndexing) -> Result<TextAnalyzer, QueryParserError> {
217 self.tokenizer_manager
218 .get(option.tokenizer())
219 .ok_or_else(|| QueryParserError::UnknownTokenizer {
220 field: field_entry.name().to_string(),
221 tokenizer: option.tokenizer().to_string(),
222 })
223 }
224
    /// Builds queries for `term` against every configured default field.
    ///
    /// The occur (`+`/`-`/none) is extracted from the term pair, then the
    /// pre-term is parsed once per default field. How the per-field queries are
    /// combined depends on the occur:
    /// - `Should`: boolean OR or DisjunctionMax, per the configured default mode;
    /// - `MustNot`: a flat boolean with every clause negated;
    /// - `Must`: every token must appear, but in *any* of the default fields
    ///   (fields are OR-ed inside a per-token Must clause via transposition).
    fn default_field_queries(&self, term: Pair<Rule>, boost: Option<f32>) -> Result<Box<dyn Query>, QueryParserError> {
        let (occur, term) = match term.as_rule() {
            // A bare field name that didn't resolve to a schema field is treated
            // as an ordinary Should-term over the default fields.
            Rule::field_name => (Occur::Should, term),
            _ => {
                let term = term.into_inner().next().expect("grammar failure");
                let occur = self.parse_occur(&term);
                let pre_term = term.into_inner().next().expect("grammar failure");
                (occur, pre_term.clone())
            }
        };

        // One Vec<Box<dyn Query>> per default field (a field may yield several
        // queries, one per token produced by its analyzer).
        let default_field_queries = self
            .query_parser_config
            .0
            .default_fields
            .iter()
            .map(|field| {
                // Default fields were validated in `new`, so lookup cannot fail.
                let (field, full_path) = self.schema.find_field(field).expect("inconsistent state");
                self.parse_pre_term(&field, full_path, term.clone(), boost, true)
            })
            .collect::<Result<Vec<_>, _>>()?;

        Ok(match occur {
            Occur::Should => {
                let default_field_queries = default_field_queries.into_iter().flatten();
                match QueryParserDefaultMode::from(self.query_parser_config.0.default_mode.clone()) {
                    QueryParserDefaultMode::Boolean => Box::new(BooleanQuery::new(default_field_queries.map(|q| (occur, q)).collect())) as Box<dyn Query>,
                    QueryParserDefaultMode::DisjuctionMax { tie_breaker } => {
                        Box::new(DisjunctionMaxQuery::with_tie_breaker(default_field_queries.collect(), tie_breaker)) as Box<dyn Query>
                    }
                }
            }
            Occur::MustNot => Box::new(BooleanQuery::new(default_field_queries.into_iter().flatten().map(|q| (occur, q)).collect())) as Box<dyn Query>,
            Occur::Must => {
                if self.query_parser_config.0.default_fields.len() == 1 {
                    // Single default field: a plain conjunction suffices.
                    Box::new(BooleanQuery::new(
                        default_field_queries.into_iter().flatten().map(|q| (Occur::Must, q)).collect(),
                    )) as Box<dyn Query>
                } else {
                    // Multiple default fields: transpose [field][token] -> [token][field]
                    // so each token becomes Must(Should(field1, field2, ...)).
                    let transposed_default_field_queries = transpose(default_field_queries);
                    Box::new(BooleanQuery::new(
                        transposed_default_field_queries
                            .into_iter()
                            .map(|queries| {
                                (
                                    Occur::Must,
                                    Box::new(BooleanQuery::new(queries.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>,
                                )
                            })
                            .collect(),
                    )) as Box<dyn Query>
                }
            }
        })
    }
280
    /// Parses a `[left TO right]`-style range pair into a [`RangeQuery`].
    ///
    /// The field must be either indexed or a fast field (fast fields can answer
    /// range scans without an inverted index).
    ///
    /// # Errors
    /// [`QueryParserError::FieldNotIndexed`] when the field supports neither.
    fn parse_range(&self, pre_term: Pair<Rule>, field: &Field) -> Result<RangeQuery, QueryParserError> {
        let mut range_pairs = pre_term.into_inner();
        let field_entry = self.schema.get_field_entry(*field);
        if !field_entry.field_type().is_indexed() && !field_entry.field_type().is_fast() {
            return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
        }
        // Grammar guarantees exactly two boundary words (either may be `*`).
        let left = self.parse_boundary_word(*field, range_pairs.next().expect("grammar failure"))?;
        let right = self.parse_boundary_word(*field, range_pairs.next().expect("grammar failure"))?;

        Ok(RangeQuery::new(left, right))
    }
292
    /// Tokenizes `words` with the field's configured analyzer and returns
    /// `(position, term)` pairs suitable for building a phrase query.
    pub fn parse_words(&self, field: Field, full_path: &str, option: &TextFieldIndexing, words: &str) -> Result<Vec<(usize, Term)>, QueryParserError> {
        let field_entry = self.schema.get_field_entry(field);
        let mut text_analyzer = self.get_text_analyzer(field_entry, option)?;
        let mut token_stream = text_analyzer.token_stream(words);
        let mut terms = Vec::new();
        token_stream.process(&mut |token| {
            let term = cast_field_to_term(&field, full_path, field_entry.field_type(), &token.text, true);
            terms.push((token.position, term));
        });
        Ok(terms)
    }
304
    /// Converts a single pre-term (word, phrase, range or regex) into one or
    /// more queries against `field`, dispatching on the field's schema type.
    ///
    /// `full_path` is the dotted JSON sub-path and must be non-empty exactly
    /// when the field is a JSON field. `boost` is multiplied with any
    /// configured per-field boost. When `ignore_phrase_for_non_position_field`
    /// is set, a phrase against a field lacking positions yields no queries
    /// instead of an error (used when fanning out over default fields).
    fn parse_pre_term(
        &self,
        field: &Field,
        full_path: &str,
        pre_term: Pair<Rule>,
        boost: Option<f32>,
        ignore_phrase_for_non_position_field: bool,
    ) -> Result<Vec<Box<dyn Query>>, QueryParserError> {
        let field_entry = self.schema.get_field_entry(*field);
        let field_type = field_entry.field_type();

        // JSON fields require a sub-path; non-JSON fields must not carry one.
        if field_type.value_type() == Type::Json && full_path.is_empty() {
            return Err(QueryParserError::JsonFieldWithoutPath(field_entry.name().to_string()));
        }

        if field_type.value_type() != Type::Json && !full_path.is_empty() {
            return Err(QueryParserError::NonJsonFieldWithPath(format!("{}.{}", field_entry.name(), full_path)));
        }

        // Ranges can also be answered from fast fields; everything else needs
        // the inverted index.
        if !(field_type.is_indexed() || matches!(pre_term.as_rule(), Rule::range) && field_type.is_fast()) {
            return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
        }

        // Fold the configured per-field boost into the statement-level boost.
        let boost = multiply_boosts(self.query_parser_config.0.field_boosts.get(field_entry.name()).copied(), boost);

        if matches!(pre_term.as_rule(), Rule::range) {
            return Ok(vec![boost_query(Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>, boost)]);
        }

        return match *field_type {
            FieldType::Bytes(_) => match pre_term.as_rule() {
                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                Rule::phrase | Rule::word => {
                    // Bytes terms are supplied by the user in base64.
                    let val = &BASE64.decode(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_field_bytes(*field, val), IndexRecordOption::Basic)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::U64(_) => match pre_term.as_rule() {
                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                Rule::phrase | Rule::word => {
                    let val: u64 = u64::from_str(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_field_u64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::I64(_) => match pre_term.as_rule() {
                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                Rule::phrase | Rule::word => {
                    let val: i64 = i64::from_str(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_field_i64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::Facet(_) => match pre_term.as_rule() {
                // Facets have no range form; only exact facet-path matches.
                Rule::phrase | Rule::word => {
                    let val = Facet::from_text(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_facet(*field, &val), IndexRecordOption::Basic)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::F64(_) => match pre_term.as_rule() {
                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                Rule::phrase | Rule::word => {
                    // NOTE(review): this check appears redundant — the guard at the
                    // top already rejects non-indexed fields for non-range rules.
                    if !field_type.is_indexed() {
                        return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
                    }
                    let val: f64 = f64::from_str(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_field_f64(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::Bool(_) => match pre_term.as_rule() {
                Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                Rule::phrase | Rule::word => {
                    let val: bool = bool::from_str(pre_term.as_str())?;
                    let query = Box::new(TermQuery::new(Term::from_field_bool(*field, val), IndexRecordOption::WithFreqs)) as Box<dyn Query>;
                    Ok(vec![boost_query(query, boost)])
                }
                _ => unreachable!(),
            },
            FieldType::Str(_) | FieldType::JsonObject(_) => {
                // Both text-like types carry text indexing options; the schema
                // guarantees they exist for indexed fields.
                let indexing = if let FieldType::Str(ref text_options) = field_type {
                    text_options.get_indexing_options().expect("unreachable")
                } else if let FieldType::JsonObject(ref json_options) = field_type {
                    json_options.get_text_indexing_options().expect("unreachable")
                } else {
                    unreachable!()
                };

                match pre_term.as_rule() {
                    Rule::word | Rule::field_name => {
                        let mut text_analyzer = self.get_text_analyzer(field_entry, indexing)?;
                        let mut token_stream = text_analyzer.token_stream(pre_term.as_str());
                        let mut queries = Vec::new();
                        token_stream.process(&mut |token| {
                            let morphology_config = self
                                .query_parser_config
                                .0
                                .morphology_configs
                                .get(field_entry.name())
                                .cloned()
                                .unwrap_or_default();
                            // Morphology-derived expansion is only applied to short
                            // inputs; 24 bytes looks like a heuristic cut-off — longer
                            // tokens are matched verbatim. TODO confirm rationale.
                            let query = if let Some(morphology) = self.morphology_manager.get(self.query_parser_config.0.query_language()) {
                                if pre_term.as_str().len() < 24 {
                                    morphology.derive_query(morphology_config, field, full_path, field_type, &token.text)
                                } else {
                                    let term = cast_field_to_term(field, full_path, field_type, &token.text, false);
                                    Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>
                                }
                            } else {
                                let term = cast_field_to_term(field, full_path, field_type, &token.text, false);
                                Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>
                            };
                            queries.push(boost_query(query, boost))
                        });
                        Ok(queries)
                    }
                    Rule::phrase => {
                        let mut phrase_pairs = pre_term.into_inner();
                        // An empty phrase ('' or "") produces no queries at all.
                        let words = match phrase_pairs.next() {
                            None => return Ok(vec![]),
                            Some(words) => words,
                        };

                        // Optional `~N` slop suffix; absent or empty means exact phrase.
                        let slop = phrase_pairs
                            .next()
                            .map(|v| match v.as_str() {
                                "" => 0,
                                _ => u32::from_str(v.as_str()).expect("cannot parse"),
                            })
                            .unwrap_or(0);
                        let terms = self.parse_words(*field, full_path, indexing, words.as_str())?;
                        // A phrase that analyzes to 0 or 1 tokens degrades to term queries.
                        if terms.len() <= 1 {
                            return Ok(terms
                                .into_iter()
                                .map(|(_, term)| {
                                    let query = Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs)) as Box<dyn Query>;
                                    boost_query(query, boost)
                                })
                                .collect());
                        }
                        return if indexing.index_option().has_positions() {
                            let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, slop)) as Box<dyn Query>;
                            Ok(vec![boost_query(query, boost)])
                        } else if ignore_phrase_for_non_position_field {
                            // Silently skip this field (default-field fan-out mode).
                            Ok(vec![])
                        } else {
                            Err(QueryParserError::FieldDoesNotHavePositionsIndexed(field_entry.name().to_string()))
                        };
                    }
                    Rule::range => Ok(vec![Box::new(self.parse_range(pre_term, field)?) as Box<dyn Query>]),
                    Rule::regex => {
                        // Invalid patterns are reported as generic syntax errors.
                        let query = Box::new(
                            RegexQuery::from_pattern(pre_term.clone().into_inner().next().expect("grammar failure").as_str(), *field)
                                .map_err(|_| QueryParserError::Syntax(pre_term.as_str().to_string()))?,
                        ) as Box<dyn Query>;
                        return Ok(vec![boost_query(query, boost)]);
                    }
                    _ => unreachable!(),
                }
            }
            _ => unreachable!(),
        };
    }
476
477 fn parse_occur(&self, occur: &Pair<Rule>) -> Occur {
478 match occur.as_rule() {
479 Rule::positive_term | Rule::positive_grouping => Occur::Must,
480 Rule::negative_term | Rule::negative_grouping => Occur::MustNot,
481 Rule::default_term | Rule::default_grouping => Occur::Should,
482 _ => unreachable!(),
483 }
484 }
485
    /// Parses a field-scoped term: extracts its occur and pre-term, then wraps
    /// the per-token queries in a boolean query carrying that occur.
    fn parse_term(&self, term: Pair<Rule>, field: &Field, full_path: &str, boost: Option<f32>) -> Result<Box<dyn Query>, QueryParserError> {
        let term = term.into_inner().next().expect("grammar failure");
        let occur = self.parse_occur(&term);
        let pre_term = term.into_inner().next().expect("grammar failure");
        Ok(Box::new(BooleanQuery::new(
            self.parse_pre_term(field, full_path, pre_term, boost, false)?
                .into_iter()
                .map(|q| (occur, q))
                .collect(),
        )))
    }
497
498 fn compute_boundary_term(&self, field: Field, phrase: &str) -> Result<Term, QueryParserError> {
499 let field_entry = self.schema.get_field_entry(field);
500 let field_type = field_entry.field_type();
501 let field_supports_ff_range_queries = field_type.is_fast() && is_type_valid_for_fastfield_range_query(field_type.value_type());
502
503 if !field_type.is_indexed() && !field_supports_ff_range_queries {
504 return Err(QueryParserError::FieldNotIndexed(field_entry.name().to_string()));
505 }
506
507 match *field_type {
508 FieldType::U64(_) => {
509 let val: u64 = u64::from_str(phrase)?;
510 Ok(Term::from_field_u64(field, val))
511 }
512 FieldType::I64(_) => {
513 let val: i64 = i64::from_str(phrase)?;
514 Ok(Term::from_field_i64(field, val))
515 }
516 FieldType::F64(_) => {
517 let val: f64 = f64::from_str(phrase)?;
518 Ok(Term::from_field_f64(field, val))
519 }
520 FieldType::Bool(_) => {
521 let val: bool = bool::from_str(phrase)?;
522 Ok(Term::from_field_bool(field, val))
523 }
524 FieldType::Bytes(_) => {
525 let val = &BASE64.decode(phrase)?;
526 Ok(Term::from_field_bytes(field, val))
527 }
528 FieldType::Str(ref str_options) => {
529 let option = str_options.get_indexing_options().ok_or_else(|| {
530 QueryParserError::FieldNotIndexed(field_entry.name().to_string())
532 })?;
533 let mut terms: Vec<Term> = Vec::new();
534 let mut text_analyzer = self
535 .tokenizer_manager
536 .get(option.tokenizer())
537 .ok_or_else(|| QueryParserError::UnknownTokenizer {
538 field: field_entry.name().to_string(),
539 tokenizer: option.tokenizer().to_string(),
540 })?;
541 let mut token_stream = text_analyzer.token_stream(phrase);
542 token_stream.process(&mut |token| {
543 let term = Term::from_field_text(field, &token.text);
544 terms.push(term);
545 });
546 if terms.len() != 1 {
547 return Err(QueryParserError::UnsupportedQuery(format!(
548 "Range query boundary cannot have multiple tokens: {phrase:?}."
549 )));
550 }
551 Ok(terms.into_iter().next().expect("grammar failure"))
552 }
553 _ => unreachable!(),
554 }
555 }
556
    /// Converts one range boundary pair into a [`Bound`]: `*` means unbounded,
    /// otherwise the word is parsed as an inclusive boundary term.
    fn parse_boundary_word(&self, field: Field, boundary_word: Pair<Rule>) -> Result<Bound<Term>, QueryParserError> {
        Ok(match boundary_word.as_rule() {
            Rule::star => Unbounded,
            Rule::signed_word => Included(self.compute_boundary_term(field, boundary_word.as_str())?),
            _ => unreachable!(),
        })
    }
564
    /// Reconstructs the query as one plain phrase if — and only if — every
    /// statement is an unboosted, unsigned plain word. Returns `None` as soon
    /// as any statement is a group, phrase, signed term or carries a boost;
    /// used by the exact-match promoter in `parse_statements`.
    fn extract_top_level_phrase(&self, pairs: Pairs<Rule>) -> Option<String> {
        let mut terms = vec![];
        for pair in pairs {
            let mut statement_pairs = pair.into_inner();
            let search_group_or_term = statement_pairs.next().expect("grammar failure");
            let boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure"));
            match (search_group_or_term.as_rule(), boost) {
                (Rule::term, None) => {
                    let term = search_group_or_term.into_inner().next().expect("grammar_failure");
                    let occur = self.parse_occur(&term);
                    let pre_term = term.into_inner().next().expect("grammar failure");
                    // Only plain (Should) bare words participate in the phrase.
                    if occur == Occur::Should && matches!(pre_term.as_rule(), Rule::word) {
                        terms.push(pre_term.as_str())
                    }
                }
                _ => return None,
            }
        }
        (!terms.is_empty()).then(|| terms.join(" "))
    }
585
    /// Parses one top-level statement into a query.
    ///
    /// A statement is one of: a field-scoped search group (`field:(...)`,
    /// `field:term`, `field:*`), a recognized DOI/ISBN literal, a bare term, or
    /// a parenthesized grouping — optionally followed by a `^boost` factor.
    /// Unknown field names fall back to the default fields unless the field is
    /// configured as excluded (which yields an [`EmptyQuery`]).
    fn parse_statement(&self, pair: Pair<Rule>) -> Result<Box<dyn Query>, QueryParserError> {
        let mut statement_pairs = pair.into_inner();
        let isbn_doi_or_search_group_or_grouping_or_term = statement_pairs.next().expect("grammar failure");
        // Optional trailing boost, e.g. `title:foo^2.5`.
        let statement_boost = statement_pairs.next().map(|boost| f32::from_str(boost.as_str()).expect("grammar failure"));
        let statement_result = match isbn_doi_or_search_group_or_grouping_or_term.as_rule() {
            Rule::search_group => {
                let mut search_group = isbn_doi_or_search_group_or_grouping_or_term.into_inner();
                let field_name = search_group.next().expect("grammar failure");
                let grouping_or_term = search_group.next().expect("grammar failure");
                match grouping_or_term.as_rule() {
                    // `field:(a b c)` — every inner term scoped to the field.
                    Rule::grouping => {
                        let grouping = grouping_or_term.into_inner().next().expect("grammar failure");
                        let occur = self.parse_occur(&grouping);
                        let mut intermediate_results = vec![];
                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
                        match self.schema.find_field(resolved_field_name) {
                            Some((field, full_path)) => {
                                for term in grouping.into_inner() {
                                    intermediate_results.push(self.parse_term(term, &field, full_path, statement_boost)?);
                                }
                            }
                            None => {
                                // Unknown field: drop entirely if excluded, otherwise
                                // treat the field name and each inner term as plain
                                // default-field terms.
                                if self.query_parser_config.0.excluded_fields.iter().any(|x| x == field_name.as_str()) {
                                    return Ok(Box::new(EmptyQuery {}));
                                }
                                intermediate_results.push(self.default_field_queries(field_name, statement_boost)?);
                                for term in grouping.into_inner() {
                                    intermediate_results.push(self.default_field_queries(term, statement_boost)?)
                                }
                            }
                        }
                        let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>;
                        // Re-wrap to carry the group's own +/- prefix.
                        match occur {
                            Occur::Should => Ok(group_query),
                            Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box<dyn Query>),
                            Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box<dyn Query>),
                        }
                    }
                    // `field:term`
                    Rule::term => {
                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
                        match self.schema.find_field(resolved_field_name) {
                            Some((field, full_path)) => self.parse_term(grouping_or_term, &field, full_path, statement_boost),
                            None => {
                                // Exclusion also matches the dotted prefix, so
                                // `meta.x:y` is dropped when `meta` is excluded.
                                if self
                                    .query_parser_config
                                    .0
                                    .excluded_fields
                                    .iter()
                                    .any(|x| x == field_name.as_str() || Some(x.as_str()) == field_name.as_str().split('.').next())
                                {
                                    Ok(Box::new(EmptyQuery {}) as Box<dyn Query>)
                                } else {
                                    // Fall back: both halves of `name:term` become
                                    // default-field queries.
                                    Ok(Box::new(BooleanQuery::new(vec![
                                        (Occur::Should, self.default_field_queries(field_name, statement_boost)?),
                                        (Occur::Should, self.default_field_queries(grouping_or_term, statement_boost)?),
                                    ])) as Box<dyn Query>)
                                }
                            }
                        }
                    }
                    // `field:*` — existence check for the field.
                    Rule::star => {
                        let resolved_field_name = self.resolve_field_name(field_name.as_str());
                        match self.schema.find_field(resolved_field_name) {
                            Some((field, full_path)) => Ok(Box::new(ExistsQuery::new(field, full_path)) as Box<dyn Query>),
                            None => {
                                if self
                                    .query_parser_config
                                    .0
                                    .excluded_fields
                                    .iter()
                                    .any(|x| x == field_name.as_str() || Some(x.as_str()) == field_name.as_str().split('.').next())
                                {
                                    Ok(Box::new(EmptyQuery {}) as Box<dyn Query>)
                                } else {
                                    Ok(self.default_field_queries(field_name, statement_boost)?)
                                }
                            }
                        }
                    }
                    _ => unreachable!(),
                }
            }
            // A literal recognized as a DOI: route through the configured DOI
            // term-field mappers. NOTE(review): this branch duplicates the isbn
            // branch below except for the mapper-name list — candidate for a helper.
            Rule::doi => {
                let mut queries = vec![];

                for term_field_mapper_name in ["doi", "doi_isbn"] {
                    if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
                        if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
                            if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
                            {
                                queries.push((Occur::Should, query));
                            }
                        }
                    }
                }

                Ok(Box::new(BooleanQuery::new(queries)) as Box<dyn Query>)
            }
            // A literal recognized as an ISBN.
            Rule::isbn => {
                let mut queries = vec![];

                for term_field_mapper_name in ["isbn"] {
                    if let Some(term_field_mapper_config) = self.query_parser_config.0.term_field_mapper_configs.get(term_field_mapper_name) {
                        if let Some(term_field_mapper) = self.term_field_mappers_manager.get(term_field_mapper_name) {
                            if let Some(query) = term_field_mapper.map(isbn_doi_or_search_group_or_grouping_or_term.as_str(), &term_field_mapper_config.fields)
                            {
                                queries.push((Occur::Should, query));
                            }
                        }
                    }
                }

                Ok(Box::new(BooleanQuery::new(queries)) as Box<dyn Query>)
            }
            // Bare term without a field prefix: query the default fields.
            Rule::term => self.default_field_queries(isbn_doi_or_search_group_or_grouping_or_term, statement_boost),
            // Parenthesized grouping without a field prefix.
            Rule::grouping => {
                let grouping = isbn_doi_or_search_group_or_grouping_or_term.into_inner().next().expect("grammar failure");
                let occur = self.parse_occur(&grouping);
                let mut intermediate_results = vec![];
                for term in grouping.into_inner() {
                    intermediate_results.push(self.default_field_queries(term, statement_boost)?)
                }
                let group_query = Box::new(BooleanQuery::new(intermediate_results.into_iter().map(|q| (Occur::Should, q)).collect())) as Box<dyn Query>;
                match occur {
                    Occur::Should => Ok(group_query),
                    Occur::Must => Ok(Box::new(BooleanQuery::new(vec![(Occur::Must, group_query)])) as Box<dyn Query>),
                    Occur::MustNot => Ok(Box::new(BooleanQuery::new(vec![(Occur::MustNot, group_query)])) as Box<dyn Query>),
                }
            }
            e => panic!("{e:?}"),
        }?;
        Ok(statement_result)
    }
719
    /// Combines all parsed statements into one `Should`-joined boolean query
    /// and, when the whole input is a plain word sequence, appends boosted
    /// exact-phrase "promoter" queries so exact matches rank higher.
    ///
    /// The final clause list is truncated to the configured term limit.
    fn parse_statements(&self, pairs: Pairs<Rule>) -> Result<Box<dyn Query>, QueryParserError> {
        let mut subqueries = Subqueries::new();

        for pair in pairs.clone() {
            let parsed_queries = self.parse_statement(pair)?;
            subqueries.push((Occur::Should, parsed_queries));
        }

        // Exact-match promotion only applies when the query is a bare phrase
        // (no fields, boosts, groups or signs) and a promoter is configured.
        if let Some(top_level_phrase) = self.extract_top_level_phrase(pairs) {
            if let Some(exact_matches_promoter) = &self.query_parser_config.0.exact_matches_promoter {
                // Promoter fields default to the parser's default fields.
                let fields = if exact_matches_promoter.fields.is_empty() {
                    self.query_parser_config.0.default_fields.iter()
                } else {
                    exact_matches_promoter.fields.iter()
                };
                subqueries.extend(
                    fields
                        .filter_map(|field| {
                            let (field, full_path) = self.schema.find_field(self.resolve_field_name(field)).expect("no field")
;
                            let field_entry = self.schema.get_field_entry(field);
                            let field_boost = self.query_parser_config.0.field_boosts.get(field_entry.name()).copied();
                            // NOTE(review): the Str and JsonObject arms are identical
                            // apart from how the indexing options are fetched.
                            match field_entry.field_type() {
                                FieldType::Str(ref str_option) => {
                                    let option = str_option.get_indexing_options()?;
                                    let terms = match self.parse_words(field, full_path, option, &top_level_phrase) {
                                        Ok(terms) => terms,
                                        Err(err) => return Some(Err(err)),
                                    };
                                    // Single-term phrases add nothing; positions are
                                    // required for phrase queries.
                                    (terms.len() > 1 && option.index_option().has_positions()).then(|| {
                                        let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, exact_matches_promoter.slop)) as Box<dyn Query>;
                                        Ok(boost_query(query, multiply_boosts(exact_matches_promoter.boost, field_boost)))
                                    })
                                }
                                FieldType::JsonObject(ref json_option) => {
                                    let option = json_option.get_text_indexing_options()?;
                                    let terms = match self.parse_words(field, full_path, option, &top_level_phrase) {
                                        Ok(terms) => terms,
                                        Err(err) => return Some(Err(err)),
                                    };
                                    (terms.len() > 1 && option.index_option().has_positions()).then(|| {
                                        let query = Box::new(PhraseQuery::new_with_offset_and_slop(terms, exact_matches_promoter.slop)) as Box<dyn Query>;
                                        Ok(boost_query(query, multiply_boosts(exact_matches_promoter.boost, field_boost)))
                                    })
                                }
                                _ => None,
                            }
                        })
                        .collect::<Result<Vec<_>, _>>()?
                        .into_iter()
                        .map(|q| (Occur::Should, q)),
                )
            }
        }
        Ok(Box::new(BooleanQuery::new(subqueries.into_iter().take(self.query_parser_config.term_limit()).collect())) as Box<dyn Query>)
    }
775
776 pub fn parse_query(&self, query: &str) -> Result<Box<dyn Query>, QueryParserError> {
777 let pairs = SummaQlParser::parse(Rule::main, query).map_err(Box::new)?;
778 Ok(reduce_empty_queries(reduce_should_clause(self.parse_statements(pairs)?)))
779 }
780}
781
782#[cfg(test)]
783mod tests {
784 use std::collections::HashMap;
785
786 use summa_proto::proto;
787 use tantivy::schema::{TextOptions, INDEXED, STRING, TEXT};
788 use tantivy::tokenizer::{LowerCaser, RemoveLongFilter};
789
790 use super::*;
791 use crate::components::tokenizers::Tokenizer;
792
    // Builds a minimal parser fixture: default tokenizers, `title` as the only
    // default field, plus a mix of text/i64/json fields.
    fn create_query_parser() -> QueryParser {
        let tokenizer_manager = TokenizerManager::default();
        let morphology_manager = MorphologyManager::default();
        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field("title", TEXT);
        schema_builder.add_text_field("body", TEXT);
        schema_builder.add_i64_field("timestamp", INDEXED);
        schema_builder.add_text_field("doi", STRING);
        schema_builder.add_text_field("isbns", STRING);
        schema_builder.add_json_field("metadata", TEXT);
        let schema = schema_builder.build();
        let query_parser_config = QueryParserConfig(proto::QueryParserConfig {
            default_fields: vec!["title".to_string()],
            ..Default::default()
        });
        QueryParser::new(schema, query_parser_config, &morphology_manager, &tokenizer_manager).expect("cannot create parser")
    }
810
    // Builds a richer fixture: a custom "summa" tokenizer on `title` (with
    // positions), more fields, and two default fields (`title`, `body`).
    fn create_complex_query_parser() -> QueryParser {
        let tokenizer_manager = TokenizerManager::default();
        tokenizer_manager.register(
            "summa",
            TextAnalyzer::builder(Tokenizer).filter(RemoveLongFilter::limit(100)).filter(LowerCaser).build(),
        );
        let morphology_manager = MorphologyManager::default();
        let mut schema_builder = Schema::builder();
        let text_options = TextOptions::default().set_indexing_options(
            TextFieldIndexing::default()
                .set_tokenizer("summa")
                .set_index_option(IndexRecordOption::WithFreqsAndPositions),
        );
        schema_builder.add_text_field("title", text_options);
        schema_builder.add_text_field("body", TEXT);
        schema_builder.add_text_field("authors", TEXT);
        schema_builder.add_text_field("language", TEXT);
        schema_builder.add_i64_field("timestamp", INDEXED);
        schema_builder.add_text_field("doi", STRING);
        schema_builder.add_text_field("isbns", STRING);
        schema_builder.add_json_field("metadata", TEXT);
        let schema = schema_builder.build();
        let query_parser_config = QueryParserConfig(proto::QueryParserConfig {
            default_fields: vec!["title".to_string(), "body".to_string()],
            ..Default::default()
        });
        QueryParser::new(schema, query_parser_config, &morphology_manager, &tokenizer_manager).expect("cannot create parser")
    }
839
    /// Default-mode parsing over the two default fields (`title` = field 0,
    /// `body` = field 1): bare terms expand to one Should clause per field,
    /// quoted strings become per-field phrase queries, and a leading `+`
    /// makes the whole per-field disjunction mandatory.
    #[test]
    pub fn test_parser_base() {
        let query_parser = create_complex_query_parser();
        // Each bare term is searched in every default field.
        let query = query_parser.parse_query("search engine");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\"))), (Should, TermQuery(Term(field=1, type=Str, \"engine\")))] })");
        // Single quotes produce one phrase query per default field.
        let query = query_parser.parse_query("'search engine'");
        assert_eq!(
            format!("{:?}", query),
            "Ok(BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engine\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })] })"
        );
        // `+` wraps the per-field phrase disjunction in a Must clause; the
        // tokenizers lowercase the phrase terms.
        let query = query_parser.parse_query("+'I sette messaggeri'");
        assert_eq!(
            format!("{:?}", query),
            "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"i\")), (1, Term(field=0, type=Str, \"sette\")), (2, Term(field=0, type=Str, \"messaggeri\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"i\")), (1, Term(field=1, type=Str, \"sette\")), (2, Term(field=1, type=Str, \"messaggeri\"))], slop: 0 })] })] })"
        );
    }
856
857 #[test]
858 pub fn test_parser_slop() {
859 let query_parser = create_query_parser();
860 let query = query_parser.parse_query("body:'search engine'~4");
861 assert_eq!(
862 format!("{:?}", query),
863 "Ok(PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 4 })"
864 );
865 }
866
    /// Explicit `field:` prefixes, unknown-field fallback, and term-field-mappers
    /// that recognize DOI- and ISBN-shaped terms and reroute them to dedicated
    /// fields (`doi` = field 3, JSON `metadata` = field 5).
    #[test]
    pub fn test_parser_fields() {
        let mut query_parser = create_query_parser();
        // DOI-shaped bare terms are routed to the `doi` field.
        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
            "doi".to_string(),
            proto::TermFieldMapperConfig {
                fields: vec!["doi".to_string()],
            },
        );
        // ISBNs embedded inside DOIs go to the `metadata.isbns` JSON path.
        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
            "doi_isbn".to_string(),
            proto::TermFieldMapperConfig {
                fields: vec!["metadata.isbns".to_string()],
            },
        );
        // Stand-alone ISBNs go to the same JSON path.
        query_parser.query_parser_config.0.term_field_mapper_configs.insert(
            "isbn".to_string(),
            proto::TermFieldMapperConfig {
                fields: vec!["metadata.isbns".to_string()],
            },
        );
        // Explicit field prefix on a quoted phrase.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("body:'search engine'")),
            "Ok(PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })"
        );
        // Numeric value against an i64 field becomes an I64 term.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("timestamp:10")),
            "Ok(TermQuery(Term(field=2, type=I64, 10)))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("title:search engine")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
        );
        // Unknown field name: the whole input degrades to free text on the
        // default field ("not_field" itself is tokenized into "not" + "field").
        assert_eq!(
            format!("{:?}", query_parser.parse_query("not_field:search engine")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"not\"))), (Should, TermQuery(Term(field=0, type=Str, \"field\"))), (Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
        );
        // Explicit `doi:` keeps the raw value (trailing whitespace trimmed) …
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi:10.0000/abcd.0123 ")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
        );
        // … and does NOT strip URL wrappers around the DOI.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi:https://doi.org/10.0000/abcd.0123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"https://doi.org/10.0000/abcd.0123\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi:doi.org/10.0000/abcd.0123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"doi.org/10.0000/abcd.0123\")))"
        );
        // A bare DOI-shaped term is routed by the "doi" mapper without a prefix.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("10.0000/978123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/978123\")))"
        );
        // DOIs whose suffix looks like an ISBN-13 additionally search the
        // normalized (dashes removed) ISBN under `metadata.isbns`.
        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/9781234567890")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/9781234567890\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/978-12345-6789-0")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/978-12345-6789-0\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/978-12345-6789-0.ch11")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/978-12345-6789-0.ch11\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
        assert_eq!(format!("{:?}", query_parser.parse_query("10.0000/cbo978-12345-6789-0.ch11")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=3, type=Str, \"10.0000/cbo978-12345-6789-0.ch11\"))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))] })");
        // A stand-alone ISBN (with or without dashes) searches only the JSON path.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("978-12345-6789-0")),
            "Ok(TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("9781234567890")),
            "Ok(TermQuery(Term(field=5, type=Json, path=isbns, type=Str, \"9781234567890\")))"
        );
        // 14 digits — one too many for an ISBN-13 — presumably fails ISBN
        // detection, so the input falls back to plain tokenization.
        assert_eq!(format!("{:?}", query_parser.parse_query("97812-34-5678-909")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"97812\"))), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"5678\"))), (Should, TermQuery(Term(field=0, type=Str, \"909\")))] })");
        // Explicit JSON path: numeric tokens are indexed as I64 terms.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("metadata.isbns:97812-34-5678-90")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 97812))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 34))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 5678))), (Should, TermQuery(Term(field=5, type=Json, path=isbns, type=I64, 90)))] })"
        );
        assert_eq!(format!("{:?}", query_parser.parse_query("123 97812-34-5678-909")), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"123\"))), (Should, TermQuery(Term(field=0, type=Str, \"97812\"))), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"5678\"))), (Should, TermQuery(Term(field=0, type=Str, \"909\")))] })");
        assert_eq!(
            format!("{:?}", query_parser.parse_query("10.0000/cbo123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/cbo123\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("10.1515/12-23")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.1515/12-23\")))"
        );
        // Without an explicit `doi:` prefix the doi.org / https URL wrapper IS
        // stripped before the term is routed to the `doi` field.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi.org/10.0000/abcd.0123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("10.0000/abcd.0123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("https://doi.org/10.0000/abcd.0123")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.0000/abcd.0123\")))"
        );
    }
959
    /// Resilience to messy input: punctuation, unbalanced quotes and
    /// parentheses, and dangling operators must never produce a parse error.
    #[test]
    pub fn test_free_queries() {
        let query_parser = create_query_parser();
        // Punctuation is dropped; a colon after a non-field word does not
        // trigger field parsing.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("Search Engines: The Ultimate, Only Guide!")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engines\"))), (Should, TermQuery(Term(field=0, type=Str, \"the\"))), (Should, TermQuery(Term(field=0, type=Str, \"ultimate\"))), (Should, TermQuery(Term(field=0, type=Str, \"only\"))), (Should, TermQuery(Term(field=0, type=Str, \"guide\")))] })"
        );
        // Stray `!` and an unclosed `(` are tolerated.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("!! HI !! (SEARCH! ENGINES!")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"hi\"))), (Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=0, type=Str, \"engines\")))] })"
        );
        // An unterminated opening quote still yields a phrase query …
        assert_eq!(
            format!("{:?}", query_parser.parse_query("`non closed")),
            "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"non\")), (1, Term(field=0, type=Str, \"closed\"))], slop: 0 })"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("\"non closed")),
            "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"non\")), (1, Term(field=0, type=Str, \"closed\"))], slop: 0 })"
        );
        // … while a trailing quote with no opener is ignored (plain terms).
        assert_eq!(
            format!("{:?}", query_parser.parse_query("non closed`")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"non\"))), (Should, TermQuery(Term(field=0, type=Str, \"closed\")))] })"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("non closed\"")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"non\"))), (Should, TermQuery(Term(field=0, type=Str, \"closed\")))] })"
        );
        // Unclosed group after a field prefix: only the word `title` survives
        // as a term — NOTE(review): observed salvage behavior, verify intended.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("title:(search ")),
            "Ok(TermQuery(Term(field=0, type=Str, \"title\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("title:(search -")),
            "Ok(TermQuery(Term(field=0, type=Str, \"title\")))"
        );
        // Pure punctuation collapses to an EmptyQuery.
        assert_eq!(format!("{:?}", query_parser.parse_query("``")), "Ok(EmptyQuery)");
        assert_eq!(format!("{:?}", query_parser.parse_query("```")), "Ok(EmptyQuery)");
        assert_eq!(format!("{:?}", query_parser.parse_query(")(")), "Ok(EmptyQuery)");
        assert_eq!(
            format!("{:?}", query_parser.parse_query("(a)(b)`")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"a\"))), (Should, TermQuery(Term(field=0, type=Str, \"b\")))] })"
        );
        // Quoted and bare DOI-like values against the raw `doi` field (STRING,
        // untokenized) keep dots and slashes intact.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi:'10.1182/blood.v53.1.19.bloodjournal53119'")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.1182/blood.v53.1.19.bloodjournal53119\")))"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("doi:10.1182/blood.v53.1.19.bloodjournal53119")),
            "Ok(TermQuery(Term(field=3, type=Str, \"10.1182/blood.v53.1.19.bloodjournal53119\")))"
        );
        // Not DOI-shaped: `10.10 10/10` splits into four numeric tokens.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("10.10 10/10")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\"))), (Should, TermQuery(Term(field=0, type=Str, \"10\")))] })"
        );
        // With two default fields the phrase expands per field.
        let query_parser = create_complex_query_parser();
        assert_eq!(format!("{:?}", query_parser.parse_query("\"search engines\"")), "Ok(BooleanQuery { subqueries: [(Should, PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engines\"))], slop: 0 }), (Should, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engines\"))], slop: 0 })] })");
    }
1017
    /// Non-ASCII input: Cyrillic terms and non-ASCII punctuation.
    #[test]
    pub fn test_non_ascii() {
        let query_parser = create_query_parser();
        // The `body:` prefix binds only to the immediately following term;
        // the second word reverts to the default field (field 0).
        assert_eq!(
            format!("{:?}", query_parser.parse_query("body:поисковые системы")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
        );
        // Grouping works with non-ASCII terms.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("(поисковые системы)")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
        );
        // A colon after a word that is not a field name is plain punctuation.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("поисковые: системы")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"поисковые\"))), (Should, TermQuery(Term(field=0, type=Str, \"системы\")))] })"
        );
        // EN DASH (U+2013) in "cyber–physical" acts as a token separator.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("healthcare cyber–physical system")),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"healthcare\"))), (Should, TermQuery(Term(field=0, type=Str, \"cyber\"))), (Should, TermQuery(Term(field=0, type=Str, \"physical\"))), (Should, TermQuery(Term(field=0, type=Str, \"system\")))] })"
        );
    }
1038
1039 #[test]
1040 pub fn test_json() {
1041 let query_parser = create_query_parser();
1042 assert_eq!(
1043 format!("{:?}", query_parser.parse_query("metadata.a:1")),
1044 "Ok(TermQuery(Term(field=5, type=Json, path=a, type=I64, 1)))"
1045 );
1046 assert_eq!(
1047 format!("{:?}", query_parser.parse_query("metadata.a:\"1\"")),
1048 "Ok(TermQuery(Term(field=5, type=Json, path=a, type=Str, \"1\")))"
1049 );
1050 assert_eq!(
1051 format!("{:?}", query_parser.parse_query("metadata.a:\"1 2 3\"")),
1052 "Ok(PhraseQuery { field: Field(5), phrase_terms: [(0, Term(field=5, type=Json, path=a, type=Str, \"1\")), (1, Term(field=5, type=Json, path=a, type=Str, \"2\")), (2, Term(field=5, type=Json, path=a, type=Str, \"3\"))], slop: 0 })"
1053 );
1054 }
1055
1056 #[test]
1057 pub fn test_grouping() {
1058 let query_parser = create_query_parser();
1059 assert_eq!(
1060 format!("{:?}", query_parser.parse_query("body:+(a b)")),
1061 "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"a\"))), (Should, TermQuery(Term(field=1, type=Str, \"b\")))], minimum_number_should_match: 1 })], minimum_number_should_match: 0 })"
1062 );
1063 assert_eq!(
1064 format!("{:?}", query_parser.parse_query("body:-(a b)")),
1065 "Ok(BooleanQuery { subqueries: [(MustNot, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Str, \"a\"))), (Should, TermQuery(Term(field=1, type=Str, \"b\")))], minimum_number_should_match: 1 })], minimum_number_should_match: 0 })"
1066 );
1067 }
1068
1069 #[test]
1070 pub fn test_punct() {
1071 let query_parser = create_query_parser();
1072 assert_eq!(
1073 format!("{:?}", query_parser.parse_query("a + b - c")),
1074 "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"a\"))), (Should, TermQuery(Term(field=0, type=Str, \"b\"))), (Should, TermQuery(Term(field=0, type=Str, \"c\")))] })"
1075 );
1076 }
1077
    /// `+` (Must) and `-` (MustNot) occur modifiers on terms and phrases.
    #[test]
    pub fn test_plus_minus() {
        let query_parser = create_query_parser();
        // `body:` binds only to the `+search` term; `-engine` is negated on the
        // default field (field 0).
        assert_eq!(
            format!("{:?}", query_parser.parse_query("body:+search -engine")),
            "Ok(BooleanQuery { subqueries: [(Must, TermQuery(Term(field=1, type=Str, \"search\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
        );
        // `+` on a field-scoped phrase.
        assert_eq!(
            format!("{:?}", query_parser.parse_query("body:+'search engine'")),
            "Ok(BooleanQuery { subqueries: [(Must, PhraseQuery { field: Field(1), phrase_terms: [(0, Term(field=1, type=Str, \"search\")), (1, Term(field=1, type=Str, \"engine\"))], slop: 0 })] })"
        );
        assert_eq!(
            format!("{:?}", query_parser.parse_query("+search +engine")),
            "Ok(BooleanQuery { subqueries: [(Must, TermQuery(Term(field=0, type=Str, \"search\"))), (Must, TermQuery(Term(field=0, type=Str, \"engine\")))] })"
        );
        // With two default fields, each Must wraps a per-field disjunction.
        let query_parser = create_complex_query_parser();
        assert_eq!(format!("{:?}", query_parser.parse_query("+search +engine")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\")))] }), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"engine\"))), (Should, TermQuery(Term(field=1, type=Str, \"engine\")))] })] })");
        assert_eq!(format!("{:?}", query_parser.parse_query("+search language:+ru")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, TermQuery(Term(field=1, type=Str, \"search\")))] }), (Must, TermQuery(Term(field=3, type=Str, \"ru\")))] })");
        // The custom "summa" tokenizer on `title` (field 0) preserves "c++"
        // while the default TEXT tokenizer on `body` (field 1) reduces it to "c".
        assert_eq!(format!("{:?}", query_parser.parse_query("+c++ language:+ru")), "Ok(BooleanQuery { subqueries: [(Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"c++\"))), (Should, TermQuery(Term(field=1, type=Str, \"c\")))] }), (Must, TermQuery(Term(field=3, type=Str, \"ru\")))] })");
    }
1098
1099 #[test]
1100 pub fn test_quotes() {
1101 let query_parser = create_query_parser();
1102 assert_eq!(
1103 format!("{:?}", query_parser.parse_query("Don't duck with my family")),
1104 "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"don\"))), (Should, TermQuery(Term(field=0, type=Str, \"t\"))), (Should, TermQuery(Term(field=0, type=Str, \"duck\"))), (Should, TermQuery(Term(field=0, type=Str, \"with\"))), (Should, TermQuery(Term(field=0, type=Str, \"my\"))), (Should, TermQuery(Term(field=0, type=Str, \"family\")))] })"
1105 );
1106 assert_eq!(
1107 format!("{:?}", query_parser.parse_query("\"I Don't Want to be Me\"")),
1108 "Ok(PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"i\")), (1, Term(field=0, type=Str, \"don\")), (2, Term(field=0, type=Str, \"t\")), (3, Term(field=0, type=Str, \"want\")), (4, Term(field=0, type=Str, \"to\")), (5, Term(field=0, type=Str, \"be\")), (6, Term(field=0, type=Str, \"me\"))], slop: 0 })"
1109 );
1110 }
1111
    /// `^N` boost suffix on terms and phrases; the boost attaches only to the
    /// element it directly follows.
    #[test]
    pub fn test_parser_boostings() {
        let query_parser = create_query_parser();
        // Boost on a single term.
        let query = query_parser.parse_query("search^2.0");
        assert_eq!(
            format!("{:?}", query),
            "Ok(Boost(query=TermQuery(Term(field=0, type=Str, \"search\")), boost=2))"
        );
        // Slop and boost combine on a phrase.
        let query = query_parser.parse_query("'search engine'~3^2.0");
        assert_eq!(
            format!("{:?}", query),
            "Ok(Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"search\")), (1, Term(field=0, type=Str, \"engine\"))], slop: 3 }, boost=2))"
        );
        // Only the last term is boosted, not the whole query.
        let query = query_parser.parse_query("search engine^2.0");
        assert_eq!(
            format!("{:?}", query),
            "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"search\"))), (Should, Boost(query=TermQuery(Term(field=0, type=Str, \"engine\")), boost=2))] })"
        );
        // Boost works together with a field prefix, quoted or not.
        let query = query_parser.parse_query("body:title^2.0");
        assert_eq!(
            format!("{:?}", query),
            "Ok(Boost(query=TermQuery(Term(field=1, type=Str, \"title\")), boost=2))"
        );
        let query = query_parser.parse_query("body:'title'^2.0");
        assert_eq!(
            format!("{:?}", query),
            "Ok(Boost(query=TermQuery(Term(field=1, type=Str, \"title\")), boost=2))"
        );
    }
1141
    /// `[lower TO upper]` range syntax: inclusive bounds, case-insensitive
    /// `TO`/`to`, `*` for an open end, and ranges combinable with occur
    /// modifiers inside a field-scoped group.
    #[test]
    pub fn test_range_queries() {
        let query_parser = create_query_parser();
        // Debug output shows the encoded term bytes ([97,97,97] = "aaa").
        let query = query_parser.parse_query("body:[aaa TO ccc]");
        assert_eq!(
            format!("{:?}", query),
            "Ok(RangeQuery { field: \"body\", value_type: Str, lower_bound: Included([97, 97, 97]), upper_bound: Included([99, 99, 99]), limit: None })"
        );
        // Lowercase `to`, `*` as unbounded upper end, padding spaces tolerated.
        let query = query_parser.parse_query("body:[ a to * ]");
        assert_eq!(
            format!("{:?}", query),
            "Ok(RangeQuery { field: \"body\", value_type: Str, lower_bound: Included([97]), upper_bound: Unbounded, limit: None })"
        );
        // i64 bounds are shown in their order-preserving byte encoding
        // (leading 128 = flipped sign bit; trailing bytes are big-endian 1000/2000).
        let query = query_parser.parse_query("timestamp:[ 1000 to 2000 ]");
        assert_eq!(format!("{:?}", query), "Ok(RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 3, 232]), upper_bound: Included([128, 0, 0, 0, 0, 0, 7, 208]), limit: None })");
        // Ranges and plain terms mix with +/- inside one field-scoped group.
        let query = query_parser.parse_query("timestamp:(-[1100 to 1200] [ 1000 to 2000 ] -1500 +3000)");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(MustNot, RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 4, 76]), upper_bound: Included([128, 0, 0, 0, 0, 0, 4, 176]), limit: None }), (Should, RangeQuery { field: \"timestamp\", value_type: I64, lower_bound: Included([128, 0, 0, 0, 0, 0, 3, 232]), upper_bound: Included([128, 0, 0, 0, 0, 0, 7, 208]), limit: None }), (MustNot, TermQuery(Term(field=2, type=I64, 1500))), (Must, TermQuery(Term(field=2, type=I64, 3000)))] })");
    }
1160
    /// The exact-matches promoter appends a boosted sloppy phrase of the whole
    /// query to plain multi-term queries, rewarding documents where the terms
    /// appear close together.
    #[test]
    pub fn test_exact_phrase_promoter() {
        let mut query_parser = create_query_parser();
        query_parser.query_parser_config.0.exact_matches_promoter = Some(proto::ExactMatchesPromoter {
            slop: 3,
            boost: Some(2.0),
            fields: vec![],
        });
        // Plain terms: a phrase with slop=3 and boost=2 is appended as an
        // extra Should clause; minimum_number_should_match stays 1.
        let query = query_parser.parse_query("old school holy-wood");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"old\"))), (Should, TermQuery(Term(field=0, type=Str, \"school\"))), (Should, TermQuery(Term(field=0, type=Str, \"holy\"))), (Should, TermQuery(Term(field=0, type=Str, \"wood\"))), (Should, Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"old\")), (1, Term(field=0, type=Str, \"school\")), (2, Term(field=0, type=Str, \"holy\")), (3, Term(field=0, type=Str, \"wood\"))], slop: 3 }, boost=2))], minimum_number_should_match: 1 })");
        // No phrase is promoted when any term carries its own boost.
        let query = query_parser.parse_query("old^2.0 school");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, Boost(query=TermQuery(Term(field=0, type=Str, \"old\")), boost=2)), (Should, TermQuery(Term(field=0, type=Str, \"school\")))], minimum_number_should_match: 1 })");
        // A per-field boost multiplies into the promoted phrase: 3 (field) *
        // 2 (promoter) = 6.
        query_parser.query_parser_config.0.field_boosts = HashMap::from_iter(vec![("title".to_string(), 3.0)]);
        let query = query_parser.parse_query("old school");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, Boost(query=TermQuery(Term(field=0, type=Str, \"old\")), boost=3)), (Should, Boost(query=TermQuery(Term(field=0, type=Str, \"school\")), boost=3)), (Should, Boost(query=PhraseQuery { field: Field(0), phrase_terms: [(0, Term(field=0, type=Str, \"old\")), (1, Term(field=0, type=Str, \"school\"))], slop: 3 }, boost=6))], minimum_number_should_match: 1 })");
    }
1177
    /// Morphology-driven tense/number derivation: with an English query
    /// language and a per-field coefficient, eligible terms expand into a
    /// DisjunctionMaxQuery over their derived forms.
    #[test]
    pub fn test_inflection() {
        let mut query_parser = create_query_parser();
        let mut morphology_configs = HashMap::new();
        morphology_configs.insert(
            "title".to_string(),
            proto::MorphologyConfig {
                // Becomes the tie_breaker of the generated DisjunctionMaxQuery.
                derive_tenses_coefficient: Some(0.3),
            },
        );
        query_parser.query_parser_config.0.morphology_configs = morphology_configs;
        query_parser.query_parser_config.0.query_language = Some("en".to_string());
        // "search"/"engine" gain plural disjuncts; "red1" (contains a digit)
        // and "going" are left unexpanded — presumably only noun-like tokens
        // are derived; TODO confirm against the morphology implementation.
        let query = query_parser.parse_query("red1 search engine going");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"red1\"))), (Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"search\")), TermQuery(Term(field=0, type=Str, \"searches\"))], tie_breaker: 0.3 }), (Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"engine\")), TermQuery(Term(field=0, type=Str, \"engines\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"going\")))] })");
        // NOTE(review): the `2022` in "34-1:2022" is absent from the expected
        // query — apparently consumed by field-prefix parsing of `1:2022`;
        // verify this is the intended behavior.
        let query = query_parser.parse_query("iso 34-1:2022");
        assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, DisjunctionMaxQuery { disjuncts: [TermQuery(Term(field=0, type=Str, \"iso\")), TermQuery(Term(field=0, type=Str, \"isos\"))], tie_breaker: 0.3 }), (Should, TermQuery(Term(field=0, type=Str, \"34\"))), (Should, TermQuery(Term(field=0, type=Str, \"1\")))] })");
    }
1195
1196 #[test]
1197 pub fn test_root_grouping() {
1198 let query_parser = create_query_parser();
1199 let query = query_parser.parse_query("(test1 test2) -(test3) +(test4 test5)");
1200 assert_eq!(format!("{:?}", query), "Ok(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test1\"))), (Should, TermQuery(Term(field=0, type=Str, \"test2\"))), (MustNot, TermQuery(Term(field=0, type=Str, \"test3\"))), (Must, BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, \"test4\"))), (Should, TermQuery(Term(field=0, type=Str, \"test5\")))] })] })");
1201 }
1202}