summavy_query_grammar/
query_grammar.rs

1use combine::error::StringStreamError;
2use combine::parser::char::{char, digit, space, spaces, string};
3use combine::parser::combinator::recognize;
4use combine::parser::range::{take_while, take_while1};
5use combine::parser::repeat::escaped;
6use combine::parser::Parser;
7use combine::{
8    attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
9    skip_many1, value,
10};
11use once_cell::sync::Lazy;
12use regex::Regex;
13
14use super::user_input_ast::{UserInputAst, UserInputBound, UserInputLeaf, UserInputLiteral};
15use crate::Occur;
16
// Characters that have to be escaped with a backslash in order to be part of a field name.
//
// Note: the '-' char is only forbidden at the beginning of a field name (see `field_name`),
// which is why it is not listed here; it might be clearer to add it to the special characters.
const SPECIAL_CHARS: &[char] = &[
    '+', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')', '!', '\\', '*', ' ',
];
// Regex matching a backslash followed by one escapable character. The character is captured in
// group 1, so that replacing a match with "$1" drops the backslash. Note that `\s` matches any
// whitespace character, whereas `SPECIAL_CHARS` only lists ' '.
const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|!|\\|\*|\s)"#;
23
24/// Parses a field_name
25/// A field name must have at least one character and be followed by a colon.
26/// All characters are allowed including special characters `SPECIAL_CHARS`, but these
27/// need to be escaped with a backslash character '\'.
28fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
29    static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
30        Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
31
32    recognize::<String, _, _>(escaped(
33        (
34            take_while1(|c| !SPECIAL_CHARS.contains(&c) && c != '-'),
35            take_while(|c| !SPECIAL_CHARS.contains(&c)),
36        ),
37        '\\',
38        satisfy(|_| true), /* if the next character is not a special char, the \ will be treated
39                            * as the \ character. */
40    ))
41    .skip(char(':'))
42    .map(|s| ESCAPED_SPECIAL_CHARS_RE.replace_all(&s, "$1").to_string())
43    .and_then(|s: String| match s.is_empty() {
44        true => Err(StringStreamError::UnexpectedParse),
45        _ => Ok(s),
46    })
47}
48
49fn word<'a>() -> impl Parser<&'a str, Output = String> {
50    (
51        satisfy(|c: char| {
52            !c.is_whitespace()
53                && !['-', '^', '`', ':', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
54        }),
55        many(satisfy(|c: char| {
56            !c.is_whitespace() && ![':', '^', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
57        })),
58    )
59        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
60        .and_then(|s: String| match s.as_str() {
61            "OR" | "AND " | "NOT" => Err(StringStreamError::UnexpectedParse),
62            _ => Ok(s),
63        })
64}
65
66// word variant that allows more characters, e.g. for range queries that don't allow field
67// specifier
68fn relaxed_word<'a>() -> impl Parser<&'a str, Output = String> {
69    (
70        satisfy(|c: char| {
71            !c.is_whitespace() && !['`', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
72        }),
73        many(satisfy(|c: char| {
74            !c.is_whitespace() && !['{', '}', '"', '[', ']', '(', ')'].contains(&c)
75        })),
76    )
77        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
78}
79
80/// Parses a date time according to rfc3339
81/// 2015-08-02T18:54:42+02
82/// 2021-04-13T19:46:26.266051969+00:00
83///
84/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
85/// We delegate rejecting such invalid dates to the logical AST computation code
86/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
87/// it (instead of merely extracting the datetime value as string as done here).
88fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
89    let two_digits = || recognize::<String, _, _>((digit(), digit()));
90
91    // Parses a time zone
92    // -06:30
93    // Z
94    let time_zone = {
95        let utc = recognize::<String, _, _>(char('Z'));
96        let offset = recognize((
97            choice([char('-'), char('+')]),
98            two_digits(),
99            char(':'),
100            two_digits(),
101        ));
102
103        utc.or(offset)
104    };
105
106    // Parses a date
107    // 2010-01-30
108    let date = {
109        recognize::<String, _, _>((
110            many1::<String, _, _>(digit()),
111            char('-'),
112            two_digits(),
113            char('-'),
114            two_digits(),
115        ))
116    };
117
118    // Parses a time
119    // 12:30:02
120    // 19:46:26.266051969
121    let time = {
122        recognize::<String, _, _>((
123            two_digits(),
124            char(':'),
125            two_digits(),
126            char(':'),
127            two_digits(),
128            optional((char('.'), many1::<String, _, _>(digit()))),
129            time_zone,
130        ))
131    };
132
133    recognize((date, char('T'), time))
134}
135
136fn term_val<'a>() -> impl Parser<&'a str, Output = String> {
137    let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
138    negative_number().or(phrase.or(word()))
139}
140
141fn term_query<'a>() -> impl Parser<&'a str, Output = UserInputLiteral> {
142    (field_name(), term_val(), slop_val()).map(|(field_name, phrase, slop)| UserInputLiteral {
143        field_name: Some(field_name),
144        phrase,
145        slop,
146    })
147}
148
149fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
150    let slop =
151        (char('~'), many1(digit())).and_then(|(_, slop): (_, String)| match slop.parse::<u32>() {
152            Ok(d) => Ok(d),
153            _ => Err(StringStreamError::UnexpectedParse),
154        });
155    optional(slop).map(|slop| match slop {
156        Some(d) => d,
157        _ => 0,
158    })
159}
160
161fn literal<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
162    let term_default_field = (term_val(), slop_val()).map(|(phrase, slop)| UserInputLiteral {
163        field_name: None,
164        phrase,
165        slop,
166    });
167
168    attempt(term_query())
169        .or(term_default_field)
170        .map(UserInputLeaf::from)
171}
172
173fn negative_number<'a>() -> impl Parser<&'a str, Output = String> {
174    (
175        char('-'),
176        many1(digit()),
177        optional((char('.'), many1(digit()))),
178    )
179        .map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
180            if let Some(('.', s3)) = s3 {
181                format!("{}{}.{}", s1, s2, s3)
182            } else {
183                format!("{}{}", s1, s2)
184            }
185        })
186}
187
/// Consumes one or more whitespace characters and discards them.
fn spaces1<'a>() -> impl Parser<&'a str, Output = ()> {
    skip_many1(space())
}
191
192/// Function that parses a range out of a Stream
193/// Supports ranges like:
194/// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
195/// [a TO *], [a TO c], [abc TO bcd}
196fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
197    let range_term_val = || {
198        attempt(date_time())
199            .or(negative_number())
200            .or(relaxed_word())
201            .or(char('*').with(value("*".to_string())))
202    };
203
204    // check for unbounded range in the form of <5, <=10, >5, >=5
205    let elastic_unbounded_range = (
206        choice([
207            attempt(string(">=")),
208            attempt(string("<=")),
209            attempt(string("<")),
210            attempt(string(">")),
211        ])
212        .skip(spaces()),
213        range_term_val(),
214    )
215        .map(
216            |(comparison_sign, bound): (&str, String)| match comparison_sign {
217                ">=" => (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
218                "<=" => (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
219                "<" => (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
220                ">" => (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
221                // default case
222                _ => (UserInputBound::Unbounded, UserInputBound::Unbounded),
223            },
224        );
225    let lower_bound = (one_of("{[".chars()), range_term_val()).map(
226        |(boundary_char, lower_bound): (char, String)| {
227            if lower_bound == "*" {
228                UserInputBound::Unbounded
229            } else if boundary_char == '{' {
230                UserInputBound::Exclusive(lower_bound)
231            } else {
232                UserInputBound::Inclusive(lower_bound)
233            }
234        },
235    );
236    let upper_bound = (range_term_val(), one_of("}]".chars())).map(
237        |(higher_bound, boundary_char): (String, char)| {
238            if higher_bound == "*" {
239                UserInputBound::Unbounded
240            } else if boundary_char == '}' {
241                UserInputBound::Exclusive(higher_bound)
242            } else {
243                UserInputBound::Inclusive(higher_bound)
244            }
245        },
246    );
247    // return only lower and upper
248    let lower_to_upper = (
249        lower_bound.skip((spaces(), string("TO"), spaces())),
250        upper_bound,
251    );
252
253    (
254        optional(field_name()).skip(spaces()),
255        // try elastic first, if it matches, the range is unbounded
256        attempt(elastic_unbounded_range).or(lower_to_upper),
257    )
258        .map(|(field, (lower, upper))|
259             // Construct the leaf from extracted field (optional)
260             // and bounds
261             UserInputLeaf::Range {
262                 field,
263                 lower,
264                 upper
265    })
266}
267
268/// Function that parses a set out of a Stream
269/// Supports ranges like: `IN [val1 val2 val3]`
270fn set<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
271    let term_list = between(char('['), char(']'), sep_by(term_val(), spaces()));
272
273    let set_content = ((string("IN"), spaces()), term_list).map(|(_, elements)| elements);
274
275    (optional(attempt(field_name().skip(spaces()))), set_content)
276        .map(|(field, elements)| UserInputLeaf::Set { field, elements })
277}
278
/// Negates `expr` by applying the `Occur::MustNot` occurrence to it through `unary`.
fn negate(expr: UserInputAst) -> UserInputAst {
    expr.unary(Occur::MustNot)
}
282
/// Parses a single leaf of the query.
///
/// The alternatives are tried in the following order:
/// 1. a parenthesized sub-query `( ... )`, parsed with `ast()`,
/// 2. `*`, mapped to `UserInputLeaf::All`,
/// 3. `NOT`, at least one whitespace and another leaf, which gets negated,
/// 4. a range,
/// 5. a set,
/// 6. a literal.
///
/// The combinators are built inside the `parser(...)` closure, i.e. only once the parser is
/// actually run. `leaf` and `ast` refer to each other (and `leaf` to itself), so the
/// construction presumably has to be deferred this way to avoid recursing endlessly.
fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
    parser(|input| {
        char('(')
            .with(ast())
            .skip(char(')'))
            .or(char('*').map(|_| UserInputAst::from(UserInputLeaf::All)))
            // `attempt` rewinds the input if the alternative fails midway, so that the next
            // alternative starts from the beginning of the leaf.
            .or(attempt(
                string("NOT").skip(spaces1()).with(leaf()).map(negate),
            ))
            .or(attempt(range().map(UserInputAst::from)))
            .or(attempt(set().map(UserInputAst::from)))
            .or(literal().map(UserInputAst::from))
            .parse_stream(input)
            .into_result()
    })
}
299
300fn occur_symbol<'a>() -> impl Parser<&'a str, Output = Occur> {
301    char('-')
302        .map(|_| Occur::MustNot)
303        .or(char('+').map(|_| Occur::Must))
304}
305
/// Parses a boosted leaf optionally preceded by an occurrence symbol (`+` or `-`).
/// Yields the `Occur` of the symbol, if there is one, along with the parsed leaf.
fn occur_leaf<'a>() -> impl Parser<&'a str, Output = (Option<Occur>, UserInputAst)> {
    (optional(occur_symbol()), boosted_leaf())
}
309
310fn positive_float_number<'a>() -> impl Parser<&'a str, Output = f64> {
311    (many1(digit()), optional((char('.'), many1(digit())))).map(
312        |(int_part, decimal_part_opt): (String, Option<(char, String)>)| {
313            let mut float_str = int_part;
314            if let Some((chr, decimal_str)) = decimal_part_opt {
315                float_str.push(chr);
316                float_str.push_str(&decimal_str);
317            }
318            float_str.parse::<f64>().unwrap()
319        },
320    )
321}
322
323fn boost<'a>() -> impl Parser<&'a str, Output = f64> {
324    (char('^'), positive_float_number()).map(|(_, boost)| boost)
325}
326
327fn boosted_leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
328    (leaf(), optional(boost())).map(|(leaf, boost_opt)| match boost_opt {
329        Some(boost) if (boost - 1.0).abs() > f64::EPSILON => {
330            UserInputAst::Boost(Box::new(leaf), boost)
331        }
332        _ => leaf,
333    })
334}
335
/// Binary operator joining two operands of a boolean expression.
#[derive(Clone, Copy)]
enum BinaryOperand {
    /// The `OR` keyword.
    Or,
    /// The `AND` keyword.
    And,
}
341
342fn binary_operand<'a>() -> impl Parser<&'a str, Output = BinaryOperand> {
343    string("AND")
344        .with(value(BinaryOperand::And))
345        .or(string("OR").with(value(BinaryOperand::Or)))
346}
347
348fn aggregate_binary_expressions(
349    left: UserInputAst,
350    others: Vec<(BinaryOperand, UserInputAst)>,
351) -> UserInputAst {
352    let mut dnf: Vec<Vec<UserInputAst>> = vec![vec![left]];
353    for (operator, operand_ast) in others {
354        match operator {
355            BinaryOperand::And => {
356                if let Some(last) = dnf.last_mut() {
357                    last.push(operand_ast);
358                }
359            }
360            BinaryOperand::Or => {
361                dnf.push(vec![operand_ast]);
362            }
363        }
364    }
365    if dnf.len() == 1 {
366        UserInputAst::and(dnf.into_iter().next().unwrap()) //< safe
367    } else {
368        let conjunctions = dnf.into_iter().map(UserInputAst::and).collect();
369        UserInputAst::or(conjunctions)
370    }
371}
372
/// Parses a binary operator (`AND` / `OR`) followed by its right-hand operand, a boosted leaf.
/// The whitespace following the operator and following the operand is skipped.
fn operand_leaf<'a>() -> impl Parser<&'a str, Output = (BinaryOperand, UserInputAst)> {
    (
        binary_operand().skip(spaces()),
        boosted_leaf().skip(spaces()),
    )
}
379
380pub fn ast<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
381    let boolean_expr = (boosted_leaf().skip(spaces()), many1(operand_leaf()))
382        .map(|(left, right)| aggregate_binary_expressions(left, right));
383    let whitespace_separated_leaves = many1(occur_leaf().skip(spaces().silent())).map(
384        |subqueries: Vec<(Option<Occur>, UserInputAst)>| {
385            if subqueries.len() == 1 {
386                let (occur_opt, ast) = subqueries.into_iter().next().unwrap();
387                match occur_opt.unwrap_or(Occur::Should) {
388                    Occur::Must | Occur::Should => ast,
389                    Occur::MustNot => UserInputAst::Clause(vec![(Some(Occur::MustNot), ast)]),
390                }
391            } else {
392                UserInputAst::Clause(subqueries.into_iter().collect())
393            }
394        },
395    );
396    let expr = attempt(boolean_expr).or(whitespace_separated_leaves);
397    spaces().with(expr).skip(spaces())
398}
399
400pub fn parse_to_ast<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
401    spaces()
402        .with(optional(ast()).skip(eof()))
403        .map(|opt_ast| opt_ast.unwrap_or_else(UserInputAst::empty_query))
404}
405
#[cfg(test)]
mod test {

    type TestParseResult = Result<(), StringStreamError>;

    use combine::parser::Parser;

    use super::*;

    /// Returns true if `a` and `b` are equal up to a small tolerance, relative to the
    /// magnitude of their sum.
    pub fn nearly_equals(a: f64, b: f64) -> bool {
        (a - b).abs() < 0.0005 * (a + b).abs()
    }

    /// Panics, reporting both values, unless `val` is nearly equal to `expected`.
    fn assert_nearly_equals(expected: f64, val: f64) {
        assert!(
            nearly_equals(val, expected),
            "Got {}, expected {}.",
            val,
            expected
        );
    }

    #[test]
    fn test_occur_symbol() -> TestParseResult {
        assert_eq!(super::occur_symbol().parse("-")?, (Occur::MustNot, ""));
        assert_eq!(super::occur_symbol().parse("+")?, (Occur::Must, ""));
        Ok(())
    }

    #[test]
    fn test_positive_float_number() {
        fn valid_parse(float_str: &str, expected_val: f64, expected_remaining: &str) {
            let (val, remaining) = positive_float_number().parse(float_str).unwrap();
            assert_eq!(remaining, expected_remaining);
            // The expected value comes first, so that a failure reports the values correctly.
            assert_nearly_equals(expected_val, val);
        }
        fn error_parse(float_str: &str) {
            assert!(positive_float_number().parse(float_str).is_err());
        }
        valid_parse("1.0", 1.0, "");
        valid_parse("1", 1.0, "");
        valid_parse("0.234234 aaa", 0.234234f64, " aaa");
        error_parse(".3332");
        error_parse("1.");
        error_parse("-1.");
    }

    #[test]
    fn test_date_time() {
        let (val, remaining) = date_time()
            .parse("2015-08-02T18:54:42+02:30")
            .expect("cannot parse date");
        assert_eq!(val, "2015-08-02T18:54:42+02:30");
        assert_eq!(remaining, "");
        // The minutes of the offset are mandatory.
        assert!(date_time().parse("2015-08-02T18:54:42+02").is_err());

        let (val, remaining) = date_time()
            .parse("2021-04-13T19:46:26.266051969+00:00")
            .expect("cannot parse fractional date");
        assert_eq!(val, "2021-04-13T19:46:26.266051969+00:00");
        assert_eq!(remaining, "");
    }

    /// Parses `query` and checks the debug representation of the resulting AST.
    fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
        let query = parse_to_ast().parse(query).unwrap().0;
        let query_str = format!("{:?}", query);
        assert_eq!(query_str, expected);
    }

    /// Checks that `query` is rejected by the parser.
    fn test_is_parse_err(query: &str) {
        assert!(parse_to_ast().parse(query).is_err());
    }

    #[test]
    fn test_parse_empty_to_ast() {
        test_parse_query_to_ast_helper("", "<emptyclause>");
    }

    #[test]
    fn test_parse_query_to_ast_hyphen() {
        test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
        test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
    }

    #[test]
    fn test_parse_query_to_ast_not_op() {
        assert_eq!(
            format!("{:?}", parse_to_ast().parse("NOT")),
            "Err(UnexpectedParse)"
        );
        test_parse_query_to_ast_helper("NOTa", "\"NOTa\"");
        test_parse_query_to_ast_helper("NOT a", "(-\"a\")");
    }

    #[test]
    fn test_boosting() {
        assert!(parse_to_ast().parse("a^2^3").is_err());
        assert!(parse_to_ast().parse("a^2^").is_err());
        test_parse_query_to_ast_helper("a^3", "(\"a\")^3");
        test_parse_query_to_ast_helper("a^3 b^2", "(*(\"a\")^3 *(\"b\")^2)");
        test_parse_query_to_ast_helper("a^1", "\"a\"");
    }

    #[test]
    fn test_parse_query_to_ast_binary_op() {
        test_parse_query_to_ast_helper("a AND b", "(+\"a\" +\"b\")");
        test_parse_query_to_ast_helper("a OR b", "(?\"a\" ?\"b\")");
        test_parse_query_to_ast_helper("a OR b AND c", "(?\"a\" ?(+\"b\" +\"c\"))");
        test_parse_query_to_ast_helper("a AND b         AND c", "(+\"a\" +\"b\" +\"c\")");
        assert_eq!(
            format!("{:?}", parse_to_ast().parse("a OR b aaa")),
            "Err(UnexpectedParse)"
        );
        assert_eq!(
            format!("{:?}", parse_to_ast().parse("a AND b aaa")),
            "Err(UnexpectedParse)"
        );
        assert_eq!(
            format!("{:?}", parse_to_ast().parse("aaa a OR b ")),
            "Err(UnexpectedParse)"
        );
        assert_eq!(
            format!("{:?}", parse_to_ast().parse("aaa ccc a OR b ")),
            "Err(UnexpectedParse)"
        );
    }

    #[test]
    fn test_parse_elastic_query_ranges() {
        test_parse_query_to_ast_helper("title: >a", "\"title\":{\"a\" TO \"*\"}");
        test_parse_query_to_ast_helper("title:>=a", "\"title\":[\"a\" TO \"*\"}");
        test_parse_query_to_ast_helper("title: <a", "\"title\":{\"*\" TO \"a\"}");
        test_parse_query_to_ast_helper("title:<=a", "\"title\":{\"*\" TO \"a\"]");
        test_parse_query_to_ast_helper("title:<=bsd", "\"title\":{\"*\" TO \"bsd\"]");

        test_parse_query_to_ast_helper("weight: >70", "\"weight\":{\"70\" TO \"*\"}");
        test_parse_query_to_ast_helper("weight:>=70", "\"weight\":[\"70\" TO \"*\"}");
        test_parse_query_to_ast_helper("weight: <70", "\"weight\":{\"*\" TO \"70\"}");
        test_parse_query_to_ast_helper("weight:<=70", "\"weight\":{\"*\" TO \"70\"]");
        test_parse_query_to_ast_helper("weight: >60.7", "\"weight\":{\"60.7\" TO \"*\"}");

        test_parse_query_to_ast_helper("weight: <= 70", "\"weight\":{\"*\" TO \"70\"]");

        test_parse_query_to_ast_helper("weight: <= 70.5", "\"weight\":{\"*\" TO \"70.5\"]");
    }

    #[test]
    fn test_occur_leaf() {
        let ((occur, ast), _) = super::occur_leaf().parse("+abc").unwrap();
        assert_eq!(occur, Some(Occur::Must));
        assert_eq!(format!("{:?}", ast), "\"abc\"");
    }

    #[test]
    fn test_field_name() {
        assert_eq!(
            super::field_name().parse(".my.field.name:a"),
            Ok((".my.field.name".to_string(), "a"))
        );
        assert_eq!(
            super::field_name().parse(r#"にんじん:a"#),
            Ok(("にんじん".to_string(), "a"))
        );
        assert_eq!(
            super::field_name().parse(r#"my\field:a"#),
            Ok((r#"my\field"#.to_string(), "a"))
        );
        assert!(super::field_name().parse("my field:a").is_err());
        assert_eq!(
            super::field_name().parse("\\(1\\+1\\):2"),
            Ok(("(1+1)".to_string(), "2"))
        );
        assert_eq!(
            super::field_name().parse("my_field_name:a"),
            Ok(("my_field_name".to_string(), "a"))
        );
        assert_eq!(
            super::field_name().parse("myfield.b:hello").unwrap(),
            ("myfield.b".to_string(), "hello")
        );
        assert_eq!(
            super::field_name().parse(r#"myfield\.b:hello"#).unwrap(),
            (r#"myfield\.b"#.to_string(), "hello")
        );
        assert!(super::field_name().parse("my_field_name").is_err());
        assert!(super::field_name().parse(":a").is_err());
        assert!(super::field_name().parse("-my_field:a").is_err());
        assert_eq!(
            super::field_name().parse("_my_field:a"),
            Ok(("_my_field".to_string(), "a"))
        );
        assert_eq!(
            super::field_name().parse("~my~field:a"),
            Ok(("~my~field".to_string(), "a"))
        );
        // Every special character is accepted once escaped, at the start as well as in the
        // middle of the name.
        for special_char in SPECIAL_CHARS.iter() {
            let query = &format!("\\{special_char}my\\{special_char}field:a");
            assert_eq!(
                super::field_name().parse(query),
                Ok((format!("{special_char}my{special_char}field"), "a"))
            );
        }
    }

    #[test]
    fn test_field_name_re() {
        let escaped_special_chars_re = Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap();
        for special_char in SPECIAL_CHARS.iter() {
            assert_eq!(
                escaped_special_chars_re.replace_all(&format!("\\{}", special_char), "$1"),
                special_char.to_string()
            );
        }
    }

    #[test]
    fn test_range_parser() {
        // testing the range() parser separately
        let res = range()
            .parse("title: <hello")
            .expect("Cannot parse flexible bound word")
            .0;
        let expected = UserInputLeaf::Range {
            field: Some("title".to_string()),
            lower: UserInputBound::Unbounded,
            upper: UserInputBound::Exclusive("hello".to_string()),
        };
        let res2 = range()
            .parse("title:{* TO hello}")
            .expect("Cannot parse unbounded to word")
            .0;
        assert_eq!(res, expected);
        assert_eq!(res2, expected);

        let expected_weight = UserInputLeaf::Range {
            field: Some("weight".to_string()),
            lower: UserInputBound::Inclusive("71.2".to_string()),
            upper: UserInputBound::Unbounded,
        };
        let res3 = range()
            .parse("weight: >=71.2")
            .expect("Cannot parse flexible bound float")
            .0;
        let res4 = range()
            .parse("weight:[71.2 TO *}")
            .expect("Cannot parse float to unbounded")
            .0;
        assert_eq!(res3, expected_weight);
        assert_eq!(res4, expected_weight);

        let expected_dates = UserInputLeaf::Range {
            field: Some("date_field".to_string()),
            lower: UserInputBound::Exclusive("2015-08-02T18:54:42Z".to_string()),
            upper: UserInputBound::Inclusive("2021-08-02T18:54:42+02:30".to_string()),
        };
        let res5 = range()
            .parse("date_field:{2015-08-02T18:54:42Z TO 2021-08-02T18:54:42+02:30]")
            .expect("Cannot parse date range")
            .0;
        assert_eq!(res5, expected_dates);

        let expected_flexible_dates = UserInputLeaf::Range {
            field: Some("date_field".to_string()),
            lower: UserInputBound::Unbounded,
            upper: UserInputBound::Inclusive("2021-08-02T18:54:42.12345+02:30".to_string()),
        };

        let res6 = range()
            .parse("date_field: <=2021-08-02T18:54:42.12345+02:30")
            .expect("Cannot parse date range")
            .0;
        assert_eq!(res6, expected_flexible_dates);
        // IP Range Unbounded
        let expected_ip_unbounded = UserInputLeaf::Range {
            field: Some("ip".to_string()),
            lower: UserInputBound::Inclusive("::1".to_string()),
            upper: UserInputBound::Unbounded,
        };
        let res1 = range()
            .parse("ip: >=::1")
            .expect("Cannot parse ip v6 format")
            .0;
        let res2 = range()
            .parse("ip:[::1 TO *}")
            .expect("Cannot parse ip v6 format")
            .0;
        assert_eq!(res1, expected_ip_unbounded);
        assert_eq!(res2, expected_ip_unbounded);

        // IP Range Bounded
        let expected_ip_bounded = UserInputLeaf::Range {
            field: Some("ip".to_string()),
            lower: UserInputBound::Inclusive("::0.0.0.50".to_string()),
            upper: UserInputBound::Exclusive("::0.0.0.52".to_string()),
        };
        let res1 = range()
            .parse("ip:[::0.0.0.50 TO ::0.0.0.52}")
            .expect("Cannot parse ip v6 format")
            .0;
        assert_eq!(res1, expected_ip_bounded);
    }

    #[test]
    fn test_parse_query_to_triming_spaces() {
        test_parse_query_to_ast_helper("   abc", "\"abc\"");
        test_parse_query_to_ast_helper("abc ", "\"abc\"");
        test_parse_query_to_ast_helper("(  a OR abc)", "(?\"a\" ?\"abc\")");
        test_parse_query_to_ast_helper("(a  OR abc)", "(?\"a\" ?\"abc\")");
        test_parse_query_to_ast_helper("(a OR  abc)", "(?\"a\" ?\"abc\")");
        test_parse_query_to_ast_helper("a OR abc ", "(?\"a\" ?\"abc\")");
        test_parse_query_to_ast_helper("(a OR abc )", "(?\"a\" ?\"abc\")");
        test_parse_query_to_ast_helper("(a OR  abc) ", "(?\"a\" ?\"abc\")");
    }

    #[test]
    fn test_parse_query_single_term() {
        test_parse_query_to_ast_helper("abc", "\"abc\"");
    }

    #[test]
    fn test_parse_query_default_clause() {
        test_parse_query_to_ast_helper("a b", "(*\"a\" *\"b\")");
    }

    #[test]
    fn test_parse_query_must_default_clause() {
        test_parse_query_to_ast_helper("+(a b)", "(*\"a\" *\"b\")");
    }

    #[test]
    fn test_parse_query_must_single_term() {
        test_parse_query_to_ast_helper("+d", "\"d\"");
    }

    #[test]
    fn test_single_term_with_field() {
        test_parse_query_to_ast_helper("abc:toto", "\"abc\":\"toto\"");
    }

    #[test]
    fn test_single_term_with_float() {
        test_parse_query_to_ast_helper("abc:1.1", "\"abc\":\"1.1\"");
        test_parse_query_to_ast_helper("a.b.c:1.1", "\"a.b.c\":\"1.1\"");
        test_parse_query_to_ast_helper("a\\ b\\ c:1.1", "\"a b c\":\"1.1\"");
    }

    #[test]
    fn test_must_clause() {
        test_parse_query_to_ast_helper("(+a +b)", "(+\"a\" +\"b\")");
    }

    #[test]
    fn test_parse_test_query_plus_a_b_plus_d() {
        test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")");
    }

    #[test]
    fn test_parse_test_query_set() {
        test_parse_query_to_ast_helper("abc: IN [a b c]", r#""abc": IN ["a" "b" "c"]"#);
        test_parse_query_to_ast_helper("abc: IN [1]", r#""abc": IN ["1"]"#);
        test_parse_query_to_ast_helper("abc: IN []", r#""abc": IN []"#);
        test_parse_query_to_ast_helper("IN [1 2]", r#"IN ["1" "2"]"#);
    }

    #[test]
    fn test_parse_test_query_other() {
        test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
        test_parse_query_to_ast_helper("+abc:toto", "\"abc\":\"toto\"");
        test_parse_query_to_ast_helper("+a\\+b\\+c:toto", "\"a+b+c\":\"toto\"");
        test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+\"abc\":\"toto\" -\"titi\")");
        test_parse_query_to_ast_helper("-abc:toto", "(-\"abc\":\"toto\")");
        test_is_parse_err("--abc:toto");
        test_parse_query_to_ast_helper("abc:a b", "(*\"abc\":\"a\" *\"b\")");
        test_parse_query_to_ast_helper("abc:\"a b\"", "\"abc\":\"a b\"");
        test_parse_query_to_ast_helper("foo:[1 TO 5]", "\"foo\":[\"1\" TO \"5\"]");
    }

    #[test]
    fn test_parse_query_with_range() {
        test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
        test_parse_query_to_ast_helper("foo:{a TO z}", "\"foo\":{\"a\" TO \"z\"}");
        test_parse_query_to_ast_helper("foo:[1 TO toto}", "\"foo\":[\"1\" TO \"toto\"}");
        test_parse_query_to_ast_helper("foo:[* TO toto}", "\"foo\":{\"*\" TO \"toto\"}");
        test_parse_query_to_ast_helper("foo:[1 TO *}", "\"foo\":[\"1\" TO \"*\"}");
        test_parse_query_to_ast_helper(
            "1.2.foo.bar:[1.1 TO *}",
            "\"1.2.foo.bar\":[\"1.1\" TO \"*\"}",
        );
        test_is_parse_err("abc +    ");
    }

    #[test]
    fn test_slop() {
        assert!(parse_to_ast().parse("\"a b\"~").is_err());
        assert!(parse_to_ast().parse("foo:\"a b\"~").is_err());
        assert!(parse_to_ast().parse("\"a b\"~a").is_err());
        assert!(parse_to_ast().parse("\"a b\"~100000000000000000").is_err());

        test_parse_query_to_ast_helper("\"a b\"^2~4", "(*(\"a b\")^2 *\"~4\")");
        test_parse_query_to_ast_helper("\"~Document\"", "\"~Document\"");
        test_parse_query_to_ast_helper("~Document", "\"~Document\"");
        test_parse_query_to_ast_helper("a~2", "\"a~2\"");
        test_parse_query_to_ast_helper("\"a b\"~0", "\"a b\"");
        test_parse_query_to_ast_helper("\"a b\"~1", "\"a b\"~1");
        test_parse_query_to_ast_helper("\"a b\"~3", "\"a b\"~3");
        test_parse_query_to_ast_helper("foo:\"a b\"~300", "\"foo\":\"a b\"~300");
        test_parse_query_to_ast_helper("\"a b\"~300^2", "(\"a b\"~300)^2");
    }
}