tau_engine/
parser.rs

1use std::convert::TryFrom;
2use std::fmt;
3use std::iter::Iterator;
4use std::iter::Peekable;
5
6use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind};
7use regex::{Regex, RegexSet, RegexSetBuilder};
8use serde_yaml::{Mapping, Value as Yaml};
9use tracing::debug;
10
11use crate::identifier::{Identifier, IdentifierParser, Pattern};
12use crate::tokeniser::{BoolSym, DelSym, MatchSym, MiscSym, ModSym, Token, Tokeniser};
13
/// A string comparison kind together with the text it compares against.
///
/// Every variant carries exactly one `String`; the variant name identifies
/// which comparison is meant.
#[derive(Clone, Debug, PartialEq)]
pub enum MatchType {
    /// Substring comparison against the carried text.
    Contains(String),
    /// Suffix comparison against the carried text.
    EndsWith(String),
    /// Whole-string comparison against the carried text.
    Exact(String),
    /// Prefix comparison against the carried text.
    StartsWith(String),
}

impl MatchType {
    /// Borrows the text carried by this match type, regardless of variant.
    pub fn value(&self) -> &String {
        match self {
            Self::Contains(text) => text,
            Self::EndsWith(text) => text,
            Self::Exact(text) => text,
            Self::StartsWith(text) => text,
        }
    }
}
29
/// Quantifier carried by [`Expression::Match`].
///
/// The parser builds these from `all(identifier)` and `of(identifier, n)`
/// token sequences.
#[derive(Clone, Debug, PartialEq)]
pub enum Match {
    /// Displayed as `all(<expr>)`.
    // NOTE(review): presumably means every candidate must match - confirm against the solver.
    All,
    /// Displayed as `of(<expr>, <n>)`; the count comes from an integer token
    /// that must convert to `u64` (negative values are rejected by the parser).
    Of(u64),
}
35
/// A search strategy to run against a value.
///
/// Equality and display are implemented by hand for this type because the
/// compiled matcher types are compared/rendered through their patterns.
#[derive(Clone, Debug)]
pub enum Search {
    /// A boxed Aho-Corasick automaton, the match types it was built for, and
    /// a boolean flag (displayed as an `i` prefix when `true`).
    // NOTE(review): the flag looks like "case-insensitive" - confirm against the builder.
    AhoCorasick(Box<AhoCorasick>, Vec<MatchType>, bool),
    /// Displayed as `any`.
    Any,
    /// Displayed as `contains(<text>)`.
    Contains(String),
    /// Displayed as `ends_with(<text>)`.
    EndsWith(String),
    /// Displayed as `exact(<text>)`.
    Exact(String),
    /// A single compiled regex plus a boolean flag (displayed as an `i`
    /// prefix when `true`).
    Regex(Regex, bool),
    /// A compiled regex set plus a boolean flag (displayed as an `i` prefix
    /// when `true`).
    RegexSet(RegexSet, bool),
    /// Displayed as `starts_with(<text>)`.
    StartsWith(String),
}
47impl fmt::Display for Search {
48    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
49        match self {
50            Self::AhoCorasick(_, t, i) => {
51                write!(f, "{}aho_corasick({:?})", if *i { "i" } else { "" }, t)
52            }
53            Self::Any => write!(f, "any"),
54            Self::Contains(s) => write!(f, "contains({})", s),
55            Self::EndsWith(s) => write!(f, "ends_with({})", s),
56            Self::Exact(s) => write!(f, "exact({})", s),
57            Self::Regex(s, i) => write!(f, "{}regex({})", if *i { "i" } else { "" }, s),
58            Self::RegexSet(s, i) => write!(
59                f,
60                "{}regex_set({:?})",
61                if *i { "i" } else { "" },
62                s.patterns()
63            ),
64            Self::StartsWith(s) => write!(f, "starts_with({})", s),
65        }
66    }
67}
68impl PartialEq for Search {
69    fn eq(&self, other: &Search) -> bool {
70        match (self, other) {
71            (Search::Any, Search::Any) => true,
72            (Search::AhoCorasick(_, m0, _), Search::AhoCorasick(_, m1, _)) => m0 == m1,
73            (Search::Contains(s0), Search::Contains(s1)) => s0 == s1,
74            (Search::EndsWith(s0), Search::EndsWith(s1)) => s0 == s1,
75            (Search::Exact(s0), Search::Exact(s1)) => s0 == s1,
76            (Search::Regex(r0, i0), Search::Regex(r1, i1)) => {
77                r0.as_str() == r1.as_str() && i0 == i1
78            }
79            (Search::RegexSet(r0, i0), Search::RegexSet(r1, i1)) => {
80                r0.patterns() == r1.patterns() && i0 == i1
81            }
82            (Search::StartsWith(s0), Search::StartsWith(s1)) => s0 == s1,
83            (_, _) => false,
84        }
85    }
86}
87
/// A node of the expression tree produced by the parser.
///
/// The `Display` implementation renders each variant as `<kind>(...)`, and
/// `is_solvable` splits the variants into solvable nodes and plain values.
#[derive(Clone, Debug, PartialEq)]
pub enum Expression {
    /// One operator applied across a list of expressions; displayed as
    /// `group(<op> <e1>, <e2>, ...)`.
    BooleanGroup(BoolSym, Vec<Expression>),
    /// A binary expression `left <op> right`, as built when the parser
    /// consumes an operator token.
    #[allow(clippy::enum_variant_names)]
    BooleanExpression(Box<Expression>, BoolSym, Box<Expression>),
    /// A boolean value; displayed as `bool(<value>)`.
    Boolean(bool),
    /// A named identifier wrapped in a modifier, as parsed from
    /// `<modifier>(<identifier>)`; displayed as `cast(<modifier>(<name>))`.
    Cast(String, ModSym),
    /// Displayed as `field(<name>)`.
    // NOTE(review): presumably a reference to a document field - confirm against the solver.
    Field(String),
    /// A floating point literal.
    Float(f64),
    /// A bare identifier token.
    Identifier(String),
    /// An integer literal.
    Integer(i64),
    /// A quantified match over an inner expression (see [`Match`]).
    Match(Match, Box<Expression>),
    /// Displayed as `matrix([<names>], [[<cells>], ...])`, where an absent
    /// cell renders as an empty string.
    // NOTE(review): the first vector looks like column names and the second like rows of optional cells - confirm.
    Matrix(Vec<String>, Vec<Vec<Option<Expression>>>),
    /// Negation of the inner expression, produced by the `not` token.
    Negate(Box<Expression>),
    /// Displayed as `nested(<name>, <expr>)`.
    // NOTE(review): presumably evaluates the inner expression under the named key - confirm.
    Nested(String, Box<Expression>),
    /// Displayed as `null`.
    Null,
    /// A [`Search`] with an associated string and boolean; displayed as
    /// `search(<string>, <search>, <bool>)`.
    // NOTE(review): the string looks like the target field name; the meaning of the flag is not visible here - confirm.
    Search(Search, String, bool),
}
106impl fmt::Display for Expression {
107    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
108        match self {
109            Self::BooleanGroup(o, g) => write!(
110                f,
111                "group({} {})",
112                o,
113                g.iter()
114                    .map(|e| e.to_string())
115                    .collect::<Vec<String>>()
116                    .join(", ")
117            ),
118            Self::BooleanExpression(l, o, r) => write!(f, "expression({} {} {})", l, o, r),
119            Self::Boolean(b) => write!(f, "bool({})", b),
120            Self::Cast(s, t) => write!(f, "cast({}({}))", t, s),
121            Self::Field(s) => write!(f, "field({})", s),
122            Self::Float(n) => write!(f, "float({})", n),
123            Self::Identifier(s) => write!(f, "identifier({})", s),
124            Self::Integer(i) => write!(f, "int({})", i),
125            Self::Match(Match::All, e) => {
126                write!(f, "all({})", e)
127            }
128            Self::Match(Match::Of(i), e) => write!(f, "of({}, {})", e, i),
129            Self::Matrix(c, m) => write!(
130                f,
131                "matrix([{}], [{}])",
132                c.iter()
133                    .map(|f| f.to_string())
134                    .collect::<Vec<String>>()
135                    .join(", "),
136                m.iter()
137                    .map(|c| format!(
138                        "[{}]",
139                        c.iter()
140                            .map(|e| e.as_ref().map(|s| s.to_string()).unwrap_or("".to_owned()))
141                            .collect::<Vec<String>>()
142                            .join(", ")
143                    ))
144                    .collect::<Vec<String>>()
145                    .join(", ")
146            ),
147            Self::Negate(e) => write!(f, "negate({})", e),
148            Self::Nested(s, e) => write!(f, "nested({}, {})", s, e),
149            Self::Null => write!(f, "null"),
150            Self::Search(e, s, c) => write!(f, "search({}, {}, {})", s, e, c),
151        }
152    }
153}
154impl Expression {
155    pub fn is_solvable(&self) -> bool {
156        match self {
157            Self::Boolean(_)
158            | Self::Cast(_, _)
159            | Self::Field(_)
160            | Self::Float(_)
161            | Self::Integer(_)
162            | Self::Null => false,
163            Self::BooleanGroup(_, _)
164            | Self::BooleanExpression(_, _, _)
165            | Self::Identifier(_)
166            | Self::Match(_, _)
167            | Self::Matrix(_, _)
168            | Self::Negate(_)
169            | Self::Nested(_, _)
170            | Self::Search(_, _, _) => true,
171        }
172    }
173}
174
175// Pratt Parser used to parse the token stream
176//
177// Left-Denotation (LED) - how an operator consumes to the right with a left-context
178// Null-Denotation (NUD) - how an operator consumes to the right with no left-context
179
180pub(crate) fn parse(tokens: &[Token]) -> crate::Result<Expression> {
181    let mut it = tokens.iter().peekable();
182    let expression = parse_expr(&mut it, 0)?;
183    if it.peek().is_some() {
184        let remaining = it.collect::<Vec<&Token>>();
185        return Err(crate::error::parse_invalid_expr(format!(
186            "failed to parse the following tokens - '{:?}'",
187            remaining
188        )));
189    }
190
191    debug!("parsed '{:?}' into '{:?}'", tokens, expression);
192
193    Ok(expression)
194}
195
196fn parse_expr<'a, I>(it: &mut Peekable<I>, right_binding_power: u8) -> crate::Result<Expression>
197where
198    I: Iterator<Item = &'a Token>,
199{
200    let mut left = parse_nud(it)?;
201    while let Some(&next) = it.peek() {
202        if right_binding_power >= next.binding_power() {
203            break;
204        }
205        left = parse_led(left, it)?;
206    }
207    Ok(left)
208}
209
210fn parse_led<'a, I>(left: Expression, it: &mut Peekable<I>) -> crate::Result<Expression>
211where
212    I: Iterator<Item = &'a Token>,
213{
214    match it.next() {
215        Some(t) => match *t {
216            Token::Operator(ref s) => {
217                let symbol = *s;
218                let right = parse_expr(it, t.binding_power())?;
219                // Handle special limited cases
220                match symbol {
221                    BoolSym::Equal => {
222                        match left {
223                            Expression::Boolean(_)
224                            | Expression::Cast(_, _)
225                            | Expression::Float(_)
226                            | Expression::Integer(_) => {}
227                            _ => {
228                                return Err(crate::error::parse_led_preceding(format!(
229                                    "encountered - '{:?}'",
230                                    t
231                                )));
232                            }
233                        }
234                        match right {
235                            Expression::Boolean(_)
236                            | Expression::Cast(_, _)
237                            | Expression::Float(_)
238                            | Expression::Integer(_) => {}
239                            _ => {
240                                return Err(crate::error::parse_led_following(format!(
241                                    "encountered - '{:?}'",
242                                    t
243                                )));
244                            }
245                        }
246                        // Type enforcement
247                        match (&left, &right) {
248                            (
249                                Expression::Cast(_, ModSym::Flt),
250                                Expression::Cast(_, ModSym::Flt),
251                            ) => {}
252                            (
253                                Expression::Cast(_, ModSym::Int),
254                                Expression::Cast(_, ModSym::Int),
255                            ) => {}
256                            (
257                                Expression::Cast(_, ModSym::Str),
258                                Expression::Cast(_, ModSym::Str),
259                            ) => {}
260                            (Expression::Cast(_, ModSym::Flt), Expression::Float(_)) => {}
261                            (Expression::Float(_), Expression::Cast(_, ModSym::Flt)) => {}
262                            (Expression::Cast(_, ModSym::Int), Expression::Integer(_)) => {}
263                            (Expression::Integer(_), Expression::Cast(_, ModSym::Int)) => {}
264                            (_, _) => {
265                                return Err(crate::error::parse_invalid_expr(format!(
266                                    "encountered - '{:?}'",
267                                    t
268                                )));
269                            }
270                        }
271                    }
272                    BoolSym::GreaterThan
273                    | BoolSym::GreaterThanOrEqual
274                    | BoolSym::LessThan
275                    | BoolSym::LessThanOrEqual => {
276                        match left {
277                            Expression::Cast(_, _)
278                            | Expression::Float(_)
279                            | Expression::Integer(_) => {}
280                            _ => {
281                                return Err(crate::error::parse_led_preceding(format!(
282                                    "encountered - '{:?}'",
283                                    t
284                                )));
285                            }
286                        }
287                        match right {
288                            Expression::Cast(_, _)
289                            | Expression::Float(_)
290                            | Expression::Integer(_) => {}
291                            _ => {
292                                return Err(crate::error::parse_led_following(format!(
293                                    "encountered - '{:?}'",
294                                    t
295                                )));
296                            }
297                        }
298                        // Type enforcement
299                        match (&left, &right) {
300                            (
301                                Expression::Cast(_, ModSym::Flt),
302                                Expression::Cast(_, ModSym::Flt),
303                            ) => {}
304                            (
305                                Expression::Cast(_, ModSym::Int),
306                                Expression::Cast(_, ModSym::Int),
307                            ) => {}
308                            (Expression::Cast(_, ModSym::Flt), Expression::Float(_)) => {}
309                            (Expression::Float(_), Expression::Cast(_, ModSym::Flt)) => {}
310                            (Expression::Cast(_, ModSym::Int), Expression::Integer(_)) => {}
311                            (Expression::Integer(_), Expression::Cast(_, ModSym::Int)) => {}
312                            (_, _) => {
313                                return Err(crate::error::parse_invalid_expr(format!(
314                                    "encountered - '{:?}'",
315                                    t
316                                )));
317                            }
318                        }
319                    }
320                    _ => {}
321                }
322                Ok(Expression::BooleanExpression(
323                    Box::new(left),
324                    symbol,
325                    Box::new(right),
326                ))
327            }
328            Token::Delimiter(_)
329            | Token::Float(_)
330            | Token::Identifier(_)
331            | Token::Integer(_)
332            | Token::Miscellaneous(_)
333            | Token::Modifier(_)
334            | Token::Match(_) => Err(crate::error::parse_invalid_token(format!(
335                "LED encountered - '{:?}'",
336                t
337            ))),
338        },
339        None => Err(crate::error::parse_invalid_token("LED expected token")),
340    }
341}
342
343fn parse_nud<'a, I>(it: &mut Peekable<I>) -> crate::Result<Expression>
344where
345    I: Iterator<Item = &'a Token>,
346{
347    match it.next() {
348        Some(t) => {
349            match *t {
350                Token::Delimiter(ref s) => match *s {
351                    DelSym::LeftParenthesis => {
352                        // Consume up to matching right parenthesis and parse that, we also discard
353                        // the matching right parenthesis
354                        let mut tokens: Vec<Token> = vec![];
355                        let mut depth = 1;
356                        for t in it.by_ref() {
357                            if t == &Token::Delimiter(DelSym::LeftParenthesis) {
358                                depth += 1;
359                            } else if t == &Token::Delimiter(DelSym::RightParenthesis) {
360                                depth -= 1;
361                                if depth == 0 {
362                                    break;
363                                }
364                            }
365                            tokens.push(t.clone());
366                        }
367                        parse(&tokens)
368                    }
369                    DelSym::Comma | DelSym::RightParenthesis => Err(
370                        crate::error::parse_invalid_token(format!("NUD encountered - '{:?}'", t)),
371                    ),
372                },
373                Token::Float(ref n) => Ok(Expression::Float(*n)),
374                Token::Identifier(ref n) => Ok(Expression::Identifier(n.to_string())),
375                Token::Integer(ref n) => Ok(Expression::Integer(*n)),
376                Token::Miscellaneous(ref m) => match *m {
377                    MiscSym::Not => {
378                        let right = parse_expr(it, t.binding_power())?;
379                        match right {
380                            Expression::BooleanGroup(_, _)
381                            | Expression::BooleanExpression(_, _, _)
382                            | Expression::Boolean(_)
383                            | Expression::Identifier(_)
384                            | Expression::Match(_, _)
385                            | Expression::Negate(_)
386                            | Expression::Nested(_, _)
387                            | Expression::Search(_, _, _) => {}
388                            _ => {
389                                return Err(crate::error::parse_invalid_token(
390                                    "NUD expected a negatable expression",
391                                ));
392                            }
393                        }
394                        Ok(Expression::Negate(Box::new(right)))
395                    }
396                },
397                Token::Modifier(ref m) => match *m {
398                    ModSym::Flt => {
399                        // We expect Flt(column_identifier)
400                        if let Some(t) = it.next() {
401                            match *t {
402                                Token::Delimiter(DelSym::LeftParenthesis) => {}
403                                _ => {
404                                    return Err(crate::error::parse_invalid_token(format!(
405                                        "NUD expected left parenthesis - '{:?}'",
406                                        t
407                                    )));
408                                }
409                            }
410                        } else {
411                            return Err(crate::error::parse_invalid_token(
412                                "NUD expected left parenthesis",
413                            ));
414                        }
415                        let token = match it.next() {
416                            Some(t) => t,
417                            None => {
418                                return Err(crate::error::parse_invalid_token(
419                                    "NUD expected column identifier",
420                                ));
421                            }
422                        };
423                        if let Some(t) = it.next() {
424                            match *t {
425                                Token::Delimiter(DelSym::RightParenthesis) => {}
426                                _ => {
427                                    return Err(crate::error::parse_invalid_token(format!(
428                                        "NUD expected right parenthesis - '{:?}'",
429                                        t
430                                    )));
431                                }
432                            }
433                        } else {
434                            return Err(crate::error::parse_invalid_token(
435                                "NUD expected right parenthesis",
436                            ));
437                        }
438                        match *token {
439                            Token::Identifier(ref s) => {
440                                Ok(Expression::Cast(s.to_string(), ModSym::Flt))
441                            }
442                            _ => Err(crate::error::parse_invalid_token(
443                                "NUD expected column identifier",
444                            )),
445                        }
446                    }
447                    ModSym::Int => {
448                        // We expect Int(column_identifier)
449                        if let Some(t) = it.next() {
450                            match *t {
451                                Token::Delimiter(DelSym::LeftParenthesis) => {}
452                                _ => {
453                                    return Err(crate::error::parse_invalid_token(format!(
454                                        "NUD expected left parenthesis - '{:?}'",
455                                        t
456                                    )));
457                                }
458                            }
459                        } else {
460                            return Err(crate::error::parse_invalid_token(
461                                "NUD expected left parenthesis",
462                            ));
463                        }
464                        let token = match it.next() {
465                            Some(t) => t,
466                            None => {
467                                return Err(crate::error::parse_invalid_token(
468                                    "NUD expected column identifier",
469                                ));
470                            }
471                        };
472                        if let Some(t) = it.next() {
473                            match *t {
474                                Token::Delimiter(DelSym::RightParenthesis) => {}
475                                _ => {
476                                    return Err(crate::error::parse_invalid_token(format!(
477                                        "NUD expected right parenthesis - '{:?}'",
478                                        t
479                                    )));
480                                }
481                            }
482                        } else {
483                            return Err(crate::error::parse_invalid_token(
484                                "NUD expected right parenthesis",
485                            ));
486                        }
487                        match *token {
488                            Token::Identifier(ref s) => {
489                                Ok(Expression::Cast(s.to_string(), ModSym::Int))
490                            }
491                            _ => Err(crate::error::parse_invalid_token(
492                                "NUD expected column identifier",
493                            )),
494                        }
495                    }
496                    ModSym::Not => {
                        // We expect Not(column_identifier)
498                        if let Some(t) = it.next() {
499                            match *t {
500                                Token::Delimiter(DelSym::LeftParenthesis) => {}
501                                _ => {
502                                    return Err(crate::error::parse_invalid_token(format!(
503                                        "NUD expected left parenthesis - '{:?}'",
504                                        t
505                                    )));
506                                }
507                            }
508                        } else {
509                            return Err(crate::error::parse_invalid_token(
510                                "NUD expected left parenthesis",
511                            ));
512                        }
513                        let token = match it.next() {
514                            Some(t) => t,
515                            None => {
516                                return Err(crate::error::parse_invalid_token(
517                                    "NUD expected column identifier",
518                                ));
519                            }
520                        };
521                        if let Some(t) = it.next() {
522                            match *t {
523                                Token::Delimiter(DelSym::RightParenthesis) => {}
524                                _ => {
525                                    return Err(crate::error::parse_invalid_token(format!(
526                                        "NUD expected right parenthesis - '{:?}'",
527                                        t
528                                    )));
529                                }
530                            }
531                        } else {
532                            return Err(crate::error::parse_invalid_token(
533                                "NUD expected right parenthesis",
534                            ));
535                        }
536                        match *token {
537                            Token::Identifier(ref s) => {
538                                Ok(Expression::Cast(s.to_string(), ModSym::Not))
539                            }
540                            _ => Err(crate::error::parse_invalid_token(
541                                "NUD expected column identifier",
542                            )),
543                        }
544                    }
545                    ModSym::Str => {
546                        // We expect string(column_identifier)
547                        if let Some(t) = it.next() {
548                            match *t {
549                                Token::Delimiter(DelSym::LeftParenthesis) => {}
550                                _ => {
551                                    return Err(crate::error::parse_invalid_token(format!(
552                                        "NUD expected left parenthesis - '{:?}'",
553                                        t
554                                    )));
555                                }
556                            }
557                        } else {
558                            return Err(crate::error::parse_invalid_token(
559                                "NUD expected left parenthesis",
560                            ));
561                        }
562                        let token = match it.next() {
563                            Some(t) => t,
564                            None => {
565                                return Err(crate::error::parse_invalid_token(
566                                    "NUD expected column identifier",
567                                ));
568                            }
569                        };
570                        if let Some(t) = it.next() {
571                            match *t {
572                                Token::Delimiter(DelSym::RightParenthesis) => {}
573                                _ => {
574                                    return Err(crate::error::parse_invalid_token(format!(
575                                        "NUD expected right parenthesis - '{:?}'",
576                                        t
577                                    )));
578                                }
579                            }
580                        } else {
581                            return Err(crate::error::parse_invalid_token(
582                                "NUD expected right parenthesis",
583                            ));
584                        }
585                        match *token {
586                            Token::Identifier(ref s) => {
587                                Ok(Expression::Cast(s.to_string(), ModSym::Str))
588                            }
589                            _ => Err(crate::error::parse_invalid_token(
590                                "NUD expected column identifier",
591                            )),
592                        }
593                    }
594                },
595                Token::Match(ref m) => match *m {
596                    MatchSym::All => {
597                        // We expect all(column_identifier)
598                        if let Some(t) = it.next() {
599                            match *t {
600                                Token::Delimiter(DelSym::LeftParenthesis) => {}
601                                _ => {
602                                    return Err(crate::error::parse_invalid_token(format!(
603                                        "NUD expected left parenthesis - '{:?}'",
604                                        t
605                                    )));
606                                }
607                            }
608                        } else {
609                            return Err(crate::error::parse_invalid_token(
610                                "NUD expected left parenthesis",
611                            ));
612                        }
613                        let token = match it.next() {
614                            Some(t) => t,
615                            None => {
616                                return Err(crate::error::parse_invalid_token(
617                                    "NUD expected column identifier",
618                                ));
619                            }
620                        };
621                        if let Some(t) = it.next() {
622                            match *t {
623                                Token::Delimiter(DelSym::RightParenthesis) => {}
624                                _ => {
625                                    return Err(crate::error::parse_invalid_token(format!(
626                                        "NUD expected right parenthesis - '{:?}'",
627                                        t
628                                    )));
629                                }
630                            }
631                        } else {
632                            return Err(crate::error::parse_invalid_token(
633                                "NUD expected right parenthesis",
634                            ));
635                        }
636                        match *token {
637                            Token::Identifier(ref s) => Ok(Expression::Match(
638                                Match::All,
639                                Box::new(Expression::Identifier(s.to_string())),
640                            )),
641                            _ => Err(crate::error::parse_invalid_token(
642                                "NUD expected column identifier",
643                            )),
644                        }
645                    }
646                    MatchSym::Of => {
647                        // We expect of(column_identifier, 1)
648                        if let Some(t) = it.next() {
649                            match *t {
650                                Token::Delimiter(DelSym::LeftParenthesis) => {}
651                                _ => {
652                                    return Err(crate::error::parse_invalid_token(format!(
653                                        "NUD expected left parenthesis - '{:?}'",
654                                        t
655                                    )));
656                                }
657                            }
658                        } else {
659                            return Err(crate::error::parse_invalid_token(
660                                "NUD expected left parenthesis",
661                            ));
662                        }
663                        let token = match it.next() {
664                            Some(t) => t,
665                            None => {
666                                return Err(crate::error::parse_invalid_token(
667                                    "NUD expected column identifier",
668                                ));
669                            }
670                        };
671                        if let Some(t) = it.next() {
672                            match *t {
673                                Token::Delimiter(DelSym::Comma) => {}
674                                _ => {
675                                    return Err(crate::error::parse_invalid_token(format!(
676                                        "NUD expected comma - '{:?}'",
677                                        t
678                                    )));
679                                }
680                            }
681                        } else {
682                            return Err(crate::error::parse_invalid_token("NUD expected comma"));
683                        }
684                        let count = match it.next() {
685                            Some(t) => match t {
686                                Token::Integer(c) => match u64::try_from(*c) {
687                                    Ok(u) => u,
688                                    Err(_) => {
689                                        return Err(crate::error::parse_invalid_token(format!(
690                                            "NUD expected positive integer - '{:?}'",
691                                            t
692                                        )));
693                                    }
694                                },
695                                _ => {
696                                    return Err(crate::error::parse_invalid_token(format!(
697                                        "NUD expected integer - '{:?}'",
698                                        t
699                                    )));
700                                }
701                            },
702                            None => {
703                                return Err(crate::error::parse_invalid_token(
704                                    "NUD expected integer",
705                                ));
706                            }
707                        };
708                        if let Some(t) = it.next() {
709                            match *t {
710                                Token::Delimiter(DelSym::RightParenthesis) => {}
711                                _ => {
712                                    return Err(crate::error::parse_invalid_token(format!(
713                                        "NUD expected right parenthesis - '{:?}'",
714                                        t
715                                    )));
716                                }
717                            }
718                        } else {
719                            return Err(crate::error::parse_invalid_token(
720                                "NUD expected right parenthesis",
721                            ));
722                        }
723                        match *token {
724                            Token::Identifier(ref s) => Ok(Expression::Match(
725                                Match::Of(count),
726                                Box::new(Expression::Identifier(s.to_string())),
727                            )),
728                            _ => Err(crate::error::parse_invalid_token(
729                                "NUD expected column identifier",
730                            )),
731                        }
732                    }
733                },
734                Token::Operator(_) => Err(crate::error::parse_invalid_token(format!(
735                    "NUD encountered - '{:?}'",
736                    t
737                ))),
738            }
739        }
740        None => Err(crate::error::parse_invalid_token("NUD expected token")),
741    }
742}
743
744pub fn parse_identifier(yaml: &Yaml) -> crate::Result<Expression> {
745    match yaml {
746        Yaml::Mapping(m) => parse_mapping(m),
747        Yaml::Sequence(s) => {
748            // We allow a sequence of maps only on the root
749            let mut it = s.iter();
750            match it.next() {
751                Some(v) => match &v {
752                    Yaml::Mapping(m) => {
753                        let mut expressions = vec![parse_mapping(m)?];
754                        for value in it {
755                            // NOTE: A sequence can only be one type
756                            if let Yaml::Mapping(mapping) = value {
757                                expressions.push(parse_mapping(mapping)?);
758                            } else {
759                                return Err(crate::error::parse_invalid_ident(format!(
760                                    "expected a sequence of mappings, encountered - {:?}",
761                                    yaml
762                                )));
763                            }
764                        }
765                        Ok(Expression::BooleanGroup(BoolSym::Or, expressions))
766                    }
767                    _ => Err(crate::error::parse_invalid_ident(format!(
768                        "expected a sequence of mappings, encountered - {:?}",
769                        yaml
770                    ))),
771                },
772                None => Err(crate::error::parse_invalid_ident(format!(
773                    "expected a non empty sequence of mappings, encountered - {:?}",
774                    yaml
775                ))),
776            }
777        }
778        _ => Err(crate::error::parse_invalid_ident(format!(
779            "expected mapping or sequence, encountered - {:?}",
780            yaml
781        ))),
782    }
783}
784
785// TODO: Extract common code and try to make this function a little bit more readable
786fn parse_mapping(mapping: &Mapping) -> crate::Result<Expression> {
787    let mut expressions = vec![];
788    for (k, v) in mapping {
789        let mut misc: Option<ModSym> = None;
790        let (e, f) = match k {
791            Yaml::String(s) => {
792                // NOTE: Tokenise splits on whitespace, but this is undesired for keys, merge them
793                // back together
794                let mut identifier = vec![];
795                let mut tokens = vec![];
796                for token in s.tokenise()? {
797                    match token {
798                        Token::Identifier(s) => identifier.push(s),
799                        _ => {
800                            if !identifier.is_empty() {
801                                tokens.push(Token::Identifier(identifier.join(" ")));
802                                identifier.clear();
803                            }
804                            tokens.push(token);
805                        }
806                    }
807                }
808                if !identifier.is_empty() {
809                    tokens.push(Token::Identifier(identifier.join(" ")));
810                    identifier.clear();
811                }
812                let expr = parse(&tokens)?;
813                let (e, s) = match expr {
814                    Expression::Cast(f, s) => {
815                        misc = Some(s.clone());
816                        match s {
817                            ModSym::Flt => (Expression::Cast(f.clone(), s), f),
818                            ModSym::Int => (Expression::Cast(f.clone(), s), f),
819                            ModSym::Not => (Expression::Field(f.clone()), f),
820                            ModSym::Str => (Expression::Cast(f.clone(), s), f),
821                        }
822                    }
823                    Expression::Identifier(s) => (Expression::Field(s.clone()), s),
824                    Expression::Match(m, i) => {
825                        if let Yaml::Sequence(_) = v {
826                            match *i {
827                                Expression::Identifier(s) => (
828                                    Expression::Match(m, Box::new(Expression::Field(s.clone()))),
829                                    s,
830                                ),
831                                _ => {
832                                    return Err(crate::error::parse_invalid_ident(format!(
833                                        "match condition mut contain a field, encountered - {:?}",
834                                        k
835                                    )));
836                                }
837                            }
838                        } else {
839                            return Err(crate::error::parse_invalid_ident(format!(
840                                "match condition is only valid for sequences, encountered - {:?}",
841                                k
842                            )));
843                        }
844                    }
845                    _ => {
846                        return Err(crate::error::parse_invalid_ident(format!(
847                            "mapping key must be a string or valid match condition, encountered - {:?}",
848                            k
849                        )));
850                    }
851                };
852                (e, s)
853            }
854            _ => {
855                return Err(crate::error::parse_invalid_ident(format!(
856                    "mapping key must be a string, encountered - {:?}",
857                    k
858                )));
859            }
860        };
861        let expression = match v {
862            Yaml::Bool(b) => {
863                if let Some(ModSym::Int) = misc {
864                    Expression::BooleanExpression(
865                        Box::new(e.clone()),
866                        BoolSym::Equal,
867                        Box::new(Expression::Integer(if *b { 1 } else { 0 })),
868                    )
869                } else if let Some(ModSym::Str) = misc {
870                    Expression::Search(Search::Exact(b.to_string()), f.to_owned(), true)
871                } else {
872                    Expression::BooleanExpression(
873                        Box::new(e.clone()),
874                        BoolSym::Equal,
875                        Box::new(Expression::Boolean(*b)),
876                    )
877                }
878            }
879            Yaml::Number(n) => {
880                if let Some(i) = n.as_i64() {
881                    if let Some(ModSym::Str) = misc {
882                        Expression::Search(Search::Exact(i.to_string()), f.to_owned(), true)
883                    } else {
884                        Expression::BooleanExpression(
885                            Box::new(e.clone()),
886                            BoolSym::Equal,
887                            Box::new(Expression::Integer(i)),
888                        )
889                    }
890                } else if let Some(i) = n.as_f64() {
891                    if let Some(ModSym::Int) = misc {
892                        return Err(crate::error::parse_invalid_ident(format!(
893                            "float cannot be cast into an integer, encountered - {:?}",
894                            k
895                        )));
896                    } else if let Some(ModSym::Str) = misc {
897                        Expression::Search(Search::Exact(i.to_string()), f.to_owned(), true)
898                    } else {
899                        Expression::BooleanExpression(
900                            Box::new(e.clone()),
901                            BoolSym::Equal,
902                            Box::new(Expression::Float(i)),
903                        )
904                    }
905                } else {
906                    return Err(crate::error::parse_invalid_ident(format!(
907                        "number must be a signed integer or float, encountered - {:?}",
908                        k
909                    )));
910                }
911            }
912            Yaml::Null => Expression::BooleanExpression(
913                Box::new(e.clone()),
914                BoolSym::Equal,
915                Box::new(Expression::Null),
916            ),
917            Yaml::String(s) => {
918                let identifier = s.to_owned().into_identifier()?;
919                let mut cast = false;
920                if let Some(ref m) = misc {
921                    if let ModSym::Str = m {
922                        cast = true;
923                    }
924                    match &identifier.pattern {
925                        Pattern::Any
926                        | Pattern::Regex(_)
927                        | Pattern::Contains(_)
928                        | Pattern::EndsWith(_)
929                        | Pattern::Exact(_)
930                        | Pattern::StartsWith(_) => {
931                            if let ModSym::Int = m {
932                                return Err(crate::error::parse_invalid_ident(format!(
933                                    "cannot cast string to integer, encountered - {:?}",
934                                    k
935                                )));
936                            }
937                        }
938                        Pattern::Equal(_)
939                        | Pattern::GreaterThan(_)
940                        | Pattern::GreaterThanOrEqual(_)
941                        | Pattern::LessThan(_)
942                        | Pattern::LessThanOrEqual(_)
943                        | Pattern::FEqual(_)
944                        | Pattern::FGreaterThan(_)
945                        | Pattern::FGreaterThanOrEqual(_)
946                        | Pattern::FLessThan(_)
947                        | Pattern::FLessThanOrEqual(_) => {
948                            if let ModSym::Str = m {
949                                return Err(crate::error::parse_invalid_ident(format!(
950                                    "cannot cast integer to string, encountered - {:?}",
951                                    k
952                                )));
953                            }
954                        }
955                    }
956                }
957                match identifier.pattern {
958                    Pattern::Equal(i) => Expression::BooleanExpression(
959                        Box::new(e.clone()),
960                        BoolSym::Equal,
961                        Box::new(Expression::Integer(i)),
962                    ),
963                    Pattern::GreaterThan(i) => Expression::BooleanExpression(
964                        Box::new(e.clone()),
965                        BoolSym::GreaterThan,
966                        Box::new(Expression::Integer(i)),
967                    ),
968                    Pattern::GreaterThanOrEqual(i) => Expression::BooleanExpression(
969                        Box::new(e.clone()),
970                        BoolSym::GreaterThanOrEqual,
971                        Box::new(Expression::Integer(i)),
972                    ),
973                    Pattern::LessThan(i) => Expression::BooleanExpression(
974                        Box::new(e.clone()),
975                        BoolSym::LessThan,
976                        Box::new(Expression::Integer(i)),
977                    ),
978                    Pattern::LessThanOrEqual(i) => Expression::BooleanExpression(
979                        Box::new(e.clone()),
980                        BoolSym::LessThanOrEqual,
981                        Box::new(Expression::Integer(i)),
982                    ),
983                    Pattern::FEqual(i) => Expression::BooleanExpression(
984                        Box::new(e.clone()),
985                        BoolSym::Equal,
986                        Box::new(Expression::Float(i)),
987                    ),
988                    Pattern::FGreaterThan(i) => Expression::BooleanExpression(
989                        Box::new(e.clone()),
990                        BoolSym::GreaterThan,
991                        Box::new(Expression::Float(i)),
992                    ),
993                    Pattern::FGreaterThanOrEqual(i) => Expression::BooleanExpression(
994                        Box::new(e.clone()),
995                        BoolSym::GreaterThanOrEqual,
996                        Box::new(Expression::Float(i)),
997                    ),
998                    Pattern::FLessThan(i) => Expression::BooleanExpression(
999                        Box::new(e.clone()),
1000                        BoolSym::LessThan,
1001                        Box::new(Expression::Float(i)),
1002                    ),
1003                    Pattern::FLessThanOrEqual(i) => Expression::BooleanExpression(
1004                        Box::new(e.clone()),
1005                        BoolSym::LessThanOrEqual,
1006                        Box::new(Expression::Float(i)),
1007                    ),
1008                    Pattern::Any => Expression::Search(Search::Any, f.to_owned(), cast),
1009                    Pattern::Regex(c) => Expression::Search(
1010                        Search::Regex(c, identifier.ignore_case),
1011                        f.to_owned(),
1012                        cast,
1013                    ),
1014                    Pattern::Contains(c) => Expression::Search(
1015                        if identifier.ignore_case {
1016                            Search::AhoCorasick(
1017                                Box::new(
1018                                    AhoCorasickBuilder::new()
1019                                        .ascii_case_insensitive(true)
1020                                        .kind(Some(AhoCorasickKind::DFA))
1021                                        .build(vec![c.clone()])
1022                                        .expect("failed to build dfa"),
1023                                ),
1024                                vec![MatchType::Contains(c)],
1025                                true,
1026                            )
1027                        } else {
1028                            Search::Contains(c)
1029                        },
1030                        f.to_owned(),
1031                        cast,
1032                    ),
1033                    Pattern::EndsWith(c) => Expression::Search(
1034                        if identifier.ignore_case {
1035                            Search::AhoCorasick(
1036                                Box::new(
1037                                    AhoCorasickBuilder::new()
1038                                        .ascii_case_insensitive(true)
1039                                        .kind(Some(AhoCorasickKind::DFA))
1040                                        .build(vec![c.clone()])
1041                                        .expect("failed to build dfa"),
1042                                ),
1043                                vec![MatchType::EndsWith(c)],
1044                                true,
1045                            )
1046                        } else {
1047                            Search::EndsWith(c)
1048                        },
1049                        f.to_owned(),
1050                        cast,
1051                    ),
1052                    Pattern::Exact(c) => Expression::Search(
1053                        if !c.is_empty() && identifier.ignore_case {
1054                            Search::AhoCorasick(
1055                                Box::new(
1056                                    AhoCorasickBuilder::new()
1057                                        .ascii_case_insensitive(true)
1058                                        .kind(Some(AhoCorasickKind::DFA))
1059                                        .build(vec![c.clone()])
1060                                        .expect("failed to build dfa"),
1061                                ),
1062                                vec![MatchType::Exact(c)],
1063                                true,
1064                            )
1065                        } else {
1066                            Search::Exact(c)
1067                        },
1068                        f.to_owned(),
1069                        cast,
1070                    ),
1071                    Pattern::StartsWith(c) => Expression::Search(
1072                        if identifier.ignore_case {
1073                            Search::AhoCorasick(
1074                                Box::new(
1075                                    AhoCorasickBuilder::new()
1076                                        .ascii_case_insensitive(true)
1077                                        .kind(Some(AhoCorasickKind::DFA))
1078                                        .build(vec![c.clone()])
1079                                        .expect("failed to build dfa"),
1080                                ),
1081                                vec![MatchType::StartsWith(c)],
1082                                true,
1083                            )
1084                        } else {
1085                            Search::StartsWith(c)
1086                        },
1087                        f.to_owned(),
1088                        cast,
1089                    ),
1090                }
1091            }
1092            Yaml::Mapping(m) => {
1093                if misc.is_some() {
1094                    return Err(crate::error::parse_invalid_ident(format!(
1095                        "nested mappings are not supported when casting or negating a field, encountered - {:?}",
1096                        k
1097                    )));
1098                }
1099                Expression::Nested(f.to_owned(), Box::new(parse_mapping(m)?))
1100            }
1101            Yaml::Sequence(s) => {
1102                // TODO: This block could probably be cleaned...
                // Now we need to be as fast as possible. It turns out that builtin string functions
                // are fastest when we only need to check a single condition; when we need to check
                // more than one, AhoCorasick becomes the quicker option, even though AC should be as
                // fast as starts_with and contains... We also want to order in terms of quickest on
                // average:
1107                //
1108                //  1. ExactMatch
1109                //  2. StartsWith
1110                //  3. EndsWith
1111                //  4. Contains
1112                //  5. AhoCorasick
1113                //  6. Regex
1114                //
1115                //  And for the above use AhoCorasick when list is more than one for 2,3,4
1116                let mut exact: Vec<Identifier> = vec![];
1117                let mut starts_with: Vec<Identifier> = vec![];
1118                let mut ends_with: Vec<Identifier> = vec![];
1119                let mut contains: Vec<Identifier> = vec![];
1120                let mut regex: Vec<Identifier> = vec![];
1121                let mut rest: Vec<Expression> = vec![]; // NOTE: Don't care about speed of numbers atm
1122
1123                let mut boolean = false;
1124                let mut cast = false;
1125                let mut mapping = false;
1126                let mut number = false;
1127                let mut string = false;
1128
1129                let unmatched_e = if let Expression::Match(_, e) = &e {
1130                    *e.clone()
1131                } else {
1132                    e.clone()
1133                };
1134
1135                for value in s {
1136                    let identifier = match value {
1137                        Yaml::Bool(b) => {
1138                            if let Some(ModSym::Int) = misc {
1139                                number = true;
1140                                rest.push(Expression::BooleanExpression(
1141                                    Box::new(unmatched_e.clone()),
1142                                    BoolSym::Equal,
1143                                    Box::new(Expression::Integer(if *b { 1 } else { 0 })),
1144                                ))
1145                            } else if let Some(ModSym::Str) = misc {
1146                                string = true;
1147                                exact.push(Identifier {
1148                                    ignore_case: false,
1149                                    pattern: Pattern::Exact(b.to_string()),
1150                                });
1151                            } else {
1152                                boolean = true;
1153                                rest.push(Expression::BooleanExpression(
1154                                    Box::new(e.clone()),
1155                                    BoolSym::Equal,
1156                                    Box::new(Expression::Boolean(*b)),
1157                                ))
1158                            }
1159                            continue;
1160                        }
1161                        Yaml::Null => {
1162                            rest.push(Expression::BooleanExpression(
1163                                Box::new(unmatched_e.clone()),
1164                                BoolSym::Equal,
1165                                Box::new(Expression::Null),
1166                            ));
1167                            continue;
1168                        }
1169                        Yaml::Number(n) => {
1170                            if let Some(i) = n.as_i64() {
1171                                if let Some(ModSym::Str) = misc {
1172                                    string = true;
1173                                    exact.push(Identifier {
1174                                        ignore_case: false,
1175                                        pattern: Pattern::Exact(i.to_string()),
1176                                    });
1177                                } else {
1178                                    number = true;
1179                                    rest.push(Expression::BooleanExpression(
1180                                        Box::new(e.clone()),
1181                                        BoolSym::Equal,
1182                                        Box::new(Expression::Integer(i)),
1183                                    ));
1184                                }
1185                                continue;
1186                            } else if let Some(i) = n.as_f64() {
1187                                if let Some(ModSym::Int) = misc {
1188                                    return Err(crate::error::parse_invalid_ident(format!(
1189                                        "float cannot be cast into an integer, encountered - {:?}",
1190                                        k
1191                                    )));
1192                                } else if let Some(ModSym::Str) = misc {
1193                                    string = true;
1194                                    exact.push(Identifier {
1195                                        ignore_case: false,
1196                                        pattern: Pattern::Exact(i.to_string()),
1197                                    });
1198                                } else {
1199                                    number = true;
1200                                    rest.push(Expression::BooleanExpression(
1201                                        Box::new(e.clone()),
1202                                        BoolSym::Equal,
1203                                        Box::new(Expression::Float(i)),
1204                                    ))
1205                                }
1206                                continue;
1207                            }
1208                            return Err(crate::error::parse_invalid_ident(format!(
1209                                "number must be a signed integer or float, encountered - {:?}",
1210                                k
1211                            )));
1212                        }
1213                        Yaml::String(s) => s.clone().into_identifier()?,
1214
1215                        Yaml::Mapping(m) => {
1216                            if misc.is_some() {
1217                                return Err(crate::error::parse_invalid_ident(format!(
1218                                    "nested mappings are not supported when casting or negating a field, encountered - {:?}",
1219                                    k
1220                                )));
1221                            }
1222                            mapping = true;
                            // FIXME: We should be nesting at the end of the sequence, currently we
                            // have to shake to remove this...
1225                            rest.push(Expression::Nested(
1226                                f.to_owned(),
1227                                Box::new(parse_mapping(m)?),
1228                            ));
1229                            continue;
1230                        }
1231                        _ => {
1232                            return Err(crate::error::parse_invalid_ident(format!(
1233                                "value must be a mapping or string, encountered - {:?}",
1234                                k
1235                            )));
1236                        }
1237                    };
1238                    if let Some(ref m) = misc {
1239                        if let ModSym::Str = m {
1240                            cast = true;
1241                        }
1242                        match &identifier.pattern {
1243                            Pattern::Any
1244                            | Pattern::Regex(_)
1245                            | Pattern::Contains(_)
1246                            | Pattern::EndsWith(_)
1247                            | Pattern::Exact(_)
1248                            | Pattern::StartsWith(_) => {
1249                                if let ModSym::Int = m {
1250                                    return Err(crate::error::parse_invalid_ident(format!(
1251                                        "cannot cast string to integer, encountered - {:?}",
1252                                        k
1253                                    )));
1254                                }
1255                            }
1256                            Pattern::Equal(_)
1257                            | Pattern::GreaterThan(_)
1258                            | Pattern::GreaterThanOrEqual(_)
1259                            | Pattern::LessThan(_)
1260                            | Pattern::LessThanOrEqual(_)
1261                            | Pattern::FEqual(_)
1262                            | Pattern::FGreaterThan(_)
1263                            | Pattern::FGreaterThanOrEqual(_)
1264                            | Pattern::FLessThan(_)
1265                            | Pattern::FLessThanOrEqual(_) => {
1266                                if let ModSym::Str = m {
1267                                    return Err(crate::error::parse_invalid_ident(format!(
1268                                        "cannot cast integer to string, encountered - {:?}",
1269                                        k
1270                                    )));
1271                                }
1272                            }
1273                        }
1274                    }
1275                    match identifier.pattern {
1276                        Pattern::Exact(_) => {
1277                            string = true;
1278                            exact.push(identifier)
1279                        }
1280                        Pattern::StartsWith(_) => {
1281                            string = true;
1282                            starts_with.push(identifier)
1283                        }
1284                        Pattern::EndsWith(_) => {
1285                            string = true;
1286                            ends_with.push(identifier)
1287                        }
1288                        Pattern::Contains(_) => {
1289                            string = true;
1290                            contains.push(identifier)
1291                        }
1292                        Pattern::Regex(_) => {
1293                            string = true;
1294                            regex.push(identifier)
1295                        }
1296                        Pattern::Any => {
1297                            string = true;
1298                            rest.push(Expression::Search(Search::Any, f.to_owned(), cast))
1299                        }
1300                        Pattern::Equal(i) => {
1301                            number = true;
1302                            rest.push(Expression::BooleanExpression(
1303                                Box::new(e.clone()),
1304                                BoolSym::Equal,
1305                                Box::new(Expression::Integer(i)),
1306                            ))
1307                        }
1308                        Pattern::GreaterThan(i) => {
1309                            number = true;
1310                            rest.push(Expression::BooleanExpression(
1311                                Box::new(e.clone()),
1312                                BoolSym::GreaterThan,
1313                                Box::new(Expression::Integer(i)),
1314                            ))
1315                        }
1316                        Pattern::GreaterThanOrEqual(i) => {
1317                            number = true;
1318                            rest.push(Expression::BooleanExpression(
1319                                Box::new(e.clone()),
1320                                BoolSym::GreaterThanOrEqual,
1321                                Box::new(Expression::Integer(i)),
1322                            ))
1323                        }
1324                        Pattern::LessThan(i) => {
1325                            number = true;
1326                            rest.push(Expression::BooleanExpression(
1327                                Box::new(e.clone()),
1328                                BoolSym::LessThan,
1329                                Box::new(Expression::Integer(i)),
1330                            ))
1331                        }
1332                        Pattern::LessThanOrEqual(i) => {
1333                            number = true;
1334                            rest.push(Expression::BooleanExpression(
1335                                Box::new(e.clone()),
1336                                BoolSym::LessThanOrEqual,
1337                                Box::new(Expression::Integer(i)),
1338                            ))
1339                        }
1340                        Pattern::FEqual(i) => {
1341                            number = true;
1342                            rest.push(Expression::BooleanExpression(
1343                                Box::new(e.clone()),
1344                                BoolSym::Equal,
1345                                Box::new(Expression::Float(i)),
1346                            ))
1347                        }
1348                        Pattern::FGreaterThan(i) => {
1349                            number = true;
1350                            rest.push(Expression::BooleanExpression(
1351                                Box::new(e.clone()),
1352                                BoolSym::GreaterThan,
1353                                Box::new(Expression::Float(i)),
1354                            ))
1355                        }
1356                        Pattern::FGreaterThanOrEqual(i) => {
1357                            number = true;
1358                            rest.push(Expression::BooleanExpression(
1359                                Box::new(e.clone()),
1360                                BoolSym::GreaterThanOrEqual,
1361                                Box::new(Expression::Float(i)),
1362                            ))
1363                        }
1364                        Pattern::FLessThan(i) => {
1365                            number = true;
1366                            rest.push(Expression::BooleanExpression(
1367                                Box::new(e.clone()),
1368                                BoolSym::LessThan,
1369                                Box::new(Expression::Float(i)),
1370                            ))
1371                        }
1372                        Pattern::FLessThanOrEqual(i) => {
1373                            number = true;
1374                            rest.push(Expression::BooleanExpression(
1375                                Box::new(e.clone()),
1376                                BoolSym::LessThanOrEqual,
1377                                Box::new(Expression::Float(i)),
1378                            ))
1379                        }
1380                    }
1381                }
1382                let mut multiple = false;
1383                let mut group: Vec<Expression> = vec![];
1384                let mut context: Vec<MatchType> = vec![];
1385                let mut needles: Vec<String> = vec![];
1386                let mut icontext: Vec<MatchType> = vec![];
1387                let mut ineedles: Vec<String> = vec![];
1388                let mut regex_set: Vec<Regex> = vec![];
1389                let mut iregex_set: Vec<Regex> = vec![];
1390                for i in starts_with.into_iter() {
1391                    if let Pattern::StartsWith(s) = i.pattern {
1392                        if i.ignore_case {
1393                            icontext.push(MatchType::StartsWith(s.clone()));
1394                            ineedles.push(s);
1395                        } else {
1396                            context.push(MatchType::StartsWith(s.clone()));
1397                            needles.push(s);
1398                        }
1399                    }
1400                }
1401                for i in contains.into_iter() {
1402                    if let Pattern::Contains(s) = i.pattern {
1403                        if i.ignore_case {
1404                            icontext.push(MatchType::Contains(s.clone()));
1405                            ineedles.push(s);
1406                        } else {
1407                            context.push(MatchType::Contains(s.clone()));
1408                            needles.push(s);
1409                        }
1410                    }
1411                }
1412                for i in ends_with.into_iter() {
1413                    if let Pattern::EndsWith(s) = i.pattern {
1414                        if i.ignore_case {
1415                            icontext.push(MatchType::EndsWith(s.clone()));
1416                            ineedles.push(s);
1417                        } else {
1418                            context.push(MatchType::EndsWith(s.clone()));
1419                            needles.push(s);
1420                        }
1421                    }
1422                }
1423                for i in exact.into_iter() {
1424                    if let Pattern::Exact(s) = i.pattern {
1425                        // NOTE: Do not allow empty string into the needles as it causes massive slow down,
1426                        // don't ask me why I have not looked into it!
1427                        if s.is_empty() {
1428                            group.push(Expression::Search(Search::Exact(s), f.to_owned(), cast));
1429                        } else if i.ignore_case {
1430                            icontext.push(MatchType::Exact(s.clone()));
1431                            ineedles.push(s);
1432                        } else {
1433                            context.push(MatchType::Exact(s.clone()));
1434                            needles.push(s);
1435                        }
1436                    }
1437                }
1438                for i in regex.into_iter() {
1439                    if let Pattern::Regex(r) = i.pattern {
1440                        if i.ignore_case {
1441                            iregex_set.push(r);
1442                        } else {
1443                            regex_set.push(r);
1444                        }
1445                    }
1446                }
1447                if !needles.is_empty() {
1448                    if needles.len() == 1 {
1449                        let s = match context.into_iter().next().expect("failed to get context") {
1450                            MatchType::Contains(c) => Search::Contains(c),
1451                            MatchType::EndsWith(c) => Search::EndsWith(c),
1452                            MatchType::Exact(c) => Search::Exact(c),
1453                            MatchType::StartsWith(c) => Search::StartsWith(c),
1454                        };
1455                        group.push(Expression::Search(s, f.to_owned(), cast));
1456                    } else {
1457                        multiple = true;
1458                        group.push(Expression::Search(
1459                            Search::AhoCorasick(
1460                                Box::new(
1461                                    AhoCorasickBuilder::new()
1462                                        .kind(Some(AhoCorasickKind::DFA))
1463                                        .build(needles)
1464                                        .expect("failed to build dfa"),
1465                                ),
1466                                context,
1467                                false,
1468                            ),
1469                            f.to_owned(),
1470                            cast,
1471                        ));
1472                    }
1473                }
1474                if !ineedles.is_empty() {
1475                    multiple = true;
1476                    group.push(Expression::Search(
1477                        Search::AhoCorasick(
1478                            Box::new(
1479                                AhoCorasickBuilder::new()
1480                                    .ascii_case_insensitive(true)
1481                                    .kind(Some(AhoCorasickKind::DFA))
1482                                    .build(ineedles)
1483                                    .expect("failed to build dfa"),
1484                            ),
1485                            icontext,
1486                            true,
1487                        ),
1488                        f.to_owned(),
1489                        cast,
1490                    ));
1491                }
1492                if !regex_set.is_empty() {
1493                    if regex_set.len() == 1 {
1494                        group.push(Expression::Search(
1495                            Search::Regex(
1496                                regex_set.into_iter().next().expect("failed to get regex"),
1497                                false,
1498                            ),
1499                            f.to_owned(),
1500                            cast,
1501                        ));
1502                    } else {
1503                        multiple = true;
1504                        group.push(Expression::Search(
1505                            Search::RegexSet(
1506                                RegexSetBuilder::new(
1507                                    regex_set
1508                                        .into_iter()
1509                                        .map(|r| r.as_str().to_string())
1510                                        .collect::<Vec<_>>(),
1511                                )
1512                                .build()
1513                                .expect("could not build regex set"),
1514                                false,
1515                            ),
1516                            f.to_owned(),
1517                            cast,
1518                        ));
1519                    }
1520                }
1521                if !iregex_set.is_empty() {
1522                    if iregex_set.len() == 1 {
1523                        group.push(Expression::Search(
1524                            Search::Regex(
1525                                iregex_set.into_iter().next().expect("failed to get regex"),
1526                                true,
1527                            ),
1528                            f.to_owned(),
1529                            cast,
1530                        ));
1531                    } else {
1532                        multiple = true;
1533                        group.push(Expression::Search(
1534                            Search::RegexSet(
1535                                RegexSetBuilder::new(
1536                                    iregex_set
1537                                        .into_iter()
1538                                        .map(|r| r.as_str().to_string())
1539                                        .collect::<Vec<_>>(),
1540                                )
1541                                .case_insensitive(true)
1542                                .build()
1543                                .expect("could not build regex set"),
1544                                true,
1545                            ),
1546                            f.to_owned(),
1547                            cast,
1548                        ));
1549                    }
1550                }
1551                group.extend(rest);
1552                if let Expression::Match(Match::All, _) | Expression::Match(Match::Of(_), _) = &e {
1553                    if boolean as i32 + mapping as i32 + number as i32 + string as i32 > 1 {
1554                        return Err(crate::error::parse_invalid_ident(
1555                            "when using sequence modifiers the all expressions must be of the same type",
1556                        ));
1557                    }
1558                }
1559                if let Some(misc) = &misc {
1560                    if let ModSym::Int = misc {
1561                        if boolean || mapping || string {
1562                            return Err(crate::error::parse_invalid_ident(
1563                                "when casting to int all expressions must be of type int",
1564                            ));
1565                        }
1566                    }
1567                    if let ModSym::Str = &misc {
1568                        if boolean || mapping || number {
1569                            return Err(crate::error::parse_invalid_ident(
1570                                "when casting to str all expressions must be of type str",
1571                            ));
1572                        }
1573                    }
1574                }
1575                if group.is_empty() {
1576                    return Err(crate::error::parse_invalid_ident("failed to parse mapping"));
1577                } else if !multiple && group.len() == 1 {
1578                    group.into_iter().next().expect("could not get expression")
1579                } else if let Expression::Match(m, _) = e {
1580                    if group.len() == 1 {
1581                        let group = group.into_iter().next().expect("could not get expression");
1582                        Expression::Match(m, Box::new(group))
1583                    } else {
1584                        Expression::Match(m, Box::new(Expression::BooleanGroup(BoolSym::Or, group)))
1585                    }
1586                } else {
1587                    Expression::BooleanGroup(BoolSym::Or, group)
1588                }
1589            }
1590            Yaml::Tagged(_) => {
1591                return Err(crate::error::parse_invalid_ident(
1592                    "!Tag syntax is not supported",
1593                ));
1594            }
1595        };
1596        if let Some(ModSym::Not) = misc {
1597            expressions.push(Expression::Negate(Box::new(expression)));
1598        } else {
1599            expressions.push(expression);
1600        }
1601    }
1602    if expressions.is_empty() {
1603        return Err(crate::error::parse_invalid_ident("failed to parse mapping"));
1604    } else if expressions.len() == 1 {
1605        return Ok(expressions.into_iter().next().expect("missing expression"));
1606    }
1607    Ok(Expression::BooleanGroup(BoolSym::And, expressions))
1608}
1609
#[cfg(test)]
mod tests {
    use super::*;

    use serde_yaml::Value as Yaml;

    /// Deserialises `source` as YAML and runs it through the identifier parser,
    /// panicking if either step fails.
    fn expr_from_yaml(source: &str) -> Expression {
        let document: Yaml = serde_yaml::from_str(source).unwrap();
        // The test named `parse_identifier` below shadows the glob import, so the
        // parser under test has to be reached through `super::` explicitly.
        super::parse_identifier(&document).unwrap()
    }

    /// Runs the token parser over `tokens`, panicking on a parse error.
    fn expr_from_tokens(tokens: Vec<Token>) -> Expression {
        parse(&tokens).unwrap()
    }

    /// Identifier token for `name`.
    fn ident_tok(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    /// Identifier expression for `name`.
    fn ident_expr(name: &str) -> Expression {
        Expression::Identifier(name.to_string())
    }

    /// Non-cast exact-match search of `value` against `field`.
    fn exact_search(field: &str, value: &str) -> Expression {
        Expression::Search(Search::Exact(value.to_owned()), field.to_owned(), false)
    }

    /// Binary boolean expression `lhs <op> rhs`.
    fn binary(lhs: Expression, op: BoolSym, rhs: Expression) -> Expression {
        Expression::BooleanExpression(Box::new(lhs), op, Box::new(rhs))
    }

    /// Negation wrapper around `inner`.
    fn negated(inner: Expression) -> Expression {
        Expression::Negate(Box::new(inner))
    }

    /// Token stream for `<modifier>(identifier)`.
    fn modifier_call(modifier: ModSym) -> Vec<Token> {
        vec![
            Token::Modifier(modifier),
            Token::Delimiter(DelSym::LeftParenthesis),
            ident_tok("identifier"),
            Token::Delimiter(DelSym::RightParenthesis),
        ]
    }

    #[test]
    fn parse_bool_group_match_search() {
        let parsed = expr_from_yaml(r"all(foo): [bar, '*']");
        let alternatives = vec![
            exact_search("foo", "bar"),
            Expression::Search(Search::Any, "foo".to_owned(), false),
        ];
        let expected = Expression::Match(
            Match::All,
            Box::new(Expression::BooleanGroup(BoolSym::Or, alternatives)),
        );
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_bool_group_match_search_shake() {
        // A single-element sequence collapses to the bare search expression.
        let parsed = expr_from_yaml(r"all(foo): [bar]");
        assert_eq!(exact_search("foo", "bar"), parsed);
    }

    #[test]
    fn parse_bool_expr() {
        let parsed = expr_from_tokens(vec![
            ident_tok("foo"),
            Token::Operator(BoolSym::And),
            ident_tok("bar"),
        ]);
        let expected = binary(ident_expr("foo"), BoolSym::And, ident_expr("bar"));
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_cast() {
        let as_int = expr_from_tokens(modifier_call(ModSym::Int));
        assert_eq!(
            Expression::Cast("identifier".to_string(), ModSym::Int),
            as_int
        );

        let as_not = expr_from_tokens(modifier_call(ModSym::Not));
        assert_eq!(
            Expression::Cast("identifier".to_string(), ModSym::Not),
            as_not
        );

        let as_str = expr_from_tokens(modifier_call(ModSym::Str));
        assert_eq!(
            Expression::Cast("identifier".to_string(), ModSym::Str),
            as_str
        );
    }

    #[test]
    fn parse_identifier() {
        let parsed = expr_from_tokens(vec![ident_tok("condition")]);
        assert_eq!(ident_expr("condition"), parsed);
    }

    #[test]
    fn parse_integer() {
        let parsed = expr_from_tokens(vec![Token::Integer(1)]);
        assert_eq!(Expression::Integer(1), parsed);
    }

    #[test]
    fn parse_negate() {
        let parsed = expr_from_tokens(vec![
            Token::Miscellaneous(MiscSym::Not),
            Token::Delimiter(DelSym::LeftParenthesis),
            ident_tok("foo"),
            Token::Operator(BoolSym::Or),
            ident_tok("bar"),
            Token::Delimiter(DelSym::RightParenthesis),
        ]);
        let expected = negated(binary(ident_expr("foo"), BoolSym::Or, ident_expr("bar")));
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_nested() {
        let parsed = expr_from_yaml(r"foo: {bar: baz}");
        let expected = Expression::Nested("foo".to_owned(), Box::new(exact_search("bar", "baz")));
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_expression_0() {
        // (foo and bar) or fooz
        let parsed = expr_from_tokens(vec![
            Token::Delimiter(DelSym::LeftParenthesis),
            ident_tok("foo"),
            Token::Operator(BoolSym::And),
            ident_tok("bar"),
            Token::Delimiter(DelSym::RightParenthesis),
            Token::Operator(BoolSym::Or),
            ident_tok("fooz"),
        ]);
        let grouped = binary(ident_expr("foo"), BoolSym::And, ident_expr("bar"));
        let expected = binary(grouped, BoolSym::Or, ident_expr("fooz"));
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_expression_1() {
        // foo and (bar or fooz)
        let parsed = expr_from_tokens(vec![
            ident_tok("foo"),
            Token::Operator(BoolSym::And),
            Token::Delimiter(DelSym::LeftParenthesis),
            ident_tok("bar"),
            Token::Operator(BoolSym::Or),
            ident_tok("fooz"),
            Token::Delimiter(DelSym::RightParenthesis),
        ]);
        let grouped = binary(ident_expr("bar"), BoolSym::Or, ident_expr("fooz"));
        let expected = binary(ident_expr("foo"), BoolSym::And, grouped);
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_expression_2() {
        // foo and (not bar and not fooz)
        let parsed = expr_from_tokens(vec![
            ident_tok("foo"),
            Token::Operator(BoolSym::And),
            Token::Delimiter(DelSym::LeftParenthesis),
            Token::Miscellaneous(MiscSym::Not),
            ident_tok("bar"),
            Token::Operator(BoolSym::And),
            Token::Miscellaneous(MiscSym::Not),
            ident_tok("fooz"),
            Token::Delimiter(DelSym::RightParenthesis),
        ]);
        let grouped = binary(
            negated(ident_expr("bar")),
            BoolSym::And,
            negated(ident_expr("fooz")),
        );
        let expected = binary(ident_expr("foo"), BoolSym::And, grouped);
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_identifiers_0() {
        // A top-level sequence is kept as an `Or` group even with one member.
        let parsed = expr_from_yaml("[foo: bar]");
        let expected = Expression::BooleanGroup(BoolSym::Or, vec![exact_search("foo", "bar")]);
        assert_eq!(expected, parsed);
    }

    #[test]
    fn parse_invalid_0() {
        // `not int(condition)` must be rejected by the token parser.
        let tokens = vec![
            Token::Miscellaneous(MiscSym::Not),
            Token::Modifier(ModSym::Int),
            Token::Delimiter(DelSym::LeftParenthesis),
            ident_tok("condition"),
            Token::Delimiter(DelSym::RightParenthesis),
        ];
        let outcome = parse(&tokens);
        assert!(outcome.is_err());
    }
}