pest_meta_tmp/
parser.rs

// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9
10use std::char;
11use std::iter::Peekable;
12
13use pest::error::{Error, ErrorVariant};
14use pest::iterators::{Pair, Pairs};
15use pest::prec_climber::{Assoc, Operator, PrecClimber};
16use pest::{Parser, Span};
17
18use ast::{Expr, Rule as AstRule, RuleType};
19use validator;
20
21include!("grammar.rs");
22
23pub fn parse(rule: Rule, data: &str) -> Result<Pairs<Rule>, Error<Rule>> {
24    PestParser::parse(rule, data)
25}
26
27#[derive(Clone, Debug, Eq, PartialEq)]
28pub struct ParserRule<'i> {
29    pub name: String,
30    pub span: Span<'i>,
31    pub ty: RuleType,
32    pub node: ParserNode<'i>,
33}
34
35#[derive(Clone, Debug, Eq, PartialEq)]
36pub struct ParserNode<'i> {
37    pub expr: ParserExpr<'i>,
38    pub span: Span<'i>,
39}
40
41impl<'i> ParserNode<'i> {
42    pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T>
43    where
44        F: FnMut(ParserNode<'i>) -> Option<T>,
45    {
46        pub fn filter_internal<'i, F, T>(node: ParserNode<'i>, f: &mut F, result: &mut Vec<T>)
47        where
48            F: FnMut(ParserNode<'i>) -> Option<T>,
49        {
50            if let Some(value) = f(node.clone()) {
51                result.push(value);
52            }
53
54            match node.expr {
55                // TODO: Use box syntax when it gets stabilized.
56                ParserExpr::PosPred(node) => {
57                    filter_internal(*node, f, result);
58                }
59                ParserExpr::NegPred(node) => {
60                    filter_internal(*node, f, result);
61                }
62                ParserExpr::Seq(lhs, rhs) => {
63                    filter_internal(*lhs, f, result);
64                    filter_internal(*rhs, f, result);
65                }
66                ParserExpr::Choice(lhs, rhs) => {
67                    filter_internal(*lhs, f, result);
68                    filter_internal(*rhs, f, result);
69                }
70                ParserExpr::Rep(node) => {
71                    filter_internal(*node, f, result);
72                }
73                ParserExpr::RepOnce(node) => {
74                    filter_internal(*node, f, result);
75                }
76                ParserExpr::RepExact(node, _) => {
77                    filter_internal(*node, f, result);
78                }
79                ParserExpr::RepMin(node, _) => {
80                    filter_internal(*node, f, result);
81                }
82                ParserExpr::RepMax(node, _) => {
83                    filter_internal(*node, f, result);
84                }
85                ParserExpr::RepMinMax(node, ..) => {
86                    filter_internal(*node, f, result);
87                }
88                ParserExpr::Opt(node) => {
89                    filter_internal(*node, f, result);
90                }
91                ParserExpr::Push(node) => {
92                    filter_internal(*node, f, result);
93                }
94                _ => (),
95            }
96        }
97
98        let mut result = vec![];
99
100        filter_internal(self, &mut f, &mut result);
101
102        result
103    }
104}
105
106#[derive(Clone, Debug, Eq, PartialEq)]
107pub enum ParserExpr<'i> {
108    Str(String),
109    Insens(String),
110    Range(String, String),
111    Ident(String),
112    PeekSlice(i32, Option<i32>),
113    PosPred(Box<ParserNode<'i>>),
114    NegPred(Box<ParserNode<'i>>),
115    Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
116    Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>),
117    Opt(Box<ParserNode<'i>>),
118    Rep(Box<ParserNode<'i>>),
119    RepOnce(Box<ParserNode<'i>>),
120    RepExact(Box<ParserNode<'i>>, u32),
121    RepMin(Box<ParserNode<'i>>, u32),
122    RepMax(Box<ParserNode<'i>>, u32),
123    RepMinMax(Box<ParserNode<'i>>, u32, u32),
124    Push(Box<ParserNode<'i>>),
125}
126
127fn convert_rule(rule: ParserRule) -> AstRule {
128    match rule {
129        ParserRule { name, ty, node, .. } => {
130            let expr = convert_node(node);
131
132            AstRule { name, ty, expr }
133        }
134    }
135}
136
137fn convert_node(node: ParserNode) -> Expr {
138    match node.expr {
139        ParserExpr::Str(string) => Expr::Str(string),
140        ParserExpr::Insens(string) => Expr::Insens(string),
141        ParserExpr::Range(start, end) => Expr::Range(start, end),
142        ParserExpr::Ident(ident) => Expr::Ident(ident),
143        ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
144        ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
145        ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
146        ParserExpr::Seq(node1, node2) => Expr::Seq(
147            Box::new(convert_node(*node1)),
148            Box::new(convert_node(*node2)),
149        ),
150        ParserExpr::Choice(node1, node2) => Expr::Choice(
151            Box::new(convert_node(*node1)),
152            Box::new(convert_node(*node2)),
153        ),
154        ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))),
155        ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))),
156        ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))),
157        ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num),
158        ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max),
159        ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max),
160        ParserExpr::RepMinMax(node, min, max) => {
161            Expr::RepMinMax(Box::new(convert_node(*node)), min, max)
162        }
163        ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))),
164    }
165}
166
167pub fn consume_rules(pairs: Pairs<Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> {
168    let rules = consume_rules_with_spans(pairs)?;
169    let errors = validator::validate_ast(&rules);
170    if errors.is_empty() {
171        Ok(rules.into_iter().map(convert_rule).collect())
172    } else {
173        Err(errors)
174    }
175}
176
177fn consume_rules_with_spans<'i>(
178    pairs: Pairs<'i, Rule>,
179) -> Result<Vec<ParserRule<'i>>, Vec<Error<Rule>>> {
180    let climber = PrecClimber::new(vec![
181        Operator::new(Rule::choice_operator, Assoc::Left),
182        Operator::new(Rule::sequence_operator, Assoc::Left),
183    ]);
184
185    pairs
186        .filter(|pair| pair.as_rule() == Rule::grammar_rule)
187        .map(|pair| {
188            let mut pairs = pair.into_inner().peekable();
189
190            let span = pairs.next().unwrap().into_span();
191            let name = span.as_str().to_owned();
192
193            pairs.next().unwrap(); // assignment_operator
194
195            let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
196                match pairs.next().unwrap().as_rule() {
197                    Rule::silent_modifier => RuleType::Silent,
198                    Rule::atomic_modifier => RuleType::Atomic,
199                    Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
200                    Rule::non_atomic_modifier => RuleType::NonAtomic,
201                    _ => unreachable!(),
202                }
203            } else {
204                RuleType::Normal
205            };
206
207            pairs.next().unwrap(); // opening_brace
208
209            let node = consume_expr(pairs.next().unwrap().into_inner().peekable(), &climber)?;
210
211            Ok(ParserRule {
212                name,
213                span,
214                ty,
215                node,
216            })
217        })
218        .collect()
219}
220
221fn consume_expr<'i>(
222    pairs: Peekable<Pairs<'i, Rule>>,
223    climber: &PrecClimber<Rule>,
224) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
225    fn unaries<'i>(
226        mut pairs: Peekable<Pairs<'i, Rule>>,
227        climber: &PrecClimber<Rule>,
228    ) -> Result<ParserNode<'i>, Vec<Error<Rule>>> {
229        let pair = pairs.next().unwrap();
230
231        let node = match pair.as_rule() {
232            Rule::opening_paren => {
233                let node = unaries(pairs, climber)?;
234                let end = node.span.end_pos();
235
236                ParserNode {
237                    expr: node.expr,
238                    span: pair.into_span().start_pos().span(&end),
239                }
240            }
241            Rule::positive_predicate_operator => {
242                let node = unaries(pairs, climber)?;
243                let end = node.span.end_pos();
244
245                ParserNode {
246                    expr: ParserExpr::PosPred(Box::new(node)),
247                    span: pair.into_span().start_pos().span(&end),
248                }
249            }
250            Rule::negative_predicate_operator => {
251                let node = unaries(pairs, climber)?;
252                let end = node.span.end_pos();
253
254                ParserNode {
255                    expr: ParserExpr::NegPred(Box::new(node)),
256                    span: pair.into_span().start_pos().span(&end),
257                }
258            }
259            other_rule => {
260                let node = match other_rule {
261                    Rule::expression => consume_expr(pair.into_inner().peekable(), climber)?,
262                    Rule::_push => {
263                        let start = pair.clone().into_span().start_pos();
264                        let mut pairs = pair.into_inner();
265                        pairs.next().unwrap(); // opening_paren
266                        let pair = pairs.next().unwrap();
267
268                        let node = consume_expr(pair.into_inner().peekable(), climber)?;
269                        let end = node.span.end_pos();
270
271                        ParserNode {
272                            expr: ParserExpr::Push(Box::new(node)),
273                            span: start.span(&end),
274                        }
275                    }
276                    Rule::peek_slice => {
277                        let mut pairs = pair.clone().into_inner();
278                        pairs.next().unwrap(); // opening_brack
279                        let pair_start = pairs.next().unwrap(); // .. or integer
280                        let start: i32 = match pair_start.as_rule() {
281                            Rule::range_operator => 0,
282                            Rule::integer => {
283                                pairs.next().unwrap(); // ..
284                                pair_start.as_str().parse().unwrap()
285                            }
286                            _ => unreachable!(),
287                        };
288                        let pair_end = pairs.next().unwrap(); // integer or }
289                        let end: Option<i32> = match pair_end.as_rule() {
290                            Rule::closing_brack => None,
291                            Rule::integer => {
292                                pairs.next().unwrap(); // }
293                                Some(pair_end.as_str().parse().unwrap())
294                            }
295                            _ => unreachable!(),
296                        };
297                        ParserNode {
298                            expr: ParserExpr::PeekSlice(start, end),
299                            span: pair.into_span(),
300                        }
301                    }
302                    Rule::identifier => ParserNode {
303                        expr: ParserExpr::Ident(pair.as_str().to_owned()),
304                        span: pair.clone().into_span(),
305                    },
306                    Rule::string => {
307                        let string = unescape(pair.as_str()).expect("incorrect string literal");
308                        ParserNode {
309                            expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()),
310                            span: pair.clone().into_span(),
311                        }
312                    }
313                    Rule::insensitive_string => {
314                        let string = unescape(pair.as_str()).expect("incorrect string literal");
315                        ParserNode {
316                            expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()),
317                            span: pair.clone().into_span(),
318                        }
319                    }
320                    Rule::range => {
321                        let mut pairs = pair.into_inner();
322                        let pair = pairs.next().unwrap();
323                        let start = unescape(pair.as_str()).expect("incorrect char literal");
324                        let start_pos = pair.clone().into_span().start_pos();
325                        pairs.next();
326                        let pair = pairs.next().unwrap();
327                        let end = unescape(pair.as_str()).expect("incorrect char literal");
328                        let end_pos = pair.clone().into_span().end_pos();
329
330                        ParserNode {
331                            expr: ParserExpr::Range(
332                                start[1..start.len() - 1].to_owned(),
333                                end[1..end.len() - 1].to_owned(),
334                            ),
335                            span: start_pos.span(&end_pos),
336                        }
337                    }
338                    _ => unreachable!(),
339                };
340
341                pairs.fold(
342                    Ok(node),
343                    |node: Result<ParserNode<'i>, Vec<Error<Rule>>>, pair| {
344                        let node = node?;
345
346                        let node = match pair.as_rule() {
347                            Rule::optional_operator => {
348                                let start = node.span.start_pos();
349                                ParserNode {
350                                    expr: ParserExpr::Opt(Box::new(node)),
351                                    span: start.span(&pair.into_span().end_pos()),
352                                }
353                            }
354                            Rule::repeat_operator => {
355                                let start = node.span.start_pos();
356                                ParserNode {
357                                    expr: ParserExpr::Rep(Box::new(node)),
358                                    span: start.span(&pair.into_span().end_pos()),
359                                }
360                            }
361                            Rule::repeat_once_operator => {
362                                let start = node.span.start_pos();
363                                ParserNode {
364                                    expr: ParserExpr::RepOnce(Box::new(node)),
365                                    span: start.span(&pair.into_span().end_pos()),
366                                }
367                            }
368                            Rule::repeat_exact => {
369                                let mut inner = pair.clone().into_inner();
370
371                                inner.next().unwrap(); // opening_brace
372
373                                let number = inner.next().unwrap();
374                                let num = if let Ok(num) = number.as_str().parse::<u32>() {
375                                    num
376                                } else {
377                                    return Err(vec![Error::new_from_span(
378                                        ErrorVariant::CustomError {
379                                            message: "number cannot overflow u32".to_owned(),
380                                        },
381                                        number.into_span(),
382                                    )]);
383                                };
384
385                                if num == 0 {
386                                    let error: Error<Rule> = Error::new_from_span(
387                                        ErrorVariant::CustomError {
388                                            message: "cannot repeat 0 times".to_owned(),
389                                        },
390                                        number.into_span(),
391                                    );
392
393                                    return Err(vec![error]);
394                                }
395
396                                let start = node.span.start_pos();
397                                ParserNode {
398                                    expr: ParserExpr::RepExact(Box::new(node), num),
399                                    span: start.span(&pair.into_span().end_pos()),
400                                }
401                            }
402                            Rule::repeat_min => {
403                                let mut inner = pair.clone().into_inner();
404
405                                inner.next().unwrap(); // opening_brace
406
407                                let min_number = inner.next().unwrap();
408                                let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
409                                    min
410                                } else {
411                                    return Err(vec![Error::new_from_span(
412                                        ErrorVariant::CustomError {
413                                            message: "number cannot overflow u32".to_owned(),
414                                        },
415                                        min_number.into_span(),
416                                    )]);
417                                };
418
419                                let start = node.span.start_pos();
420                                ParserNode {
421                                    expr: ParserExpr::RepMin(Box::new(node), min),
422                                    span: start.span(&pair.into_span().end_pos()),
423                                }
424                            }
425                            Rule::repeat_max => {
426                                let mut inner = pair.clone().into_inner();
427
428                                inner.next().unwrap(); // opening_brace
429                                inner.next().unwrap(); // comma
430
431                                let max_number = inner.next().unwrap();
432                                let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
433                                    max
434                                } else {
435                                    return Err(vec![Error::new_from_span(
436                                        ErrorVariant::CustomError {
437                                            message: "number cannot overflow u32".to_owned(),
438                                        },
439                                        max_number.into_span(),
440                                    )]);
441                                };
442
443                                if max == 0 {
444                                    let error: Error<Rule> = Error::new_from_span(
445                                        ErrorVariant::CustomError {
446                                            message: "cannot repeat 0 times".to_owned(),
447                                        },
448                                        max_number.into_span(),
449                                    );
450
451                                    return Err(vec![error]);
452                                }
453
454                                let start = node.span.start_pos();
455                                ParserNode {
456                                    expr: ParserExpr::RepMax(Box::new(node), max),
457                                    span: start.span(&pair.into_span().end_pos()),
458                                }
459                            }
460                            Rule::repeat_min_max => {
461                                let mut inner = pair.clone().into_inner();
462
463                                inner.next().unwrap(); // opening_brace
464
465                                let min_number = inner.next().unwrap();
466                                let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
467                                    min
468                                } else {
469                                    return Err(vec![Error::new_from_span(
470                                        ErrorVariant::CustomError {
471                                            message: "number cannot overflow u32".to_owned(),
472                                        },
473                                        min_number.into_span(),
474                                    )]);
475                                };
476
477                                inner.next().unwrap(); // comma
478
479                                let max_number = inner.next().unwrap();
480                                let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
481                                    max
482                                } else {
483                                    return Err(vec![Error::new_from_span(
484                                        ErrorVariant::CustomError {
485                                            message: "number cannot overflow u32".to_owned(),
486                                        },
487                                        max_number.into_span(),
488                                    )]);
489                                };
490
491                                if max == 0 {
492                                    let error: Error<Rule> = Error::new_from_span(
493                                        ErrorVariant::CustomError {
494                                            message: "cannot repeat 0 times".to_owned(),
495                                        },
496                                        max_number.into_span(),
497                                    );
498
499                                    return Err(vec![error]);
500                                }
501
502                                let start = node.span.start_pos();
503                                ParserNode {
504                                    expr: ParserExpr::RepMinMax(Box::new(node), min, max),
505                                    span: start.span(&pair.into_span().end_pos()),
506                                }
507                            }
508                            Rule::closing_paren => {
509                                let start = node.span.start_pos();
510
511                                ParserNode {
512                                    expr: node.expr,
513                                    span: start.span(&pair.into_span().end_pos()),
514                                }
515                            }
516                            _ => unreachable!(),
517                        };
518
519                        Ok(node)
520                    },
521                )?
522            }
523        };
524
525        Ok(node)
526    }
527
528    let term = |pair: Pair<'i, Rule>| unaries(pair.into_inner().peekable(), climber);
529    let infix = |lhs: Result<ParserNode<'i>, Vec<Error<Rule>>>,
530                 op: Pair<'i, Rule>,
531                 rhs: Result<ParserNode<'i>, Vec<Error<Rule>>>| match op.as_rule() {
532        Rule::sequence_operator => {
533            let lhs = lhs?;
534            let rhs = rhs?;
535
536            let start = lhs.span.start_pos();
537            let end = rhs.span.end_pos();
538
539            Ok(ParserNode {
540                expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)),
541                span: start.span(&end),
542            })
543        }
544        Rule::choice_operator => {
545            let lhs = lhs?;
546            let rhs = rhs?;
547
548            let start = lhs.span.start_pos();
549            let end = rhs.span.end_pos();
550
551            Ok(ParserNode {
552                expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)),
553                span: start.span(&end),
554            })
555        }
556        _ => unreachable!(),
557    };
558
559    climber.climb(pairs, term, infix)
560}
561
/// Resolves the backslash escapes in `string`.
///
/// Characters outside an escape sequence are copied through unchanged.
/// Supported escapes:
///
/// * `\"`, `\\`, `\'`, `\r`, `\n`, `\t`, `\0`
/// * `\xHH`: exactly two hexadecimal digits, giving the character with that
///   code point
/// * `\u{H..}`: two to six hexadecimal digits between braces, giving the
///   character with that code point
///
/// Returns `None` when an escape is unknown, truncated, contains anything
/// other than hexadecimal digits where digits are expected (a `+` sign
/// included), or names a value that is not a valid `char`.
fn unescape(string: &str) -> Option<String> {
    let mut result = String::new();
    let mut chars = string.chars();

    loop {
        match chars.next() {
            Some('\\') => match chars.next()? {
                '"' => result.push('"'),
                '\\' => result.push('\\'),
                'r' => result.push('\r'),
                'n' => result.push('\n'),
                't' => result.push('\t'),
                '0' => result.push('\0'),
                '\'' => result.push('\''),
                'x' => {
                    let digits: String = chars.by_ref().take(2).collect();

                    // Hex digits are single-byte, so a byte length of 2 plus the
                    // digit check means exactly two digits. The explicit check is
                    // needed because `from_str_radix` would accept a leading `+`.
                    if digits.len() != 2 || !digits.chars().all(|c| c.is_digit(16)) {
                        return None;
                    }

                    let value = u8::from_str_radix(&digits, 16).ok()?;

                    result.push(char::from(value));
                }
                'u' => {
                    if chars.next()? != '{' {
                        return None;
                    }

                    // Read hex digits up to the closing brace. Running out of input
                    // first, a non-hex character, or a seventh digit all fail.
                    let mut digits = String::new();
                    loop {
                        match chars.next()? {
                            '}' => break,
                            c if c.is_digit(16) && digits.len() < 6 => digits.push(c),
                            _ => return None,
                        }
                    }

                    if digits.len() < 2 {
                        return None;
                    }

                    let value = u32::from_str_radix(&digits, 16).ok()?;

                    result.push(char::from_u32(value)?);
                }
                _ => return None,
            },
            Some(c) => result.push(c),
            None => return Some(result),
        };
    }
}
617
618#[cfg(test)]
619mod tests {
620    use super::super::unwrap_or_report;
621    use super::*;
622
623    #[test]
624    fn rules() {
625        parses_to! {
626            parser: PestParser,
627            input: "a = { b } c = { d }",
628            rule: Rule::grammar_rules,
629            tokens: [
630                grammar_rule(0, 9, [
631                    identifier(0, 1),
632                    assignment_operator(2, 3),
633                    opening_brace(4, 5),
634                    expression(6, 8, [
635                        term(6, 8, [
636                            identifier(6, 7)
637                        ])
638                    ]),
639                    closing_brace(8, 9)
640                ]),
641                grammar_rule(10, 19, [
642                    identifier(10, 11),
643                    assignment_operator(12, 13),
644                    opening_brace(14, 15),
645                    expression(16, 18, [
646                        term(16, 18, [
647                            identifier(16, 17)
648                        ])
649                    ]),
650                    closing_brace(18, 19)
651                ])
652            ]
653        };
654    }
655
656    #[test]
657    fn rule() {
658        parses_to! {
659            parser: PestParser,
660            input: "a = ! { b ~ c }",
661            rule: Rule::grammar_rule,
662            tokens: [
663                grammar_rule(0, 15, [
664                    identifier(0, 1),
665                    assignment_operator(2, 3),
666                    non_atomic_modifier(4, 5),
667                    opening_brace(6, 7),
668                    expression(8, 14, [
669                        term(8, 10, [
670                            identifier(8, 9)
671                        ]),
672                        sequence_operator(10, 11),
673                        term(12, 14, [
674                            identifier(12, 13)
675                        ])
676                    ]),
677                    closing_brace(14, 15)
678                ])
679            ]
680        };
681    }
682
683    #[test]
684    fn expression() {
685        parses_to! {
686            parser: PestParser,
687            input: "_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?",
688            rule: Rule::expression,
689            tokens: [
690                expression(0, 35, [
691                    term(0, 3, [
692                        identifier(0, 2)
693                    ]),
694                    choice_operator(3, 4),
695                    term(5, 14, [
696                        range(5, 13, [
697                            character(5, 8, [
698                                single_quote(5, 6),
699                                inner_chr(6, 7),
700                                single_quote(7, 8)
701                            ]),
702                            range_operator(8, 10),
703                            character(10, 13, [
704                                single_quote(10, 11),
705                                inner_chr(11, 12),
706                                single_quote(12, 13)
707                            ])
708                        ])
709                    ]),
710                    sequence_operator(14, 15),
711                    term(16, 24, [
712                        negative_predicate_operator(16, 17),
713                        insensitive_string(17, 23, [
714                            string(18, 23, [
715                                quote(18, 19),
716                                inner_str(19, 22),
717                                quote(22, 23)
718                            ])
719                        ])
720                    ]),
721                    sequence_operator(24, 25),
722                    term(26, 35, [
723                        opening_paren(26, 27),
724                        expression(27, 32, [
725                            term(27, 29, [
726                                identifier(27, 28)
727                            ]),
728                            choice_operator(29, 30),
729                            term(31, 32, [
730                                identifier(31, 32)
731                            ])
732                        ]),
733                        closing_paren(32, 33),
734                        repeat_operator(33, 34),
735                        optional_operator(34, 35)
736                    ])
737                ])
738            ]
739        };
740    }
741
742    #[test]
743    fn repeat_exact() {
744        parses_to! {
745            parser: PestParser,
746            input: "{1}",
747            rule: Rule::repeat_exact,
748            tokens: [
749                repeat_exact(0, 3, [
750                    opening_brace(0, 1),
751                    number(1, 2),
752                    closing_brace(2, 3)
753                ])
754            ]
755        };
756    }
757
758    #[test]
759    fn repeat_min() {
760        parses_to! {
761            parser: PestParser,
762            input: "{2,}",
763            rule: Rule::repeat_min,
764            tokens: [
765                repeat_min(0, 4, [
766                    opening_brace(0,1),
767                    number(1,2),
768                    comma(2,3),
769                    closing_brace(3,4)
770                ])
771            ]
772        }
773    }
774
775    #[test]
776    fn repeat_max() {
777        parses_to! {
778            parser: PestParser,
779            input: "{, 3}",
780            rule: Rule::repeat_max,
781            tokens: [
782                repeat_max(0, 5, [
783                    opening_brace(0,1),
784                    comma(1,2),
785                    number(3,4),
786                    closing_brace(4,5)
787                ])
788            ]
789        }
790    }
791
792    #[test]
793    fn repeat_min_max() {
794        parses_to! {
795            parser: PestParser,
796            input: "{1, 2}",
797            rule: Rule::repeat_min_max,
798            tokens: [
799                repeat_min_max(0, 6, [
800                    opening_brace(0, 1),
801                    number(1, 2),
802                    comma(2, 3),
803                    number(4, 5),
804                    closing_brace(5, 6)
805                ])
806            ]
807        };
808    }
809
810    #[test]
811    fn push() {
812        parses_to! {
813            parser: PestParser,
814            input: "PUSH ( a )",
815            rule: Rule::_push,
816            tokens: [
817                _push(0, 10, [
818                    opening_paren(5, 6),
819                    expression(7, 9, [
820                        term(7, 9, [
821                            identifier(7, 8)
822                        ])
823                    ]),
824                    closing_paren(9, 10)
825                ])
826            ]
827        };
828    }
829
830    #[test]
831    fn peek_slice_all() {
832        parses_to! {
833            parser: PestParser,
834            input: "PEEK[..]",
835            rule: Rule::peek_slice,
836            tokens: [
837                peek_slice(0, 8, [
838                    opening_brack(4, 5),
839                    range_operator(5, 7),
840                    closing_brack(7, 8)
841                ])
842            ]
843        };
844    }
845
846    #[test]
847    fn peek_slice_start() {
848        parses_to! {
849            parser: PestParser,
850            input: "PEEK[1..]",
851            rule: Rule::peek_slice,
852            tokens: [
853                peek_slice(0, 9, [
854                    opening_brack(4, 5),
855                    integer(5, 6),
856                    range_operator(6, 8),
857                    closing_brack(8, 9)
858                ])
859            ]
860        };
861    }
862
863    #[test]
864    fn peek_slice_end() {
865        parses_to! {
866            parser: PestParser,
867            input: "PEEK[ ..-1]",
868            rule: Rule::peek_slice,
869            tokens: [
870                peek_slice(0, 11, [
871                    opening_brack(4, 5),
872                    range_operator(6, 8),
873                    integer(8, 10),
874                    closing_brack(10, 11)
875                ])
876            ]
877        };
878    }
879
880    #[test]
881    fn peek_slice_start_end() {
882        parses_to! {
883            parser: PestParser,
884            input: "PEEK[-5..10]",
885            rule: Rule::peek_slice,
886            tokens: [
887                peek_slice(0, 12, [
888                    opening_brack(4, 5),
889                    integer(5, 7),
890                    range_operator(7, 9),
891                    integer(9, 11),
892                    closing_brack(11, 12)
893                ])
894            ]
895        };
896    }
897
898    #[test]
899    fn identifier() {
900        parses_to! {
901            parser: PestParser,
902            input: "_a8943",
903            rule: Rule::identifier,
904            tokens: [
905                identifier(0, 6)
906            ]
907        };
908    }
909
910    #[test]
911    fn string() {
912        parses_to! {
913            parser: PestParser,
914            input: "\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\"",
915            rule: Rule::string,
916            tokens: [
917                string(0, 46, [
918                    quote(0, 1),
919                    inner_str(1, 45),
920                    quote(45, 46)
921                ])
922            ]
923        };
924    }
925
926    #[test]
927    fn insensitive_string() {
928        parses_to! {
929            parser: PestParser,
930            input: "^  \"\\\"hi\"",
931            rule: Rule::insensitive_string,
932            tokens: [
933                insensitive_string(0, 9, [
934                    string(3, 9, [
935                        quote(3, 4),
936                        inner_str(4, 8),
937                        quote(8, 9)
938                    ])
939                ])
940            ]
941        };
942    }
943
944    #[test]
945    fn range() {
946        parses_to! {
947            parser: PestParser,
948            input: "'\\n' .. '\\x1a'",
949            rule: Rule::range,
950            tokens: [
951                range(0, 14, [
952                    character(0, 4, [
953                        single_quote(0, 1),
954                        inner_chr(1, 3),
955                        single_quote(3, 4)
956                    ]),
957                    range_operator(5, 7),
958                    character(8, 14, [
959                        single_quote(8, 9),
960                        inner_chr(9, 13),
961                        single_quote(13, 14)
962                    ])
963                ])
964            ]
965        };
966    }
967
968    #[test]
969    fn character() {
970        parses_to! {
971            parser: PestParser,
972            input: "'\\u{123abC}'",
973            rule: Rule::character,
974            tokens: [
975                character(0, 12, [
976                    single_quote(0, 1),
977                    inner_chr(1, 11),
978                    single_quote(11, 12)
979                ])
980            ]
981        };
982    }
983
984    #[test]
985    fn number() {
986        parses_to! {
987            parser: PestParser,
988            input: "0123",
989            rule: Rule::number,
990            tokens: [
991                number(0, 4)
992            ]
993        };
994    }
995
996    #[test]
997    fn comment() {
998        parses_to! {
999            parser: PestParser,
1000            input: "a ~    // asda\n b",
1001            rule: Rule::expression,
1002            tokens: [
1003                expression(0, 17, [
1004                    term(0, 2, [
1005                        identifier(0, 1)
1006                    ]),
1007                    sequence_operator(2, 3),
1008                    term(16, 17, [
1009                        identifier(16, 17)
1010                    ])
1011                ])
1012            ]
1013        };
1014    }
1015
1016    #[test]
1017    fn wrong_identifier() {
1018        fails_with! {
1019            parser: PestParser,
1020            input: "0",
1021            rule: Rule::grammar_rules,
1022            positives: vec![Rule::identifier],
1023            negatives: vec![],
1024            pos: 0
1025        };
1026    }
1027
1028    #[test]
1029    fn missing_assignment_operator() {
1030        fails_with! {
1031            parser: PestParser,
1032            input: "a {}",
1033            rule: Rule::grammar_rules,
1034            positives: vec![Rule::assignment_operator],
1035            negatives: vec![],
1036            pos: 2
1037        };
1038    }
1039
1040    #[test]
1041    fn wrong_modifier() {
1042        fails_with! {
1043            parser: PestParser,
1044            input: "a = *{}",
1045            rule: Rule::grammar_rules,
1046            positives: vec![
1047                Rule::opening_brace,
1048                Rule::silent_modifier,
1049                Rule::atomic_modifier,
1050                Rule::compound_atomic_modifier,
1051                Rule::non_atomic_modifier
1052            ],
1053            negatives: vec![],
1054            pos: 4
1055        };
1056    }
1057
1058    #[test]
1059    fn missing_opening_brace() {
1060        fails_with! {
1061            parser: PestParser,
1062            input: "a = _",
1063            rule: Rule::grammar_rules,
1064            positives: vec![Rule::opening_brace],
1065            negatives: vec![],
1066            pos: 5
1067        };
1068    }
1069
1070    #[test]
1071    fn empty_rule() {
1072        fails_with! {
1073            parser: PestParser,
1074            input: "a = {}",
1075            rule: Rule::grammar_rules,
1076            positives: vec![Rule::term],
1077            negatives: vec![],
1078            pos: 5
1079        };
1080    }
1081
1082    #[test]
1083    fn missing_rhs() {
1084        fails_with! {
1085            parser: PestParser,
1086            input: "a = { b ~ }",
1087            rule: Rule::grammar_rules,
1088            positives: vec![Rule::term],
1089            negatives: vec![],
1090            pos: 10
1091        };
1092    }
1093
1094    #[test]
1095    fn wrong_op() {
1096        fails_with! {
1097            parser: PestParser,
1098            input: "a = { b % }",
1099            rule: Rule::grammar_rules,
1100            positives: vec![
1101                Rule::opening_brace,
1102                Rule::closing_brace,
1103                Rule::sequence_operator,
1104                Rule::choice_operator,
1105                Rule::optional_operator,
1106                Rule::repeat_operator,
1107                Rule::repeat_once_operator
1108            ],
1109            negatives: vec![],
1110            pos: 8
1111        };
1112    }
1113
1114    #[test]
1115    fn missing_closing_paren() {
1116        fails_with! {
1117            parser: PestParser,
1118            input: "a = { (b }",
1119            rule: Rule::grammar_rules,
1120            positives: vec![
1121                Rule::opening_brace,
1122                Rule::closing_paren,
1123                Rule::sequence_operator,
1124                Rule::choice_operator,
1125                Rule::optional_operator,
1126                Rule::repeat_operator,
1127                Rule::repeat_once_operator
1128            ],
1129            negatives: vec![],
1130            pos: 9
1131        };
1132    }
1133
1134    #[test]
1135    fn missing_term() {
1136        fails_with! {
1137            parser: PestParser,
1138            input: "a = { ! }",
1139            rule: Rule::grammar_rules,
1140            positives: vec![
1141                Rule::opening_paren,
1142                Rule::positive_predicate_operator,
1143                Rule::negative_predicate_operator,
1144                Rule::_push,
1145                Rule::peek_slice,
1146                Rule::identifier,
1147                Rule::insensitive_string,
1148                Rule::quote,
1149                Rule::single_quote
1150            ],
1151            negatives: vec![],
1152            pos: 8
1153        };
1154    }
1155
1156    #[test]
1157    fn string_missing_ending_quote() {
1158        fails_with! {
1159            parser: PestParser,
1160            input: "a = { \" }",
1161            rule: Rule::grammar_rules,
1162            positives: vec![Rule::quote],
1163            negatives: vec![],
1164            pos: 9
1165        };
1166    }
1167
1168    #[test]
1169    fn insensitive_missing_string() {
1170        fails_with! {
1171            parser: PestParser,
1172            input: "a = { ^ }",
1173            rule: Rule::grammar_rules,
1174            positives: vec![Rule::quote],
1175            negatives: vec![],
1176            pos: 8
1177        };
1178    }
1179
1180    #[test]
1181    fn char_missing_ending_single_quote() {
1182        fails_with! {
1183            parser: PestParser,
1184            input: "a = { \' }",
1185            rule: Rule::grammar_rules,
1186            positives: vec![Rule::single_quote],
1187            negatives: vec![],
1188            pos: 8
1189        };
1190    }
1191
1192    #[test]
1193    fn range_missing_range_operator() {
1194        fails_with! {
1195            parser: PestParser,
1196            input: "a = { \'a\' }",
1197            rule: Rule::grammar_rules,
1198            positives: vec![Rule::range_operator],
1199            negatives: vec![],
1200            pos: 10
1201        };
1202    }
1203
1204    #[test]
1205    fn wrong_postfix() {
1206        fails_with! {
1207            parser: PestParser,
1208            input: "a = { a& }",
1209            rule: Rule::grammar_rules,
1210            positives: vec![
1211                Rule::opening_brace,
1212                Rule::closing_brace,
1213                Rule::sequence_operator,
1214                Rule::choice_operator,
1215                Rule::optional_operator,
1216                Rule::repeat_operator,
1217                Rule::repeat_once_operator
1218            ],
1219            negatives: vec![],
1220            pos: 7
1221        };
1222    }
1223
1224    #[test]
1225    fn ast() {
1226        let input =
1227            "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | PUSH('d'..'e'))?* }";
1228
1229        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1230        let ast = consume_rules_with_spans(pairs).unwrap();
1231        let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
1232
1233        assert_eq!(
1234            ast,
1235            vec![AstRule {
1236                name: "rule".to_owned(),
1237                ty: RuleType::Silent,
1238                expr: Expr::Choice(
1239                    Box::new(Expr::Seq(
1240                        Box::new(Expr::Seq(
1241                            Box::new(Expr::Seq(
1242                                Box::new(Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1)),
1243                                Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
1244                            )),
1245                            Box::new(Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2))
1246                        )),
1247                        Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
1248                    )),
1249                    Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
1250                        Box::new(Expr::Choice(
1251                            Box::new(Expr::Insens("c".to_owned())),
1252                            Box::new(Expr::Push(Box::new(Expr::Range(
1253                                "d".to_owned(),
1254                                "e".to_owned()
1255                            ))))
1256                        ))
1257                    ))))))
1258                )
1259            },]
1260        );
1261    }
1262
1263    #[test]
1264    fn ast_peek_slice() {
1265        let input = "rule = _{ PEEK[-04..] ~ PEEK[..3] }";
1266
1267        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1268        let ast = consume_rules_with_spans(pairs).unwrap();
1269        let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();
1270
1271        assert_eq!(
1272            ast,
1273            vec![AstRule {
1274                name: "rule".to_owned(),
1275                ty: RuleType::Silent,
1276                expr: Expr::Seq(
1277                    Box::new(Expr::PeekSlice(-4, None)),
1278                    Box::new(Expr::PeekSlice(0, Some(3))),
1279                )
1280            }],
1281        );
1282    }
1283
1284    #[test]
1285    #[should_panic(expected = "grammar error
1286
1287 --> 1:13
1288  |
12891 | rule = { \"\"{4294967297} }
1290  |             ^--------^
1291  |
1292  = number cannot overflow u32")]
1293    fn repeat_exact_overflow() {
1294        let input = "rule = { \"\"{4294967297} }";
1295
1296        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1297        unwrap_or_report(consume_rules_with_spans(pairs));
1298    }
1299
1300    #[test]
1301    #[should_panic(expected = "grammar error
1302
1303 --> 1:13
1304  |
13051 | rule = { \"\"{0} }
1306  |             ^
1307  |
1308  = cannot repeat 0 times")]
1309    fn repeat_exact_zero() {
1310        let input = "rule = { \"\"{0} }";
1311
1312        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1313        unwrap_or_report(consume_rules_with_spans(pairs));
1314    }
1315
1316    #[test]
1317    #[should_panic(expected = "grammar error
1318
1319 --> 1:13
1320  |
13211 | rule = { \"\"{4294967297,} }
1322  |             ^--------^
1323  |
1324  = number cannot overflow u32")]
1325    fn repeat_min_overflow() {
1326        let input = "rule = { \"\"{4294967297,} }";
1327
1328        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1329        unwrap_or_report(consume_rules_with_spans(pairs));
1330    }
1331
1332    #[test]
1333    #[should_panic(expected = "grammar error
1334
1335 --> 1:14
1336  |
13371 | rule = { \"\"{,4294967297} }
1338  |              ^--------^
1339  |
1340  = number cannot overflow u32")]
1341    fn repeat_max_overflow() {
1342        let input = "rule = { \"\"{,4294967297} }";
1343
1344        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1345        unwrap_or_report(consume_rules_with_spans(pairs));
1346    }
1347
1348    #[test]
1349    #[should_panic(expected = "grammar error
1350
1351 --> 1:14
1352  |
13531 | rule = { \"\"{,0} }
1354  |              ^
1355  |
1356  = cannot repeat 0 times")]
1357    fn repeat_max_zero() {
1358        let input = "rule = { \"\"{,0} }";
1359
1360        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1361        unwrap_or_report(consume_rules_with_spans(pairs));
1362    }
1363
1364    #[test]
1365    #[should_panic(expected = "grammar error
1366
1367 --> 1:13
1368  |
13691 | rule = { \"\"{4294967297,4294967298} }
1370  |             ^--------^
1371  |
1372  = number cannot overflow u32")]
1373    fn repeat_min_max_overflow() {
1374        let input = "rule = { \"\"{4294967297,4294967298} }";
1375
1376        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1377        unwrap_or_report(consume_rules_with_spans(pairs));
1378    }
1379
1380    #[test]
1381    #[should_panic(expected = "grammar error
1382
1383 --> 1:15
1384  |
13851 | rule = { \"\"{0,0} }
1386  |               ^
1387  |
1388  = cannot repeat 0 times")]
1389    fn repeat_min_max_zero() {
1390        let input = "rule = { \"\"{0,0} }";
1391
1392        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
1393        unwrap_or_report(consume_rules_with_spans(pairs));
1394    }
1395
1396    #[test]
1397    fn unescape_all() {
1398        let string = r"a\nb\x55c\u{111}d";
1399
1400        assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
1401    }
1402
1403    #[test]
1404    fn unescape_empty_escape() {
1405        let string = r"\";
1406
1407        assert_eq!(unescape(string), None);
1408    }
1409
1410    #[test]
1411    fn unescape_wrong_escape() {
1412        let string = r"\w";
1413
1414        assert_eq!(unescape(string), None);
1415    }
1416
1417    #[test]
1418    fn unescape_backslash() {
1419        let string = "\\\\";
1420        assert_eq!(unescape(string), Some("\\".to_owned()));
1421    }
1422
1423    #[test]
1424    fn unescape_return() {
1425        let string = "\\r";
1426        assert_eq!(unescape(string), Some("\r".to_owned()));
1427    }
1428
1429    #[test]
1430    fn unescape_tab() {
1431        let string = "\\t";
1432        assert_eq!(unescape(string), Some("\t".to_owned()));
1433    }
1434
1435    #[test]
1436    fn unescape_null() {
1437        let string = "\\0";
1438        assert_eq!(unescape(string), Some("\0".to_owned()));
1439    }
1440
1441    #[test]
1442    fn unescape_single_quote() {
1443        let string = "\\'";
1444        assert_eq!(unescape(string), Some("\'".to_owned()));
1445    }
1446
1447    #[test]
1448    fn unescape_wrong_byte() {
1449        let string = r"\xfg";
1450
1451        assert_eq!(unescape(string), None);
1452    }
1453
1454    #[test]
1455    fn unescape_short_byte() {
1456        let string = r"\xf";
1457
1458        assert_eq!(unescape(string), None);
1459    }
1460
1461    #[test]
1462    fn unescape_no_open_brace_unicode() {
1463        let string = r"\u11";
1464
1465        assert_eq!(unescape(string), None);
1466    }
1467
1468    #[test]
1469    fn unescape_no_close_brace_unicode() {
1470        let string = r"\u{11";
1471
1472        assert_eq!(unescape(string), None);
1473    }
1474
1475    #[test]
1476    fn unescape_short_unicode() {
1477        let string = r"\u{1}";
1478
1479        assert_eq!(unescape(string), None);
1480    }
1481
1482    #[test]
1483    fn unescape_long_unicode() {
1484        let string = r"\u{1111111}";
1485
1486        assert_eq!(unescape(string), None);
1487    }
1488}