1use std::char;
11use std::iter::Peekable;
12use std::sync::Arc;
13
14use pest::error::{Error, ErrorVariant};
15use pest::iterators::{Pair, Pairs};
16use pest::prec_climber::{Assoc, Operator, PrecClimber};
17use pest::{Parser, Span};
18
19use ast::{Expr, Rule as AstRule, RuleType};
20use validator;
21
// Private wrapper around the contents of `grammar.rs`, which are textually
// included here. Its items are re-exported from this file by the
// `pub use self::grammar::*` that follows.
mod grammar {
    // Every clippy lint is disabled for the included code; `unknown_lints`
    // keeps compilers that do not know the `clippy::` namespace quiet.
    // NOTE(review): presumably because `grammar.rs` is generated — confirm.
    #![allow(unknown_lints, clippy::all)]

    include!("grammar.rs");
}
27
28pub use self::grammar::*;
29
/// Parses `data` starting at grammar rule `rule`.
///
/// Thin convenience wrapper that forwards both arguments to
/// `PestParser::parse` and returns its result unchanged: the matched token
/// pairs on success, the parser's error otherwise.
pub fn parse(rule: Rule, data: Arc<str>) -> Result<Pairs<Rule>, Error<Rule>> {
    PestParser::parse(rule, data)
}
33
/// A single grammar rule as read from the source, with location information
/// still attached.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserRule {
    /// Name of the rule (the text of its identifier).
    pub name: String,
    /// Span of the rule's identifier; `consume_rules_with_spans` derives
    /// `name` from this span's text.
    pub span: Span,
    /// Kind of rule selected by the optional modifier in front of the opening
    /// brace; `RuleType::Normal` when no modifier is present.
    pub ty: RuleType,
    /// Root of the expression tree that forms the rule's body.
    pub node: ParserNode,
}
41
/// One node of a rule's expression tree together with the source span it was
/// built from.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParserNode {
    /// The expression held by this node; compound variants own their children.
    pub expr: ParserExpr,
    /// Source location associated with this node.
    pub span: Span,
}
47
48impl ParserNode {
49 pub fn filter_map_top_down<F, T>(self, mut f: F) -> Vec<T>
50 where
51 F: FnMut(ParserNode) -> Option<T>,
52 {
53 pub fn filter_internal<F, T>(node: ParserNode, f: &mut F, result: &mut Vec<T>)
54 where
55 F: FnMut(ParserNode) -> Option<T>,
56 {
57 if let Some(value) = f(node.clone()) {
58 result.push(value);
59 }
60
61 match node.expr {
62 ParserExpr::PosPred(node) => {
64 filter_internal(*node, f, result);
65 }
66 ParserExpr::NegPred(node) => {
67 filter_internal(*node, f, result);
68 }
69 ParserExpr::Seq(lhs, rhs) => {
70 filter_internal(*lhs, f, result);
71 filter_internal(*rhs, f, result);
72 }
73 ParserExpr::Choice(lhs, rhs) => {
74 filter_internal(*lhs, f, result);
75 filter_internal(*rhs, f, result);
76 }
77 ParserExpr::Rep(node) => {
78 filter_internal(*node, f, result);
79 }
80 ParserExpr::RepOnce(node) => {
81 filter_internal(*node, f, result);
82 }
83 ParserExpr::RepExact(node, _) => {
84 filter_internal(*node, f, result);
85 }
86 ParserExpr::RepMin(node, _) => {
87 filter_internal(*node, f, result);
88 }
89 ParserExpr::RepMax(node, _) => {
90 filter_internal(*node, f, result);
91 }
92 ParserExpr::RepMinMax(node, ..) => {
93 filter_internal(*node, f, result);
94 }
95 ParserExpr::Opt(node) => {
96 filter_internal(*node, f, result);
97 }
98 ParserExpr::Push(node) => {
99 filter_internal(*node, f, result);
100 }
101 _ => (),
102 }
103 }
104
105 let mut result = vec![];
106
107 filter_internal(self, &mut f, &mut result);
108
109 result
110 }
111}
112
/// Expression forms produced while reading a grammar. `convert_node` maps
/// each variant onto the `Expr` variant of the same name.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParserExpr {
    /// String literal, unescaped and without its surrounding quotes.
    Str(String),
    /// Case-insensitive string literal (written with a leading `^`).
    Insens(String),
    /// Character range; holds the unescaped start and end characters without
    /// their single quotes.
    Range(String, String),
    /// Reference to a rule by identifier.
    Ident(String),
    /// `PEEK[start..end]`; `start` is 0 when omitted, `end` is `None` when omitted.
    PeekSlice(i32, Option<i32>),
    /// Operand of a positive predicate operator.
    PosPred(Box<ParserNode>),
    /// Operand of a negative predicate operator (`!`).
    NegPred(Box<ParserNode>),
    /// Two expressions joined by the sequence operator (`~`).
    Seq(Box<ParserNode>, Box<ParserNode>),
    /// Two expressions joined by the choice operator (`|`).
    Choice(Box<ParserNode>, Box<ParserNode>),
    /// Operand of the optional operator (`?`).
    Opt(Box<ParserNode>),
    /// Operand of the repeat operator (`*`).
    Rep(Box<ParserNode>),
    /// Operand of the repeat-once operator.
    RepOnce(Box<ParserNode>),
    /// Operand of `{n}` with the count `n`; the parser rejects `n == 0`.
    RepExact(Box<ParserNode>, u32),
    /// Operand of `{n,}` with the minimum `n`.
    RepMin(Box<ParserNode>, u32),
    /// Operand of `{,n}` with the maximum `n`; the parser rejects `n == 0`.
    RepMax(Box<ParserNode>, u32),
    /// Operand of `{m, n}` with minimum `m` and maximum `n`; the parser
    /// rejects `n == 0`.
    RepMinMax(Box<ParserNode>, u32, u32),
    /// Operand of `PUSH(...)`.
    Push(Box<ParserNode>),
}
133
134fn convert_rule(rule: ParserRule) -> AstRule {
135 let ParserRule { name, ty, node, .. } = rule;
136 let expr = convert_node(node);
137 AstRule { name, ty, expr }
138}
139
140fn convert_node(node: ParserNode) -> Expr {
141 match node.expr {
142 ParserExpr::Str(string) => Expr::Str(string),
143 ParserExpr::Insens(string) => Expr::Insens(string),
144 ParserExpr::Range(start, end) => Expr::Range(start, end),
145 ParserExpr::Ident(ident) => Expr::Ident(ident),
146 ParserExpr::PeekSlice(start, end) => Expr::PeekSlice(start, end),
147 ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))),
148 ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))),
149 ParserExpr::Seq(node1, node2) => Expr::Seq(
150 Box::new(convert_node(*node1)),
151 Box::new(convert_node(*node2)),
152 ),
153 ParserExpr::Choice(node1, node2) => Expr::Choice(
154 Box::new(convert_node(*node1)),
155 Box::new(convert_node(*node2)),
156 ),
157 ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))),
158 ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))),
159 ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))),
160 ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num),
161 ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max),
162 ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max),
163 ParserExpr::RepMinMax(node, min, max) => {
164 Expr::RepMinMax(Box::new(convert_node(*node)), min, max)
165 }
166 ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))),
167 }
168}
169
170pub fn consume_rules(pairs: Pairs<Rule>) -> Result<Vec<AstRule>, Vec<Error<Rule>>> {
171 let rules = consume_rules_with_spans(pairs)?;
172 let errors = validator::validate_ast(&rules);
173 if errors.is_empty() {
174 Ok(rules.into_iter().map(convert_rule).collect())
175 } else {
176 Err(errors)
177 }
178}
179
180fn consume_rules_with_spans(pairs: Pairs<Rule>) -> Result<Vec<ParserRule>, Vec<Error<Rule>>> {
181 let climber = PrecClimber::new(vec![
182 Operator::new(Rule::choice_operator, Assoc::Left),
183 Operator::new(Rule::sequence_operator, Assoc::Left),
184 ]);
185
186 pairs
187 .filter(|pair| pair.as_rule() == Rule::grammar_rule)
188 .map(|pair| {
189 let mut pairs = pair.into_inner().peekable();
190
191 let span = pairs.next().unwrap().as_span();
192 let name = span.as_str().to_owned();
193
194 pairs.next().unwrap(); let ty = if pairs.peek().unwrap().as_rule() != Rule::opening_brace {
197 match pairs.next().unwrap().as_rule() {
198 Rule::silent_modifier => RuleType::Silent,
199 Rule::atomic_modifier => RuleType::Atomic,
200 Rule::compound_atomic_modifier => RuleType::CompoundAtomic,
201 Rule::non_atomic_modifier => RuleType::NonAtomic,
202 _ => unreachable!(),
203 }
204 } else {
205 RuleType::Normal
206 };
207
208 pairs.next().unwrap(); let node = consume_expr(pairs.next().unwrap().into_inner().peekable(), &climber)?;
211
212 Ok(ParserRule {
213 name,
214 span,
215 ty,
216 node,
217 })
218 })
219 .collect()
220}
221
222fn consume_expr(
223 pairs: Peekable<Pairs<Rule>>,
224 climber: &PrecClimber<Rule>,
225) -> Result<ParserNode, Vec<Error<Rule>>> {
226 fn unaries(
227 mut pairs: Peekable<Pairs<Rule>>,
228 climber: &PrecClimber<Rule>,
229 ) -> Result<ParserNode, Vec<Error<Rule>>> {
230 let pair = pairs.next().unwrap();
231
232 let node = match pair.as_rule() {
233 Rule::opening_paren => {
234 let node = unaries(pairs, climber)?;
235 let end = node.span.end_pos();
236
237 ParserNode {
238 expr: node.expr,
239 span: pair.as_span().start_pos().span(&end),
240 }
241 }
242 Rule::positive_predicate_operator => {
243 let node = unaries(pairs, climber)?;
244 let end = node.span.end_pos();
245
246 ParserNode {
247 expr: ParserExpr::PosPred(Box::new(node)),
248 span: pair.as_span().start_pos().span(&end),
249 }
250 }
251 Rule::negative_predicate_operator => {
252 let node = unaries(pairs, climber)?;
253 let end = node.span.end_pos();
254
255 ParserNode {
256 expr: ParserExpr::NegPred(Box::new(node)),
257 span: pair.as_span().start_pos().span(&end),
258 }
259 }
260 other_rule => {
261 let node = match other_rule {
262 Rule::expression => consume_expr(pair.into_inner().peekable(), climber)?,
263 Rule::_push => {
264 let start = pair.clone().as_span().start_pos();
265 let mut pairs = pair.into_inner();
266 pairs.next().unwrap(); let pair = pairs.next().unwrap();
268
269 let node = consume_expr(pair.into_inner().peekable(), climber)?;
270 let end = node.span.end_pos();
271
272 ParserNode {
273 expr: ParserExpr::Push(Box::new(node)),
274 span: start.span(&end),
275 }
276 }
277 Rule::peek_slice => {
278 let mut pairs = pair.clone().into_inner();
279 pairs.next().unwrap(); let pair_start = pairs.next().unwrap(); let start: i32 = match pair_start.as_rule() {
282 Rule::range_operator => 0,
283 Rule::integer => {
284 pairs.next().unwrap(); pair_start.as_str().parse().unwrap()
286 }
287 _ => unreachable!(),
288 };
289 let pair_end = pairs.next().unwrap(); let end: Option<i32> = match pair_end.as_rule() {
291 Rule::closing_brack => None,
292 Rule::integer => {
293 pairs.next().unwrap(); Some(pair_end.as_str().parse().unwrap())
295 }
296 _ => unreachable!(),
297 };
298 ParserNode {
299 expr: ParserExpr::PeekSlice(start, end),
300 span: pair.as_span(),
301 }
302 }
303 Rule::identifier => ParserNode {
304 expr: ParserExpr::Ident(pair.as_str().to_owned()),
305 span: pair.clone().as_span(),
306 },
307 Rule::string => {
308 let string = unescape(pair.as_str()).expect("incorrect string literal");
309 ParserNode {
310 expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()),
311 span: pair.clone().as_span(),
312 }
313 }
314 Rule::insensitive_string => {
315 let string = unescape(pair.as_str()).expect("incorrect string literal");
316 ParserNode {
317 expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()),
318 span: pair.clone().as_span(),
319 }
320 }
321 Rule::range => {
322 let mut pairs = pair.into_inner();
323 let pair = pairs.next().unwrap();
324 let start = unescape(pair.as_str()).expect("incorrect char literal");
325 let start_pos = pair.clone().as_span().start_pos();
326 pairs.next();
327 let pair = pairs.next().unwrap();
328 let end = unescape(pair.as_str()).expect("incorrect char literal");
329 let end_pos = pair.clone().as_span().end_pos();
330
331 ParserNode {
332 expr: ParserExpr::Range(
333 start[1..start.len() - 1].to_owned(),
334 end[1..end.len() - 1].to_owned(),
335 ),
336 span: start_pos.span(&end_pos),
337 }
338 }
339 _ => unreachable!(),
340 };
341
342 pairs.fold(
343 Ok(node),
344 |node: Result<ParserNode, Vec<Error<Rule>>>, pair| {
345 let node = node?;
346
347 let node = match pair.as_rule() {
348 Rule::optional_operator => {
349 let start = node.span.start_pos();
350 ParserNode {
351 expr: ParserExpr::Opt(Box::new(node)),
352 span: start.span(&pair.as_span().end_pos()),
353 }
354 }
355 Rule::repeat_operator => {
356 let start = node.span.start_pos();
357 ParserNode {
358 expr: ParserExpr::Rep(Box::new(node)),
359 span: start.span(&pair.as_span().end_pos()),
360 }
361 }
362 Rule::repeat_once_operator => {
363 let start = node.span.start_pos();
364 ParserNode {
365 expr: ParserExpr::RepOnce(Box::new(node)),
366 span: start.span(&pair.as_span().end_pos()),
367 }
368 }
369 Rule::repeat_exact => {
370 let mut inner = pair.clone().into_inner();
371
372 inner.next().unwrap(); let number = inner.next().unwrap();
375 let num = if let Ok(num) = number.as_str().parse::<u32>() {
376 num
377 } else {
378 return Err(vec![Error::new_from_span(
379 ErrorVariant::CustomError {
380 message: "number cannot overflow u32".to_owned(),
381 },
382 number.as_span(),
383 )]);
384 };
385
386 if num == 0 {
387 let error: Error<Rule> = Error::new_from_span(
388 ErrorVariant::CustomError {
389 message: "cannot repeat 0 times".to_owned(),
390 },
391 number.as_span(),
392 );
393
394 return Err(vec![error]);
395 }
396
397 let start = node.span.start_pos();
398 ParserNode {
399 expr: ParserExpr::RepExact(Box::new(node), num),
400 span: start.span(&pair.as_span().end_pos()),
401 }
402 }
403 Rule::repeat_min => {
404 let mut inner = pair.clone().into_inner();
405
406 inner.next().unwrap(); let min_number = inner.next().unwrap();
409 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
410 min
411 } else {
412 return Err(vec![Error::new_from_span(
413 ErrorVariant::CustomError {
414 message: "number cannot overflow u32".to_owned(),
415 },
416 min_number.as_span(),
417 )]);
418 };
419
420 let start = node.span.start_pos();
421 ParserNode {
422 expr: ParserExpr::RepMin(Box::new(node), min),
423 span: start.span(&pair.as_span().end_pos()),
424 }
425 }
426 Rule::repeat_max => {
427 let mut inner = pair.clone().into_inner();
428
429 inner.next().unwrap(); inner.next().unwrap(); let max_number = inner.next().unwrap();
433 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
434 max
435 } else {
436 return Err(vec![Error::new_from_span(
437 ErrorVariant::CustomError {
438 message: "number cannot overflow u32".to_owned(),
439 },
440 max_number.as_span(),
441 )]);
442 };
443
444 if max == 0 {
445 let error: Error<Rule> = Error::new_from_span(
446 ErrorVariant::CustomError {
447 message: "cannot repeat 0 times".to_owned(),
448 },
449 max_number.as_span(),
450 );
451
452 return Err(vec![error]);
453 }
454
455 let start = node.span.start_pos();
456 ParserNode {
457 expr: ParserExpr::RepMax(Box::new(node), max),
458 span: start.span(&pair.as_span().end_pos()),
459 }
460 }
461 Rule::repeat_min_max => {
462 let mut inner = pair.clone().into_inner();
463
464 inner.next().unwrap(); let min_number = inner.next().unwrap();
467 let min = if let Ok(min) = min_number.as_str().parse::<u32>() {
468 min
469 } else {
470 return Err(vec![Error::new_from_span(
471 ErrorVariant::CustomError {
472 message: "number cannot overflow u32".to_owned(),
473 },
474 min_number.as_span(),
475 )]);
476 };
477
478 inner.next().unwrap(); let max_number = inner.next().unwrap();
481 let max = if let Ok(max) = max_number.as_str().parse::<u32>() {
482 max
483 } else {
484 return Err(vec![Error::new_from_span(
485 ErrorVariant::CustomError {
486 message: "number cannot overflow u32".to_owned(),
487 },
488 max_number.as_span(),
489 )]);
490 };
491
492 if max == 0 {
493 let error: Error<Rule> = Error::new_from_span(
494 ErrorVariant::CustomError {
495 message: "cannot repeat 0 times".to_owned(),
496 },
497 max_number.as_span(),
498 );
499
500 return Err(vec![error]);
501 }
502
503 let start = node.span.start_pos();
504 ParserNode {
505 expr: ParserExpr::RepMinMax(Box::new(node), min, max),
506 span: start.span(&pair.as_span().end_pos()),
507 }
508 }
509 Rule::closing_paren => {
510 let start = node.span.start_pos();
511
512 ParserNode {
513 expr: node.expr,
514 span: start.span(&pair.as_span().end_pos()),
515 }
516 }
517 _ => unreachable!(),
518 };
519
520 Ok(node)
521 },
522 )?
523 }
524 };
525
526 Ok(node)
527 }
528
529 let term = |pair: Pair<Rule>| unaries(pair.into_inner().peekable(), climber);
530 let infix = |lhs: Result<ParserNode, Vec<Error<Rule>>>,
531 op: Pair<Rule>,
532 rhs: Result<ParserNode, Vec<Error<Rule>>>| match op.as_rule() {
533 Rule::sequence_operator => {
534 let lhs = lhs?;
535 let rhs = rhs?;
536
537 let start = lhs.span.start_pos();
538 let end = rhs.span.end_pos();
539
540 Ok(ParserNode {
541 expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)),
542 span: start.span(&end),
543 })
544 }
545 Rule::choice_operator => {
546 let lhs = lhs?;
547 let rhs = rhs?;
548
549 let start = lhs.span.start_pos();
550 let end = rhs.span.end_pos();
551
552 Ok(ParserNode {
553 expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)),
554 span: start.span(&end),
555 })
556 }
557 _ => unreachable!(),
558 };
559
560 climber.climb(pairs, term, infix)
561}
562
/// Resolves the backslash escapes in `string` and returns the resulting text.
///
/// Supported escapes: `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`, `\xHH` (exactly
/// two hex digits) and `\u{H..}` (two to six hex digits). Every other
/// character is copied unchanged.
///
/// Returns `None` when the input ends in the middle of an escape, names an
/// unknown escape, has missing, surplus or non-hexadecimal digits, lacks a
/// brace in a `\u{..}` escape, or denotes a value that is not a valid `char`.
fn unescape(string: &str) -> Option<String> {
    let mut result = String::new();
    let mut chars = string.chars();

    loop {
        match chars.next() {
            Some('\\') => match chars.next()? {
                '"' => result.push('"'),
                '\\' => result.push('\\'),
                'r' => result.push('\r'),
                'n' => result.push('\n'),
                't' => result.push('\t'),
                '0' => result.push('\0'),
                '\'' => result.push('\''),
                'x' => {
                    let rest = chars.as_str();
                    // `get` yields `None` when fewer than two bytes remain.
                    let digits = rest.get(..2)?;

                    // `from_str_radix` also accepts a leading `+`, so the digits
                    // are validated explicitly.
                    if !digits.bytes().all(|byte| byte.is_ascii_hexdigit()) {
                        return None;
                    }

                    let value = u8::from_str_radix(digits, 16).ok()?;
                    result.push(char::from(value));

                    chars = rest[2..].chars();
                }
                'u' => {
                    if chars.next()? != '{' {
                        return None;
                    }

                    let rest = chars.as_str();
                    // A missing closing brace makes the escape invalid.
                    let close = rest.find('}')?;
                    let digits = &rest[..close];

                    if digits.len() < 2 || 6 < digits.len() {
                        return None;
                    }

                    // Same `+` caveat as for `\x`.
                    if !digits.bytes().all(|byte| byte.is_ascii_hexdigit()) {
                        return None;
                    }

                    let value = u32::from_str_radix(digits, 16).ok()?;
                    result.push(char::from_u32(value)?);

                    // Resume right after the closing brace.
                    chars = rest[close + 1..].chars();
                }
                _ => return None,
            },
            Some(c) => result.push(c),
            None => return Some(result),
        };
    }
}
618
// Unit tests for this file, in four groups: token-level checks of the grammar
// (`parses_to!`), expected parse failures (`fails_with!`), conversion of
// parsed grammars into `AstRule`s including the custom repetition errors, and
// direct tests of `unescape`.
//
// NOTE(review): the numeric arguments in the token lists are offsets into the
// input literal, so whitespace inside those literals is significant.
#[cfg(test)]
mod tests {
    use super::super::unwrap_or_report;
    use super::*;

    // ---- token-level grammar tests ----

    // Two consecutive rules produce two `grammar_rule` pairs.
    #[test]
    fn rules() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("a = { b } c = { d }"),
            rule: Rule::grammar_rules,
            tokens: [
                grammar_rule(0, 9, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    opening_brace(4, 5),
                    expression(6, 8, [
                        term(6, 8, [
                            identifier(6, 7)
                        ])
                    ]),
                    closing_brace(8, 9)
                ]),
                grammar_rule(10, 19, [
                    identifier(10, 11),
                    assignment_operator(12, 13),
                    opening_brace(14, 15),
                    expression(16, 18, [
                        term(16, 18, [
                            identifier(16, 17)
                        ])
                    ]),
                    closing_brace(18, 19)
                ])
            ]
        };
    }

    // A rule with a modifier in front of the opening brace.
    #[test]
    fn rule() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("a = ! { b ~ c }"),
            rule: Rule::grammar_rule,
            tokens: [
                grammar_rule(0, 15, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    non_atomic_modifier(4, 5),
                    opening_brace(6, 7),
                    expression(8, 14, [
                        term(8, 10, [
                            identifier(8, 9)
                        ]),
                        sequence_operator(10, 11),
                        term(12, 14, [
                            identifier(12, 13)
                        ])
                    ]),
                    closing_brace(14, 15)
                ])
            ]
        };
    }

    // Mixes infix, prefix and postfix operators with a parenthesised group.
    #[test]
    fn expression() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?"),
            rule: Rule::expression,
            tokens: [
                expression(0, 35, [
                    term(0, 3, [
                        identifier(0, 2)
                    ]),
                    choice_operator(3, 4),
                    term(5, 14, [
                        range(5, 13, [
                            character(5, 8, [
                                single_quote(5, 6),
                                inner_chr(6, 7),
                                single_quote(7, 8)
                            ]),
                            range_operator(8, 10),
                            character(10, 13, [
                                single_quote(10, 11),
                                inner_chr(11, 12),
                                single_quote(12, 13)
                            ])
                        ])
                    ]),
                    sequence_operator(14, 15),
                    term(16, 24, [
                        negative_predicate_operator(16, 17),
                        insensitive_string(17, 23, [
                            string(18, 23, [
                                quote(18, 19),
                                inner_str(19, 22),
                                quote(22, 23)
                            ])
                        ])
                    ]),
                    sequence_operator(24, 25),
                    term(26, 35, [
                        opening_paren(26, 27),
                        expression(27, 32, [
                            term(27, 29, [
                                identifier(27, 28)
                            ]),
                            choice_operator(29, 30),
                            term(31, 32, [
                                identifier(31, 32)
                            ])
                        ]),
                        closing_paren(32, 33),
                        repeat_operator(33, 34),
                        optional_operator(34, 35)
                    ])
                ])
            ]
        };
    }

    #[test]
    fn repeat_exact() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("{1}"),
            rule: Rule::repeat_exact,
            tokens: [
                repeat_exact(0, 3, [
                    opening_brace(0, 1),
                    number(1, 2),
                    closing_brace(2, 3)
                ])
            ]
        };
    }

    #[test]
    fn repeat_min() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("{2,}"),
            rule: Rule::repeat_min,
            tokens: [
                repeat_min(0, 4, [
                    opening_brace(0,1),
                    number(1,2),
                    comma(2,3),
                    closing_brace(3,4)
                ])
            ]
        }
    }

    #[test]
    fn repeat_max() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("{, 3}"),
            rule: Rule::repeat_max,
            tokens: [
                repeat_max(0, 5, [
                    opening_brace(0,1),
                    comma(1,2),
                    number(3,4),
                    closing_brace(4,5)
                ])
            ]
        }
    }

    #[test]
    fn repeat_min_max() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("{1, 2}"),
            rule: Rule::repeat_min_max,
            tokens: [
                repeat_min_max(0, 6, [
                    opening_brace(0, 1),
                    number(1, 2),
                    comma(2, 3),
                    number(4, 5),
                    closing_brace(5, 6)
                ])
            ]
        };
    }

    // The `PUSH` keyword itself yields no inner token.
    #[test]
    fn push() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("PUSH ( a )"),
            rule: Rule::_push,
            tokens: [
                _push(0, 10, [
                    opening_paren(5, 6),
                    expression(7, 9, [
                        term(7, 9, [
                            identifier(7, 8)
                        ])
                    ]),
                    closing_paren(9, 10)
                ])
            ]
        };
    }

    // `PEEK[..]` with neither bound.
    #[test]
    fn peek_slice_all() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("PEEK[..]"),
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 8, [
                    opening_brack(4, 5),
                    range_operator(5, 7),
                    closing_brack(7, 8)
                ])
            ]
        };
    }

    // `PEEK[..]` with only a start bound.
    #[test]
    fn peek_slice_start() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("PEEK[1..]"),
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 9, [
                    opening_brack(4, 5),
                    integer(5, 6),
                    range_operator(6, 8),
                    closing_brack(8, 9)
                ])
            ]
        };
    }

    // `PEEK[..]` with only a (negative) end bound.
    #[test]
    fn peek_slice_end() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("PEEK[ ..-1]"),
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 11, [
                    opening_brack(4, 5),
                    range_operator(6, 8),
                    integer(8, 10),
                    closing_brack(10, 11)
                ])
            ]
        };
    }

    // `PEEK[..]` with both bounds.
    #[test]
    fn peek_slice_start_end() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("PEEK[-5..10]"),
            rule: Rule::peek_slice,
            tokens: [
                peek_slice(0, 12, [
                    opening_brack(4, 5),
                    integer(5, 7),
                    range_operator(7, 9),
                    integer(9, 11),
                    closing_brack(11, 12)
                ])
            ]
        };
    }

    #[test]
    fn identifier() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("_a8943"),
            rule: Rule::identifier,
            tokens: [
                identifier(0, 6)
            ]
        };
    }

    // A string literal exercising every supported escape.
    #[test]
    fn string() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\""),
            rule: Rule::string,
            tokens: [
                string(0, 46, [
                    quote(0, 1),
                    inner_str(1, 45),
                    quote(45, 46)
                ])
            ]
        };
    }

    // The inner `string` starts at offset 3, i.e. not directly after the `^`.
    #[test]
    fn insensitive_string() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("^ \"\\\"hi\""),
            rule: Rule::insensitive_string,
            tokens: [
                insensitive_string(0, 9, [
                    string(3, 9, [
                        quote(3, 4),
                        inner_str(4, 8),
                        quote(8, 9)
                    ])
                ])
            ]
        };
    }

    #[test]
    fn range() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("'\\n' .. '\\x1a'"),
            rule: Rule::range,
            tokens: [
                range(0, 14, [
                    character(0, 4, [
                        single_quote(0, 1),
                        inner_chr(1, 3),
                        single_quote(3, 4)
                    ]),
                    range_operator(5, 7),
                    character(8, 14, [
                        single_quote(8, 9),
                        inner_chr(9, 13),
                        single_quote(13, 14)
                    ])
                ])
            ]
        };
    }

    #[test]
    fn character() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("'\\u{123abC}'"),
            rule: Rule::character,
            tokens: [
                character(0, 12, [
                    single_quote(0, 1),
                    inner_chr(1, 11),
                    single_quote(11, 12)
                ])
            ]
        };
    }

    #[test]
    fn number() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("0123"),
            rule: Rule::number,
            tokens: [
                number(0, 4)
            ]
        };
    }

    // A `//` comment between operator and operand produces no token.
    #[test]
    fn comment() {
        parses_to! {
            parser: PestParser,
            input: Arc::from("a ~ // asda\n b"),
            rule: Rule::expression,
            tokens: [
                expression(0, 17, [
                    term(0, 2, [
                        identifier(0, 1)
                    ]),
                    sequence_operator(2, 3),
                    term(16, 17, [
                        identifier(16, 17)
                    ])
                ])
            ]
        };
    }

    // ---- expected parse failures ----

    #[test]
    fn wrong_identifier() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("0"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::identifier],
            negatives: vec![],
            pos: 0
        };
    }

    #[test]
    fn missing_assignment_operator() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a {}"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::assignment_operator],
            negatives: vec![],
            pos: 2
        };
    }

    #[test]
    fn wrong_modifier() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = *{}"),
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::silent_modifier,
                Rule::atomic_modifier,
                Rule::compound_atomic_modifier,
                Rule::non_atomic_modifier
            ],
            negatives: vec![],
            pos: 4
        };
    }

    #[test]
    fn missing_opening_brace() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = _"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::opening_brace],
            negatives: vec![],
            pos: 5
        };
    }

    #[test]
    fn empty_rule() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = {}"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::term],
            negatives: vec![],
            pos: 5
        };
    }

    #[test]
    fn missing_rhs() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { b ~ }"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::term],
            negatives: vec![],
            pos: 10
        };
    }

    #[test]
    fn wrong_op() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { b % }"),
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 8
        };
    }

    #[test]
    fn missing_closing_paren() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { (b }"),
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_paren,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 9
        };
    }

    #[test]
    fn missing_term() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { ! }"),
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_paren,
                Rule::positive_predicate_operator,
                Rule::negative_predicate_operator,
                Rule::_push,
                Rule::peek_slice,
                Rule::identifier,
                Rule::insensitive_string,
                Rule::quote,
                Rule::single_quote
            ],
            negatives: vec![],
            pos: 8
        };
    }

    #[test]
    fn string_missing_ending_quote() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { \" }"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 9
        };
    }

    #[test]
    fn insensitive_missing_string() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { ^ }"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::quote],
            negatives: vec![],
            pos: 8
        };
    }

    #[test]
    fn char_missing_ending_single_quote() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { \' }"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::single_quote],
            negatives: vec![],
            pos: 8
        };
    }

    // A lone character literal is not a valid term; a range operator must follow.
    #[test]
    fn range_missing_range_operator() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { \'a\' }"),
            rule: Rule::grammar_rules,
            positives: vec![Rule::range_operator],
            negatives: vec![],
            pos: 10
        };
    }

    #[test]
    fn wrong_postfix() {
        fails_with! {
            parser: PestParser,
            input: Arc::from("a = { a& }"),
            rule: Rule::grammar_rules,
            positives: vec![
                Rule::opening_brace,
                Rule::closing_brace,
                Rule::sequence_operator,
                Rule::choice_operator,
                Rule::optional_operator,
                Rule::repeat_operator,
                Rule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 7
        };
    }

    // ---- conversion to the AST and custom errors ----

    // Checks operator grouping: sequences nest to the left and bind tighter
    // than choice; the prefix `!` ends up outermost around the postfix operators.
    #[test]
    fn ast() {
        let input = Arc::from(
            "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | PUSH('d'..'e'))?* }",
        );

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        let ast = consume_rules_with_spans(pairs).unwrap();
        let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();

        assert_eq!(
            ast,
            vec![AstRule {
                name: "rule".to_owned(),
                ty: RuleType::Silent,
                expr: Expr::Choice(
                    Box::new(Expr::Seq(
                        Box::new(Expr::Seq(
                            Box::new(Expr::Seq(
                                Box::new(Expr::RepExact(Box::new(Expr::Ident("a".to_owned())), 1)),
                                Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
                            )),
                            Box::new(Expr::RepMax(Box::new(Expr::Ident("b".to_owned())), 2))
                        )),
                        Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
                    )),
                    Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
                        Box::new(Expr::Choice(
                            Box::new(Expr::Insens("c".to_owned())),
                            Box::new(Expr::Push(Box::new(Expr::Range(
                                "d".to_owned(),
                                "e".to_owned()
                            ))))
                        ))
                    ))))))
                )
            },]
        );
    }

    // An omitted start bound becomes 0, an omitted end bound becomes `None`.
    #[test]
    fn ast_peek_slice() {
        let input: Arc<str> = Arc::from("rule = _{ PEEK[-04..] ~ PEEK[..3] }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        let ast = consume_rules_with_spans(pairs).unwrap();
        let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();

        assert_eq!(
            ast,
            vec![AstRule {
                name: "rule".to_owned(),
                ty: RuleType::Silent,
                expr: Expr::Seq(
                    Box::new(Expr::PeekSlice(-4, None)),
                    Box::new(Expr::PeekSlice(0, Some(3))),
                )
            }],
        );
    }

    // The `should_panic` tests below pass the error result to
    // `unwrap_or_report` and compare the panic message with the rendered error.

    #[test]
    #[should_panic(expected = "grammar error

 --> 1:13
 |
1 | rule = { \"\"{4294967297} }
 | ^--------^
 |
 = number cannot overflow u32")]
    fn repeat_exact_overflow() {
        let input: Arc<str> = Arc::from("rule = { \"\"{4294967297} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    #[test]
    #[should_panic(expected = "grammar error

 --> 1:13
 |
1 | rule = { \"\"{0} }
 | ^
 |
 = cannot repeat 0 times")]
    fn repeat_exact_zero() {
        let input: Arc<str> = Arc::from("rule = { \"\"{0} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    #[test]
    #[should_panic(expected = "grammar error

 --> 1:13
 |
1 | rule = { \"\"{4294967297,} }
 | ^--------^
 |
 = number cannot overflow u32")]
    fn repeat_min_overflow() {
        let input: Arc<str> = Arc::from("rule = { \"\"{4294967297,} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    #[test]
    #[should_panic(expected = "grammar error

 --> 1:14
 |
1 | rule = { \"\"{,4294967297} }
 | ^--------^
 |
 = number cannot overflow u32")]
    fn repeat_max_overflow() {
        let input: Arc<str> = Arc::from("rule = { \"\"{,4294967297} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    #[test]
    #[should_panic(expected = "grammar error

 --> 1:14
 |
1 | rule = { \"\"{,0} }
 | ^
 |
 = cannot repeat 0 times")]
    fn repeat_max_zero() {
        let input: Arc<str> = Arc::from("rule = { \"\"{,0} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    // Both numbers overflow; the error is reported at the first one.
    #[test]
    #[should_panic(expected = "grammar error

 --> 1:13
 |
1 | rule = { \"\"{4294967297,4294967298} }
 | ^--------^
 |
 = number cannot overflow u32")]
    fn repeat_min_max_overflow() {
        let input: Arc<str> = Arc::from("rule = { \"\"{4294967297,4294967298} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    // Only the maximum is checked against zero, so the error points at it.
    #[test]
    #[should_panic(expected = "grammar error

 --> 1:15
 |
1 | rule = { \"\"{0,0} }
 | ^
 |
 = cannot repeat 0 times")]
    fn repeat_min_max_zero() {
        let input: Arc<str> = Arc::from("rule = { \"\"{0,0} }");

        let pairs = PestParser::parse(Rule::grammar_rules, input).unwrap();
        unwrap_or_report(consume_rules_with_spans(pairs));
    }

    // ---- `unescape` ----

    #[test]
    fn unescape_all() {
        let string = r"a\nb\x55c\u{111}d";

        assert_eq!(unescape(string), Some("a\nb\x55c\u{111}d".to_owned()));
    }

    // A trailing backslash with nothing after it.
    #[test]
    fn unescape_empty_escape() {
        let string = r"\";

        assert_eq!(unescape(string), None);
    }

    #[test]
    fn unescape_wrong_escape() {
        let string = r"\w";

        assert_eq!(unescape(string), None);
    }

    #[test]
    fn unescape_backslash() {
        let string = "\\\\";
        assert_eq!(unescape(string), Some("\\".to_owned()));
    }

    #[test]
    fn unescape_return() {
        let string = "\\r";
        assert_eq!(unescape(string), Some("\r".to_owned()));
    }

    #[test]
    fn unescape_tab() {
        let string = "\\t";
        assert_eq!(unescape(string), Some("\t".to_owned()));
    }

    #[test]
    fn unescape_null() {
        let string = "\\0";
        assert_eq!(unescape(string), Some("\0".to_owned()));
    }

    #[test]
    fn unescape_single_quote() {
        let string = "\\'";
        assert_eq!(unescape(string), Some("\'".to_owned()));
    }

    // `g` is not a hexadecimal digit.
    #[test]
    fn unescape_wrong_byte() {
        let string = r"\xfg";

        assert_eq!(unescape(string), None);
    }

    // `\x` needs exactly two digits.
    #[test]
    fn unescape_short_byte() {
        let string = r"\xf";

        assert_eq!(unescape(string), None);
    }

    #[test]
    fn unescape_no_open_brace_unicode() {
        let string = r"\u11";

        assert_eq!(unescape(string), None);
    }

    #[test]
    fn unescape_no_close_brace_unicode() {
        let string = r"\u{11";

        assert_eq!(unescape(string), None);
    }

    // `\u{..}` needs at least two digits.
    #[test]
    fn unescape_short_unicode() {
        let string = r"\u{1}";

        assert_eq!(unescape(string), None);
    }

    // `\u{..}` accepts at most six digits.
    #[test]
    fn unescape_long_unicode() {
        let string = r"\u{1111111}";

        assert_eq!(unescape(string), None);
    }
}