spindle_lib/
ir.rs

1//! Intermediary representation (ir) for a parsed grammar.
2
3use peg::parser;
4
5/// The maximum number of repetitions used for the unbounded `+` and `*` rules.
6/// This can be overridden with an explicit range rule,
7/// e.g. `"3"{0,2345678}` repeats `"3"` up to 2345678 times.
8pub const MAX_REPEAT: u32 = 255;
9
10parser! {
11/// This parser is not meant to efficient, since parsing the grammar is not meant to be
12/// on the hot path (unlike generating expressions).
13pub grammar bnf() for str {
14    pub rule expr() -> Vec<(String, Expr)>
15        = l:(definition() ++ _)
16
17    rule definition() -> (String, Expr)
18        = _ s:reference() _ ":" _ e:branch() _ ";" _ { (s, e) }
19
20    rule branch() -> Expr
21        = or()
22        / branch_inner()
23
24    rule branch_inner() -> Expr
25        = _ x:concat() _ { x }
26        / _ x:concat_inner() _ { x }
27
28    rule concat_inner() -> Expr
29        = rep()
30        / choice()
31        / expression()
32
33    rule expression() -> Expr
34        = terminal()
35        / group()
36
37    rule terminal() -> Expr
38        = regex()
39        / bytes()
40        / s:reference() { Expr::Reference(s) }
41        / literal()
42
43    rule group() -> Expr
44        = "(" _ r:branch() _ ")" { Expr::Group(Box::new(r)) }
45
46    rule or() -> Expr
47        = l:(branch_inner() **<2,64> "|") { Expr::Or(l) }
48
49    rule rep() -> Expr
50        = g:expression() _ "*" { Expr::Repetition(Box::new(g), 0, MAX_REPEAT) }
51        / g:expression() _ "+" { Expr::Repetition(Box::new(g), 1, MAX_REPEAT) }
52        / g:expression() _ "{" _ n:$(['0'..='9']+) _ "}" {?
53            n.parse().map_or(Err("u32"), |reps| Ok(Expr::Repetition(Box::new(g), reps, reps)))
54        }
55        / g:expression() _ "{" _ n1:$(['0'..='9']+) _ "," _ n2:$(['0'..='9']+) _ "}" {?
56            let min_reps = n1.parse().or(Err("u32"))?;
57            let max_reps = n2.parse().or(Err("u32"))?;
58            match min_reps < max_reps {
59                true => Ok(Expr::Repetition(Box::new(g), min_reps, max_reps)),
60                false => Err("Min repetitions cannot be larger than max repetitions"),
61            }
62
63        }
64
65    rule choice() -> Expr
66        = g:expression() _ "?" { Expr::Optional(Box::new(g)) }
67
68    rule concat() -> Expr
69        = l:(concat_inner() **<2,64> __) { Expr::Concat(l) }
70
71    rule reference() -> String
72        = s:$(['a'..='z' | 'A'..='Z' | '_' | '0'..='9']+) { s.to_string() }
73
74    rule literal() -> Expr
75        = s:string() { Expr::Literal(s) }
76
77    rule regex() -> Expr
78        = "r" s:string() { Expr::Regex(s) }
79
80    rule _ = [' ' | '\n' | '\t']*
81    rule __ = [' ' | '\n' | '\t']+
82
83    rule string() -> String
84        = "\"" s:string_inner() "\"" { s }
85
86    rule bytes() -> Expr
87        = "[" s:bytes_inner() "]" { Expr::Bytes(s) }
88
89    rule bytes_inner() -> Vec<u8>
90        = l:(byte_ws() ** ",") { l }
91
92    rule byte_ws() -> u8
93        = _ b:byte() _ { b }
94
95    rule byte() -> u8
96        = n:$(['0'..='9']+) {? n.parse().or(Err("valid u8")) }
97
98    // checks for certain escape characters (todo, probably isn't a complete list)
99    rule escape_char() -> char
100        = "\\\"" { '"' }
101        / "\\n" { '\n' }
102        / "\\t" { '\t' }
103        / "\\\0" { '\0' }
104        / "\\u{" value:$(['0'..='9' | 'a'..='f' | 'A'..='F']+) "}" {?
105              u32::from_str_radix(value, 16).ok().and_then(char::from_u32).ok_or("valid unicode code point")
106          }
107        / expected!("valid escape sequence")
108
109    rule string_inner() -> String
110        = c:escape_char() s:string_inner() {
111            let mut x = c.to_string();
112            x.push_str(&s);
113            x
114        }
115        / c:[^'"'] s:string_inner() {
116            let mut x = c.to_string();
117            x.push_str(&s);
118            x
119        }
120        / "" { String::new() }
121}}
122
/// A node of the parsed grammar tree, as built by the `bnf` parser.
///
/// `Clone` and `PartialEq`/`Eq` are derived so that parse results can be duplicated and
/// compared structurally (e.g. in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr {
    /// Alternatives separated by `|`, e.g. `a | b`.
    Or(Vec<Expr>),
    /// Whitespace-separated items that follow each other, e.g. `a b c`.
    Concat(Vec<Expr>),
    /// An expression followed by `?`.
    Optional(Box<Expr>),
    /// A repeated expression with its minimum and maximum repetition counts
    /// (from `*`, `+`, `{n}` or `{min,max}`).
    Repetition(Box<Expr>, u32, u32),
    /// The name of another rule.
    Reference(String),
    /// A double-quoted string literal, with recognised escape sequences already resolved.
    Literal(String),
    /// The source text of a regex written as `r"..."`.
    Regex(String),
    /// A byte list written as `[1, 2, 3]`.
    Bytes(Vec<u8>),
    /// A parenthesised sub-expression.
    Group(Box<Expr>),
}