pest_railroad/
lib.rs

1use std::mem;
2
3use pest::{iterators::Pairs, Parser};
4use pest_derive::Parser;
5use railroad::{
6    Choice, Comment, Diagram, Empty, LabeledBox, Node, NonTerminal, Optional, Repeat, Sequence,
7    SimpleEnd, SimpleStart, Terminal, VerticalGrid,
8};
9
10#[derive(Parser)]
11#[grammar = "grammar.pest"]
12struct PestParser;
13
14fn make_repeat(pairs: Pairs<Rule>, old_term: Box<dyn Node>) -> Box<dyn Node> {
15    let mut comma_seen = false;
16    let mut min_repeat = None;
17    let mut max_repeat = None;
18
19    for repeat in pairs {
20        match repeat.as_rule() {
21            Rule::opening_brace => {
22                // No op - nothing to do
23            }
24            Rule::closing_brace if !comma_seen => {
25                // repeat_exact - max same as min
26                max_repeat = min_repeat;
27            }
28            Rule::closing_brace if comma_seen => {
29                // repeat_max - min is 0
30                if min_repeat.is_none() {
31                    min_repeat = Some(0);
32                }
33                // repeat_min - max is u32::MAX
34                if max_repeat.is_none() {
35                    max_repeat = Some(u32::MAX);
36                }
37            }
38            Rule::number => {
39                if comma_seen {
40                    // Panic safety: Guaranteed to be numbers from grammar
41                    max_repeat = Some(repeat.as_str().parse().expect("number"));
42                } else {
43                    // Panic safety: Guaranteed to be numbers from grammar
44                    min_repeat = Some(repeat.as_str().parse().expect("number"));
45                }
46            }
47            Rule::comma => {
48                comma_seen = true;
49            }
50            rule => unreachable!("Unexpected rule in repeat: {rule:?}"),
51        }
52    }
53
54    match (min_repeat, max_repeat) {
55        (Some(min), Some(max)) => {
56            // Figure out whether repeat should show that node must be traversed or not
57            let repeat = if min > 0 {
58                // One or more times
59                Box::new(Repeat::new(old_term, Box::new(Empty) as Box<dyn Node>)) as Box<dyn Node>
60            } else {
61                // Zero or more times
62                Box::new(Choice::new(vec![
63                    Box::new(Empty) as Box<dyn Node>,
64                    Box::new(Repeat::new(old_term, Empty)),
65                ]))
66            };
67
68            let label = if min == max {
69                format!("Repeat {min} time(s)")
70            } else if max == u32::MAX {
71                format!("Repeat {min} or more times")
72            } else if min == 0 {
73                format!("Repeat at most {max} time(s)")
74            } else {
75                format!("Repeat between {min} and {max} time(s)")
76            };
77
78            Box::new(LabeledBox::new(repeat, Comment::new(label)))
79        }
80        _ => unreachable!("Min and max not set"),
81    }
82}
83
84fn make_expr(pairs: Pairs<Rule>) -> (Box<dyn Node>, Vec<String>) {
85    let mut unsupported_warnings = Vec::new();
86
87    // Rule choices (or those without a choice operator this will be a single element)
88    let mut choices: Vec<Vec<Box<dyn Node>>> = Vec::new();
89    // Current choice
90    let mut curr_choice: Vec<Box<dyn Node>> = Vec::new();
91
92    for pair in pairs {
93        match pair.as_rule() {
94            Rule::term => {
95                let term_pairs = pair.into_inner();
96
97                // We might have a prefix and/or postfix operator, so store the term until we are sure
98                let mut term: Option<Box<dyn Node>> = None;
99                let mut positive_lookahead = 0;
100                let mut negative_lookahead = 0;
101
102                for term_pair in term_pairs {
103                    match term_pair.as_rule() {
104                        Rule::identifier => {
105                            term = Some(Box::new(NonTerminal::new(term_pair.as_str().into())));
106                        }
107                        // TODO: Is a carot sufficient for documenting insensitive strings?
108                        Rule::string | Rule::insensitive_string | Rule::range => {
109                            term = Some(Box::new(Terminal::new(term_pair.as_str().into())));
110                        }
111                        Rule::opening_paren | Rule::closing_paren => {
112                            // No op - nothing to do
113                        }
114                        Rule::expression => {
115                            let (expr, warnings) = make_expr(term_pair.into_inner());
116                            if !warnings.is_empty() {
117                                unsupported_warnings.extend(warnings);
118                            }
119                            term = Some(expr);
120                        }
121                        Rule::repeat_operator => {
122                            // Term would only not be populated if an unsupported rule was encountered
123                            if let Some(old_term) = term {
124                                term = Some(Box::new(Choice::new(vec![
125                                    Box::new(Empty) as Box<dyn Node>,
126                                    Box::new(Repeat::new(old_term, Empty)),
127                                ])));
128                            }
129                        }
130                        Rule::repeat_once_operator => {
131                            // Term would only not be populated if an unsupported rule was encountered
132                            if let Some(old_term) = term {
133                                term = Some(Box::new(Repeat::new(old_term, Empty)));
134                            }
135                        }
136                        Rule::optional_operator => {
137                            // Term would only not be populated if an unsupported rule was encountered
138                            if let Some(old_term) = term {
139                                term = Some(Box::new(Optional::new(old_term)));
140                            }
141                        }
142                        Rule::positive_predicate_operator => {
143                            positive_lookahead += 1;
144                        }
145                        Rule::negative_predicate_operator => {
146                            negative_lookahead += 1;
147                        }
148                        Rule::repeat_exact
149                        | Rule::repeat_min
150                        | Rule::repeat_max
151                        | Rule::repeat_min_max => {
152                            // Term would only not be populated if an unsupported rule was encountered
153                            if let Some(old_term) = term {
154                                term = Some(make_repeat(term_pair.into_inner(), old_term));
155                            }
156                        }
157                        _ => {
158                            unsupported_warnings.push(format!(
159                                "### Unsupported rule in term: {:#?} ###",
160                                term_pair.as_rule()
161                            ));
162                        }
163                    }
164                }
165
166                // Term would only not be populated if an unsupported rule was encountered
167                if let Some(mut term) = term {
168                    // TODO: I don't really understand what multiple lookaheads would mean
169                    // (the stress test has double negative predicates. I am assume they cancel each other out?)
170                    if negative_lookahead > 0 && negative_lookahead % 2 != 0 {
171                        term = Box::new(LabeledBox::new(
172                            term,
173                            Comment::new("Lookahead: Can't match".into()),
174                        ));
175                    } else if positive_lookahead > 0 && positive_lookahead % 2 != 0 {
176                        term = Box::new(LabeledBox::new(
177                            term,
178                            Comment::new("Lookahead: Must match".into()),
179                        ));
180                    }
181                    curr_choice.push(term);
182                }
183            }
184            Rule::sequence_operator => {
185                // No op - nothing to do
186            }
187            Rule::choice_operator => {
188                // Store the current sequence and start a new one
189                choices.push(mem::take(&mut curr_choice))
190            }
191            rule => unreachable!("Unexpected rule in expression: {rule:?}"),
192        }
193    }
194
195    // Ensure that the last sequence is stored
196    if !curr_choice.is_empty() {
197        choices.push(curr_choice);
198    }
199
200    // Perform a custom flatten of our choices
201    let mut choices: Vec<_> = choices
202        .into_iter()
203        .map(|mut seq| {
204            match seq.len() {
205                // This can only happpen if rule starts with a choice operator
206                // TODO: Is empty the right choice here? What is the actual behavior of starting with a choice operator?
207                0 => Box::new(Empty),
208                // If we only have one element, return it directly
209                1 => seq.remove(0),
210                // Otherwise wrap in a sequence
211                _ => Box::new(Sequence::new(seq)),
212            }
213        })
214        .collect();
215
216    // If we only have one choice, return it directly
217    if choices.len() == 1 {
218        (choices.remove(0), unsupported_warnings)
219    } else {
220        (Box::new(Choice::new(choices)), unsupported_warnings)
221    }
222}
223
224fn make_rule(identifier: &str, pairs: Pairs<Rule>) -> (Box<dyn Node>, Vec<String>) {
225    let mut unsupported_warnings = Vec::new();
226
227    // Identifier stacked on top of a sequence
228    let mut grid: Vec<Box<dyn Node>> = Vec::with_capacity(2);
229    // Our rule sequence
230    let mut seq: Vec<Box<dyn Node>> = Vec::with_capacity(pairs.len());
231
232    let mut rule_ident = String::with_capacity(64);
233    rule_ident.push_str(identifier);
234
235    for pair in pairs {
236        match pair.as_rule() {
237            Rule::assignment_operator => {
238                // No op - nothing to do
239            }
240            Rule::silent_modifier => {
241                rule_ident.push_str(" (silent)");
242            }
243            Rule::atomic_modifier => {
244                rule_ident.push_str(" (atomic)");
245            }
246            Rule::compound_atomic_modifier => {
247                rule_ident.push_str(" (compound atomic)");
248            }
249            Rule::non_atomic_modifier => {
250                rule_ident.push_str(" (non-atomic)");
251            }
252            Rule::opening_brace => {
253                grid.push(Box::new(Comment::new(mem::take(&mut rule_ident))));
254                seq.push(Box::new(SimpleStart));
255            }
256            Rule::expression => {
257                let (expr, warnings) = make_expr(pair.into_inner());
258                if !warnings.is_empty() {
259                    unsupported_warnings.extend(warnings);
260                }
261
262                seq.push(expr);
263            }
264            Rule::closing_brace => {
265                seq.push(Box::new(SimpleEnd));
266            }
267            rule => unreachable!("Unexpected rule in grammar rule: {rule:?}"),
268        }
269    }
270
271    grid.push(Box::new(Sequence::new(seq)));
272    (Box::new(VerticalGrid::new(grid)), unsupported_warnings)
273}
274
275/// Creates a railroad (aka syntax) diagram from the grammar contained in the input string. It also returns a list of unsupported warnings for the pest rules that aren't supported.
276pub fn generate_diagram(
277    input: &str,
278) -> Result<(Diagram<VerticalGrid<Box<dyn Node>>>, Vec<String>), pest::error::Error<Rule>> {
279    let mut unsupported_warnings = Vec::new();
280
281    let pairs = PestParser::parse(Rule::grammar_rules, input)?;
282
283    let mut nodes: Vec<Box<dyn Node>> = Vec::with_capacity(pairs.len());
284
285    // Loop over all top level elements
286    for pair in pairs {
287        match pair.as_rule() {
288            // We only process grammar rules
289            Rule::grammar_rule => {
290                let mut rule_pairs = pair.into_inner();
291
292                // Panic safety: We know that the first element is either a line doc or an identifier from grammar
293                let first_pair = rule_pairs.next().expect("line doc or identifier");
294
295                match first_pair.as_rule() {
296                    Rule::line_doc => {
297                        nodes.push(Box::new(Comment::new(first_pair.as_str().into())));
298                    }
299                    Rule::identifier => {
300                        let (rule, warnings) = make_rule(first_pair.as_str(), rule_pairs);
301                        if !warnings.is_empty() {
302                            unsupported_warnings.extend(warnings);
303                        }
304                        nodes.push(rule);
305                    }
306                    rule => unreachable!("Unexpected first rule in grammar rule: {rule:?}"),
307                }
308            }
309            Rule::grammar_doc => {
310                // No op - unsupported
311            }
312            Rule::EOI => {
313                // No op - nothing to do
314            }
315            rule => unreachable!("Unexpected rule in top level grammar: {rule:?}"),
316        }
317    }
318
319    let root = VerticalGrid::new(nodes);
320    let diagram = Diagram::with_default_css(root);
321    Ok((diagram, unsupported_warnings))
322}