wagon_parser/parser/rule.rs
1use std::fmt::Display;
2
3use super::{helpers::{check_semi, between_sep}, LexerBridge, Parse, ParseResult, Peek, ResultNext, Rewrite, SpannableNode, Spannable, Tokens, WagParseError};
4use wagon_lexer::productions::{ImportType, Productions};
5use crate::firstpass::{FirstPassResult, FirstPassState, ReqAttributes};
6
7use super::rhs::Rhs;
8use super::Ident;
9
10use wagon_macros::match_error;
11
12use wagon_macros::new_unspanned;
13
#[derive(PartialEq, Debug, Eq, Hash)]
#[new_unspanned]
/// A single rule in the WAG grammar.
///
/// # Grammar
/// <span><pre>
/// [Rule] -> [Ident] RuleType;
/// RuleType -> RealRule | ImportRule;
/// RealRule -> NTArgs? RuleArrow [Rhs];
/// RuleArrow -> `"->" | "=>"`;
/// ImportRule -> ImportArrow Identifier;
/// ImportArrow -> `"<-" | "<=" | "<<" | "</"`;
///
/// NTArgs -> "<" AttrIdentifierList ">";
/// AttrIdentifierList -> [Ident] "," AttrIdentifierList | [Ident];
/// </pre></span>
pub enum Rule {
    /// An analytic rule (`->`): rule name, NT parameter list, alternatives.
    Analytic(String, Vec<SpannableNode<Ident>>, Vec<SpannableNode<Rhs>>),
    /// A generative rule (`=>`): rule name, NT parameter list, alternatives.
    Generate(String, Vec<SpannableNode<Ident>>, Vec<SpannableNode<Rhs>>),
    /// An import rule (`<-`, `<=` or `<<`): local name, kind of import, imported identifier.
    Import(String, ImportType, String),
    /// An import exclude rule (`</`): local name, identifiers to exclude.
    Exclude(String, Vec<SpannableNode<String>>)
}
40
impl Parse for Rule {
    /// Parse one complete rule, including the terminating `;`.
    ///
    /// # Errors
    /// Returns a [`WagParseError`] when the lexer yields an unexpected token
    /// at any point of the rule.
    fn parse(lexer: &mut LexerBridge) -> ParseResult<Self> {
        // Every rule starts with an identifier; only a plain (still `Unknown`)
        // identifier is accepted here. `match_error!` presumably expands a
        // catch-all error arm for any other token — TODO confirm in wagon_macros.
        let ident = match_error!(match lexer.next_result()? {
            Tokens::ProductionToken(Productions::Identifier(wagon_ident::Ident::Unknown(s))) => Ok(s),
        })?;
        // Optional NT parameter list: `<a, b, ...>` between `<` and `>`,
        // comma-separated. Absent `<` means no parameters.
        let args = if lexer.peek() == Some(&Ok(Tokens::ProductionToken(Productions::Lt))) {
            between_sep(lexer, &Tokens::ProductionToken(Productions::Lt), &Tokens::ProductionToken(Productions::Gt), Tokens::ProductionToken(Productions::Comma))?
        } else {
            Vec::new()
        };
        // The next token decides which kind of rule this is.
        let resp = match_error!(match lexer.next_result()? {
            // `->`: analytic rule; alternatives separated by `|`.
            Tokens::ProductionToken(Productions::Produce) => {
                let rhs = SpannableNode::parse_sep(lexer, Tokens::ProductionToken(Productions::Alternative))?;
                Ok(Self::Analytic(ident, args, rhs))
            },
            // `=>`: generative rule; alternatives separated by `|`.
            Tokens::ProductionToken(Productions::Generate) => {
                let rhs = SpannableNode::parse_sep(lexer, Tokens::ProductionToken(Productions::Alternative))?;
                Ok(Self::Generate(ident, args, rhs))
            },
            // One of the import arrows; the lexer already classified which one.
            Tokens::ProductionToken(Productions::Import(i)) => {
                match i {
                    // `<-`/`<=`/`<<`: a single identifier follows naming what to import.
                    ImportType::Basic | ImportType::Full | ImportType::Recursive => {
                        match lexer.next_result()? {
                            Tokens::ProductionToken(Productions::Identifier(wagon_ident::Ident::Unknown(s))) => {
                                Ok(Self::Import(ident, i, s))
                            },
                            error => Err(WagParseError::Unexpected {
                                span: lexer.span(),
                                offender: error,
                                expected: vec![Tokens::ProductionToken(Productions::Identifier(Ident::default())).to_string()]
                            })
                        }
                    }
                    // `</`: a separated list of identifiers to exclude follows.
                    ImportType::Exclude => {
                        Ok(Self::Exclude(ident, SpannableNode::parse_sep(lexer, Tokens::ProductionToken(Productions::Additional))?))
                    }
                }
            }
        });
        // Every rule form must be terminated by a `;`, even on the error path
        // checked above — consume it before returning the parsed rule.
        check_semi(lexer)?;
        resp
    }
}
84
85/*
86Ident format:
87
88{BASE}·{alt}·{chunk} - Default
89 ·p - Helper for '+'
90 [··{depth}]+ - Deeper layers of recursive EBNF
91 - - Default again but at this layer
92
93*/
/// Convert every [`Chunk`](super::chunk::Chunk) inside a group or with an ebnf operator into its own separate rule.
95///
96/// # Rewrite rules
97/// ## `?`
98/// | original | rewrite |
99/// |:----------:|:-------------------------------:|
100/// | `A -> B?;` | `A -> A·0·0;` `A·0·0 -> B \| ;` |
101/// ## `*`
102/// | original | rewrite |
103/// |:----------:|:-------------------------------------:|
104/// | `A -> B*;` | `A -> A·0·0;` `A·0·0 -> B A·0·0 \| ;` |
105/// ## `+`
106/// | original | rewrite |
107/// |:----------:|:---------------------------------------------------------------:|
108/// | `A -> B+;` | `A -> A·0·0;` `A·0·0 -> B A·0·0·p;` `A·0·0·p -> B A·0·0·p \| ;` |
109/// ## `()` (groups)
110/// | original | rewrite |
111/// |:-----------:|:---------------------------:|
112/// | `A -> (B);` | `A -> A·0·0;` `A·0·0 -> B;` |
113///
114/// # Helper rules and the meaning of ·
115/// · is a character allowed in identifiers in Rust because of backwards compatible ISO reasons. It is thus
116/// a really useful reserved character to make sure no collisions occur with user input.
117/// When factoring out EBNF or groups, we want to create helper rules and use the · character to denote the nature of each helper rule.
118///
119/// ## Format of helper rules
120/// The format of a helper rule is `{BASE}·{alt}·{chunk}`. {BASE} is the identifier for the rule we are creating
121/// the helper for. {alt} is which alternative of the rule we are creating it for, and chunk is the number of the chunk we are rewriting.
122///
123/// Additionally, we optionally append `·p` specifically as an additional helper rule for `+` operators.
124///
125/// ## Recursion
126/// Because of groups, it is possible that we need to recursively keep rewriting chunks/rules. The recursion depth is denoted by ··{depth}
127/// and when you see it, you know we've gone a layer deeper.
128///
129/// ## Attributes
/// Any attributes that are passed to a chunk with an ebnf, or that are used inside a grouped chunk, must be available in the helper rules.
131/// Additionally, any modifications made to these attributes must be passed upwards as if the helper rules were completely inlined. As such,
/// we pass all the required attributes encountered in the chunks as they were originally written to the first helper rule. From then on, we
133/// treat each attribute as synthesized, so that any changes will be properly passed up to the original calling rule.
134impl Rewrite<(Vec<Self>, ReqAttributes)> for SpannableNode<Rule> {
135 fn rewrite(&mut self, depth: usize, state: &mut FirstPassState) -> FirstPassResult<(Vec<Self>, ReqAttributes)> {
136 match &mut self.node {
137 Rule::Analytic(s, args, rhs) => {
138 let mut rules = Vec::new(); // All the new rules we create because of rewriting.
139 let mut req_attrs = ReqAttributes::new(); // All the attributes required for this rule.
140 for (i, alt) in rhs.iter_mut().enumerate() {
141 for (j, chunk) in alt.node.chunks.iter_mut().enumerate() {
142 let ident = format!("{s}·{i}·{j}"); // Construct an identifier for this helper rule.
143 let (chunk_node, span) = chunk.deconstruct();
144 let (new_rules, new_attrs) = chunk_node.rewrite(ident, span, Rule::Analytic, depth, state)?; // Rewrite the chunk.
145 rules.extend(new_rules); // Add any new rules we encountered to the list.
146 req_attrs.extend(new_attrs);
147 }
148 }
149 for arg in args {
150 state.add_parameter(s.clone(), arg.clone())?;
151 }
152 Ok((rules, req_attrs))
153 },
154 Rule::Generate(s, args, rhs) => {
155 let mut rules = Vec::new();
156 let mut req_attrs = ReqAttributes::new();
157 for (i, alt) in rhs.iter_mut().enumerate() {
158 for (j, chunk) in alt.node.chunks.iter_mut().enumerate() {
159 let ident = format!("{s}·{i}·{j}");
160 let (chunk_node, span) = chunk.deconstruct();
161 let (new_rules, new_attrs) = chunk_node.rewrite(ident, span, Rule::Analytic, depth, state)?;
162 rules.extend(new_rules);
163 req_attrs.extend(new_attrs);
164 }
165 }
166 for arg in args {
167 state.add_parameter(s.clone(), arg.clone())?;
168 }
169 Ok((rules, req_attrs))
170 },
171 Rule::Import(..) => todo!(),
172 Rule::Exclude(..) => todo!(),
173 }
174 }
175}
176
177use wagon_utils::comma_separated;
178use itertools::Itertools;
179impl Display for Rule {
180 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
181 match self {
182 Self::Analytic(s, args, rhs) => {
183 if args.is_empty() {
184 writeln!(f, "{s} -> {};", rhs.iter().join(" | "))
185 } else {
186 writeln!(f, "{s}<{}> -> {};", comma_separated(args), rhs.iter().join(" | "))
187 }
188 },
189 Self::Generate(s, args, rhs) => {
190 if args.is_empty() {
191 writeln!(f, "{s} => {};", rhs.iter().join(" | "))
192 } else {
193 writeln!(f, "{s}<{}> => {};", comma_separated(args), rhs.iter().join(" | "))
194 }
195 },
196 Self::Import(s1, imp, s2) => {
197 writeln!(f, "{s1} {imp} {s2};")
198 },
199 Self::Exclude(s, ex) => {
200 writeln!(f, "{s} </ {}", ex.iter().join(" & "))
201 },
202 }
203 }
204}