wagon_parser/parser/
chunk.rs

1
2use std::fmt::Display;
3
4use crate::WrapSpannable;
5use crate::firstpass::{FirstPassResult, FirstPassState, GetReqAttributes, ReqAttributes};
6use wagon_lexer::productions::{Productions, EbnfType};
7use super::CallingArgs;
8use super::{Parse, LexerBridge, ParseResult, Tokens, Spannable, WagParseError, Ident, Rewrite, rule::Rule, rhs::Rhs, symbol::Symbol, SpannableNode, Span, ResultPeek, ResultNext};
9
10use wagon_macros::new_unspanned;
11
12#[derive(PartialEq, Debug, Eq, Hash, Clone)]
13#[new_unspanned]
14/// A chunk of an [`Rhs`].
15///
16/// Chunks are symbols in () with optionally an EBNF token following it.
17/// If there are no (), there is only 1 symbol, which may still optionally have an EBNF token.
18///
19/// # Grammar
20/// <code>[Chunk] -> [ChunkP] [EbnfType]?;</code>
21pub struct Chunk {
22    /// The actual chunk part.
23	pub chunk: ChunkP,
24    /// The possible EBNF operator.
25	pub ebnf: Option<EbnfType>,
26}
27
28#[derive(PartialEq, Debug, Eq, Hash, Clone)]
29#[new_unspanned]
30/// The actual chunk part.
31///
32/// # Grammar
33/// <span><pre>
34/// [ChunkP] -> [Symbol]
35///        |  `"("` [Chunk]* `")"`
36///        ;
37/// </pre></span>
38pub enum ChunkP {
39    /// Just a [`Symbol`].
40	Unit(SpannableNode<Symbol>),
41    /// A group of [`Chunk`]s. Enclosed with `()`.
42	Group(Vec<SpannableNode<Chunk>>)
43}
44
45/// A sort-of way to handle Rust enums being not fully first-class. This should be one of the [`Rule`] variants as a constructor.
46type RuleConstructor = fn(String, Vec<SpannableNode<Ident>>, Vec<SpannableNode<Rhs>>) -> Rule;
47
48impl Chunk {
49
50    /// Given a chunk and ebnf operator, rewrite the chunk such that it expresses the same language without the operator.
51    ///
52    /// Adds new rules to the inserted `rules` vector which are helper rules to express the language.
53	fn rewrite_ebnf(
54            ebnf: &EbnfType, // The exact Ebnf operator
55            ident: String, // The identifier for the rule we are rewriting
56            args: Vec<SpannableNode<Ident>>, // The calling args for this rule (must be propagated)
57            symbol: SpannableNode<Symbol>,  // The exact symbol in the rule we are rewriting (I.E. The `B` in S -> A B?)
58            span: &Span, // The span information for this rule
59            rule_func: RuleConstructor, // The constructor function for the rule we are rewriting (analytic or generative)
60            rules: &mut Vec<SpannableNode<Rule>> // The vector to add helper rules to.
61        ) {
62        let chunks: Vec<SpannableNode<Rhs>> = match ebnf { // Assuming the rule is S -> A{op}
63            EbnfType::Some => { // +
64                let helper_ident = format!("{ident}·p"); // Requires a second helper rule to do the plus step 
65                rules.push(SpannableNode::new(rule_func(helper_ident.clone(), args.clone(),
66                    vec![ // A·x·y·p -> A A·x·y·p | ;
67                        SpannableNode::new(Rhs {
68                            weight: None,
69                            chunks: vec![
70                                SpannableNode::new(Self {
71                                    ebnf: None,
72                                    chunk: ChunkP::Unit(symbol.clone())
73                                }, span.clone()),
74                                Self::simple_ident_spanned_with_args(&helper_ident, span.clone(), args.clone())
75                            ]
76                        }, span.clone()),
77                        Rhs::empty_spanned(span.clone())
78                    ]
79                ), span.to_owned()));
80                vec![ // A·x·y -> A A·x·y·p;
81                    SpannableNode::new(Rhs {
82                        weight: None,
83                        chunks: vec![
84                            SpannableNode::new(Self {
85                                ebnf: None,
86                                chunk: ChunkP::Unit(symbol)
87                            }, span.clone()),
88                            Self::simple_ident_spanned_with_args(&helper_ident, span.clone(), args.clone())
89                        ]
90                    }, span.clone())
91                ]
92            },
93            EbnfType::Many => { // *
94                vec![ // A·x·y -> A A·x·y | ;
95                    SpannableNode::new(Rhs {
96                        weight: None,
97                        chunks: vec![
98                            SpannableNode::new(Self {
99                                ebnf: None,
100                                chunk: ChunkP::Unit(symbol)
101                            }, span.clone()),
102                            Self::simple_ident_spanned_with_args(&ident, span.clone(), args.clone())
103                        ]
104                    }, span.clone()),
105                    Rhs::empty_spanned(span.clone())
106                ]
107            },
108            EbnfType::Maybe => { // ?
109                vec![ // A·x·y -> A | ;
110                    SpannableNode::new(Rhs {
111                        weight: None,
112                        chunks: vec![
113                            SpannableNode::new(Self {
114                                ebnf: None,
115                                chunk: ChunkP::Unit(symbol)
116                            }, span.clone())
117                        ]
118                    }, span.clone()),
119                    Rhs::empty_spanned(span.clone())
120                ]
121            },
122        };
123        rules.push(SpannableNode::new(rule_func(ident, args, chunks), span.to_owned())); // S -> A·x·y;
124	}
125
126    /// Rewrite according to the EBNF operator.
127    ///
128    /// If there is no EBNF operator, we do nothing.
129    /// If there is, we extract the chunk that it operates on and rewrite it as a new, separate rule. We do this recursively.
130    /// At the end, all EBNF operators are replaced by references to the new rules and we return a list of new rules to add to the grammar.
131	pub(crate) fn rewrite(
132            &mut self, 
133            ident: String, // The identifier for the new rule.
134            span: &Span,  // The span information for this rule
135            rule_func: RuleConstructor, // The constructor for the type of rule
136            depth: usize, // Recursive depth
137            state: &mut FirstPassState
138        ) -> FirstPassResult<(Vec<SpannableNode<Rule>>, ReqAttributes)> {
139		let mut rules = Vec::new();
140        let required_args = if let Some(e) = std::mem::take(&mut self.ebnf) { // There is an ebnf operator
141            match self {
142                Self { chunk: ChunkP::Unit(u), ..} => { // This is a singular chunk
143                    Self::rewrite_unit_ebnf(u, &e, rule_func, depth, ident, span, &mut rules)
144                },
145                Self { chunk: ChunkP::Group(g), ..} => { // This is a group of chunks in ()
146                    let real_g = std::mem::take(g);
147                    self.rewrite_group(real_g, Some(&e), rule_func, depth, ident, span, state, &mut rules)?
148                }
149            }
150        } else { // There is no ebnf operator
151            match self {
152                Self { chunk: ChunkP::Unit(u), ..} => {
153                    Self::rewrite_unit_no_ebnf(u, depth)
154                },
155                Self { chunk: ChunkP::Group(g), ..} => {
156                    let real_g = std::mem::take(g);
157                    self.rewrite_group(real_g, None, rule_func, depth, ident, span, state, &mut rules)?
158                }
159            } 
160        };
161        Ok((rules, required_args))
162	}
163
164    /// Rewrite a singular chunk with an EBNF operator (I.E. `S -> A?`)
165    fn rewrite_unit_ebnf(u: &mut SpannableNode<Symbol>, e: &EbnfType, rule_func: RuleConstructor, depth: usize, ident: String, span: &Span, rules: &mut Vec<SpannableNode<Rule>>) -> ReqAttributes {
166        let calling_args = u.to_inner().calling_args(); // Get the calling args for this symbol
167        let req_args = u.get_req_attributes(); // Get all the required attributes for this symbol
168        // We modify the rule in place by taking out the symbol (in this case `A`) and replacing
169        // it with that of the new helper rule that does the EBNF step.
170        let mut yanked = std::mem::replace(u,
171            SpannableNode::new(
172                Symbol::NonTerminal(
173                    SpannableNode::new(Ident::Unknown(ident.clone()), span.clone()), 
174                    CallingArgs::new(),
175                ), 
176                span.clone()
177            )
178        );
179        let symbol_args = yanked.to_inner_mut().rewrite().into_iter().collect();
180        if let Symbol::NonTerminal(_, v) = u.to_inner_mut() { // We get a reference to the vector of calling arguments for the new symbol (which is always a NT).
181            let main_args = if depth > 0 { // Unless this is our first pass
182                let mut as_synth = CallingArgs::with_capacity(calling_args.len()); // Convert all the calling arguments to be synthesized attributes.
183                for i in &calling_args {
184                    let s = i.to_inner().extract_string();
185                    as_synth.push(SpannableNode::new(Ident::Synth(s.to_string()), i.span()));
186                }
187                as_synth
188            } else {
189                calling_args // Otherwise, just use our old calling args
190            };
191            let _ = std::mem::replace(v, main_args); // And insert them into the vector.
192        }
193        Self::rewrite_ebnf(e, ident, symbol_args, yanked, span, rule_func, rules); // Construct the helper rules
194        req_args // Return all the attributes that are required for the original symbol.
195    }
196
197    fn rewrite_unit_no_ebnf(u: &mut SpannableNode<Symbol>, depth: usize) -> ReqAttributes {
198        let req = u.get_req_attributes(); // Simply get all the require attributes for this symbol
199        if depth > 0 {
200            u.to_inner_mut().rewrite(); // If this is a recursive call, rewrite calling attributes to synthesized.
201        }
202        req
203    }
204
205    #[allow(clippy::too_many_arguments)]
206    fn rewrite_group(
207            &mut self, 
208            g: Vec<SpannableNode<Self>>, 
209            e: Option<&EbnfType>, // Optionally, the type of Ebnf operator associated with this group
210            rule_func: RuleConstructor, 
211            depth: usize, 
212            ident: String, 
213            span: &Span, 
214            state: &mut FirstPassState, 
215            rules: &mut Vec<SpannableNode<Rule>>
216        ) -> FirstPassResult<ReqAttributes> {
217        let new_ident = if e.is_some() { // We have an ebnf operator, construct a new ident 
218            format!("{ident}··{depth}")
219        } else {
220            ident.clone()
221        };
222        let mut new_rule = SpannableNode::new( // Create a new rule which is like this grouped chunk, but has instead the group as it's chunks.
223            rule_func(
224                new_ident.clone(), 
225                CallingArgs::new(), // Should be as synthesized
226                vec![
227                    Rhs { weight: None, chunks: g }.into_spanned(span.clone())
228                ]
229            ), 
230            span.clone()
231        );
232        let (new_rules, req_args) = new_rule.rewrite(depth+1, state)?; // Do a recursive rewrite of this new rule.
233        let mut as_synth = CallingArgs::with_capacity(req_args.len()); // Create a list of all the required attributes for this new rule, but synthesized.
234        for i in &req_args {
235            let s = i.to_inner().extract_string();
236            as_synth.push(SpannableNode::new(Ident::Synth(s.to_string()), i.span())); 
237        }
238        match new_rule.to_inner_mut() {
239            Rule::Analytic(_, v, _) | Rule::Generate(_, v, _) => {
240                *v = as_synth.clone();
241            },
242            _ => {}
243        }
244        let symbol_args = if depth > 0 { // If this is a recusrive call, the args for the symbol should be synthesized
245            as_synth.clone()
246        } else { 
247            req_args.iter().cloned().collect() // Otherwise they should be as the original.
248        };
249        self.chunk = ChunkP::Unit(Symbol::simple_ident_spanned_with_args(&ident, span.clone(), symbol_args)); // Should be as expected
250        if let Some(eb) = e {
251            let symbol = Symbol::simple_ident_spanned_with_args(&new_ident, span.clone(), as_synth.clone()); // Should be as synthesized
252            Self::rewrite_ebnf(eb, ident, as_synth, symbol, span, rule_func, rules); // Should be as synthesized
253        }
254        rules.push(new_rule);
255        rules.extend(new_rules);
256        Ok(req_args)
257    }
258
259    /// Check if this chunk is a terminal
260    // pub(crate) fn is_terminal(&self) -> bool {
261    //     match &self.chunk {
262    //         ChunkP::Unit(s) => s.node.is_terminal(),
263    //         ChunkP::Group(_) => false,
264    //     }
265    // }
266
267    /// Automatically create a `Chunk` that is just a terminal. See [`Symbol::simple_terminal`].
268    #[must_use] 
269    pub fn simple_terminal(term: &str) -> Self {
270        Self { 
271            ebnf: None, 
272            chunk: ChunkP::Unit(Symbol::simple_terminal(term).into()) 
273        }
274    }
275
276    /// Automatically create a `Chunk` that is just an ident. See [`Symbol::simple_ident`].
277    #[must_use] 
278    pub fn simple_ident(ident: &str) -> Self {
279        Self {
280            ebnf: None,
281            chunk: ChunkP::Unit(Symbol::simple_ident(ident).into())
282        }
283    }
284
285    /// Automatically create a spanned `Chunk` that is just an ident.
286    // pub(crate) fn simple_ident_spanned(ident: &str, span: Span) -> SpannableNode<Self> {
287    //     Self::simple_ident_spanned_with_args(ident, span, Vec::new())
288    // }
289
290    pub(crate) fn simple_ident_spanned_with_args(ident: &str, span: Span, args: Vec<SpannableNode<Ident>>) -> SpannableNode<Self> {
291        SpannableNode::new(Self {
292            ebnf: None,
293            chunk: ChunkP::Unit(Symbol::simple_ident_spanned_with_args(ident, span.clone(), args))
294        }, span)
295    }
296
297    /// Automatically create an empty chunk.
298    #[must_use] 
299    pub fn empty() -> Self {
300        Self {
301            ebnf: None,
302            chunk: ChunkP::Unit(Symbol::Epsilon.into())
303        }
304    }
305
306    /// Automatically create a spanned empty chunk.
307    pub(crate) fn empty_spanned(span: Span) -> SpannableNode<Self> {
308        SpannableNode::new(
309            Self {
310                ebnf: None,
311                chunk: ChunkP::Unit(SpannableNode::new(Symbol::Epsilon, span.clone()))
312            }, span
313        )
314    }
315
316    /// Extract all the symbols that are in this chunk.
317    #[must_use] 
318    pub fn extract_symbols(self) -> Vec<SpannableNode<Symbol>> {
319        match self {
320            Self {chunk: ChunkP::Unit(s), ..} => vec![s],
321            Self {chunk: ChunkP::Group(g), ..} => {
322                let mut ret = Vec::with_capacity(g.len());
323                for chunk in g {
324                    ret.extend(chunk.into_inner().extract_symbols());
325                }
326                ret
327            }
328        }
329    }
330}
331
332impl Parse for Chunk {
333	fn parse(lexer: &mut LexerBridge) -> ParseResult<Self> { 
334		let chunk = match lexer.peek_result()? {
335			Tokens::ProductionToken(Productions::LPar) => {
336				let mut ret = Vec::new();
337				lexer.next();
338				while lexer.peek_result()? != &Tokens::ProductionToken(Productions::RPar) {
339					ret.push(SpannableNode::parse(lexer)?);
340				}
341				lexer.next();
342				ChunkP::Group(ret)
343			},
344			Tokens::ProductionToken(Productions::Semi) => { // Empty rule
345				return Ok(Self::empty())
346			}
347			_ => {
348				ChunkP::Unit(SpannableNode::parse(lexer)?)
349			}
350		};
351		if let Tokens::ProductionToken(Productions::Ebnf(_)) = lexer.peek_result()? {
352			if let Tokens::ProductionToken(Productions::Ebnf(x)) = lexer.next_result()? {
353				Ok(Self {chunk, ebnf: Some(x)})
354			} else { 
355    			Err(WagParseError::Fatal((lexer.span(), "Something went terribly wrong. Unwrapped non-ebnf when should have unwrapped ebnf".to_string())))  
356    		}
357		} else {
358			Ok(Self {chunk, ebnf: None})
359		}
360	}
361}
362
363impl GetReqAttributes for Chunk {
364    fn get_req_attributes(&self) -> ReqAttributes {
365        match &self.chunk {
366            ChunkP::Unit(s) => s.get_req_attributes(),
367            ChunkP::Group(g) => {
368                let mut req = ReqAttributes::new();
369                for c in g {
370                    req.extend(c.get_req_attributes());
371                }
372                req
373            },
374        }
375    }
376}
377
378use itertools::Itertools;
379impl Display for Chunk {
380    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
381        if let Some(ebnf) = &self.ebnf {
382            write!(f, "{}{ebnf}", self.chunk)
383        } else {
384            write!(f, "{}", self.chunk)
385        }
386    }
387}
388
389impl Display for ChunkP {
390    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
391        match self {
392            Self::Unit(s) => write!(f, "{s}"),
393            Self::Group(g) => write!(f, "({})", g.iter().join(" ")),
394        }
395    }
396}