// wagon_parser/parser.rs

1#[macro_use]
2/// The full WAG tree.
3pub mod wag;
4/// An assignment.
5pub mod assignment;
6/// An atom.
7pub mod atom;
8/// A chunk.
9pub mod chunk;
10/// A comparison.
11pub mod comp;
12/// `||`
13pub mod conjunct;
14/// `&&`
15pub mod disjunct;
16/// An expression.
17pub mod expression;
18/// `**`
19pub mod factor;
20/// Helper methods.
21pub mod helpers;
22/// `!`
23pub mod inverse;
24/// Metadata for the WAG.
25pub mod metadata;
26/// The right-hand-side of a rule.
27pub mod rhs;
28/// A rule.
29pub mod rule;
30/// `+` or `-`.
31pub mod sum;
32/// An individual symbol.
33pub mod symbol;
34/// `*` or `/`.
35pub mod term;
36/// A terminal.
37pub mod terminal;
38mod ident;
39
40use derivative::Derivative;
41use std::{error::Error, fmt::Display, write};
42use self::wag::Wag;
43use crate::firstpass::{WagCheckError, Rewrite};
44use crate::SpannableNode;
45
46use ordered_float::FloatIsNan;
47use wagon_ident::Ident;
48use wagon_utils::{Peek, comma_separated_with_or, string_vec, ResultNext, ResultPeek, Span, Spannable, ErrorReport};
49use wagon_lexer::{LexerBridge, Tokens, LexingError};
50
/// The arguments a nonterminal is called with: a list of identifiers with span information attached.
type CallingArgs = Vec<SpannableNode<Ident>>;
52
/// The main parser struct.
///
/// Uses a [`LexerBridge`] internally.
/// # Example
/// ```
/// use wagon_parser::parser::Parser;
/// 
/// let s = "S -> A;";
/// let mut parser = Parser::new(s);
/// assert!(parser.parse().is_ok())
/// ```
pub struct Parser<'source> {
	// The lexer over the borrowed input; the parser lives no longer than the source string.
	lexer: LexerBridge<'source>
}
67
68impl<'source> Parser<'source> {
69	/// Given an input string, construct a parser.
70	#[must_use] pub fn new(data: &'source str) -> Self {
71		Self {
72			lexer: LexerBridge::new(data)
73		}
74	}
75
76	/// Start parsing and return a result.
77	///
78	/// # Errors
79	/// Returns a [`WagParseError`] if any error occurs during parsing.
80	pub fn parse(&mut self) -> ParseResult<Wag> {
81		Wag::parse(&mut self.lexer)
82	}
83}
84
/// Any parse will either return the node we are trying to parse, or a [`WagParseError`].
pub type ParseResult<T> = Result<T, WagParseError>;
87
#[derive(Derivative, Debug)]
#[derivative(PartialEq)]
/// Any of the various errors that can occur during parsing.
pub enum WagParseError {
	/// An unexpected character was encountered.
	Unexpected {
		/// The span info for this character.
		span: Span,
		/// The token we found.
		offender: Tokens,
		/// String representations for the tokens we expected to see.
		expected: Vec<String>
	},
	/// Something horrible happened that we do not have a specific error for.
	Fatal((Span, String)),
	/// A wrapper around [`WagCheckError`].
	CheckError(WagCheckError),
	/// A wrapper around [`LexingError`].
	LexError(LexingError),
	/// Expected a float but got a NaN.
	FloatError(FloatIsNan, Span),
	/// Non-valid regex. Carries the build error, the span of the pattern and the pattern text.
	// Regex errors are big so we're allocating it on the heap.
	// Equality ignores the boxed error (see the `derivative` attribute).
	RegexError(#[derivative(PartialEq="ignore")] Box<regex_automata::dfa::dense::BuildError>, Span, String),
}
112
113impl From<WagCheckError> for WagParseError {
114    fn from(value: WagCheckError) -> Self {
115        Self::CheckError(value)
116    }
117}
118
119impl From<LexingError> for WagParseError {
120	fn from(value: LexingError) -> Self {
121		Self::LexError(value)
122	}
123}
124
125impl Error for WagParseError {
126    fn source(&self) -> Option<&(dyn Error + 'static)> {
127        match self {
128            Self::Fatal(_) | Self::Unexpected { .. } => None,
129            Self::CheckError(e) => Some(e),
130            Self::LexError(e) => Some(e),
131            Self::FloatError(e, _) => Some(e),
132            Self::RegexError(e, _, _) => Some(&**e),
133        }
134    }
135}
136
137impl Display for WagParseError {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139    	let (head, msg) = self.msg();
140    	write!(f, "{head}: {msg}")
141    }
142}
143
144impl ErrorReport for WagParseError {
145	fn span(self) -> Span {
146		match self {
147		    Self::CheckError(check) => check.span(),
148		    Self::LexError(lex) => lex.span(),
149		    Self::FloatError(_, span) | Self::Fatal((span, _)) | Self::Unexpected { span, .. } => span,
150		    Self::RegexError(e, span, _) => {
151		    	match e.source() {
152		    		Some(e) => match e.source() { // NFA Build Error
153		    			Some(e) => { // Syntax Error
154		    				let e_span = match e.downcast_ref() {
155		    					Some(regex_syntax::Error::Parse(e)) => e.span(),
156		    					Some(regex_syntax::Error::Translate(e)) => e.span(),
157		    					_ => return span
158		    				};
159					    	let start = span.start + e_span.start.offset;
160					    	let end = span.start + e_span.end.offset;
161					    	Span { start, end }
162		    			},
163		    			None => span
164		    		},
165		    		None => span
166		    	}
167		    }
168		}
169	}
170
171	fn msg(&self) -> (String, String) {
172		match self {
173		    Self::Unexpected { span, offender, expected } => ("Unexpected Token".to_string(), 
174		    	format!("Encountered token {:?} at position {:?}. Expected {:#?}", offender, span, comma_separated_with_or(expected))),
175	        Self::Fatal((_, msg)) => ("Fatal Exception".to_string(), msg.to_string()),
176	        Self::CheckError(err) => err.msg(),
177    		Self::LexError(lex) => ("Lexing Error".to_string(), lex.to_string()),
178    		Self::FloatError(e, _) => ("Error converting floating point".to_string(), e.to_string()),
179    		Self::RegexError(e, _, s) => ("Regex Build Error:".to_string(), format!("Failed building pattern {s}: {e}")),
180		}
181	}
182}
183
184/// The main trait for parsing.
185///
186/// Any node that can be parsed must implement this trait.
187pub trait Parse {
188
189	/// Given a lexer, try to parse a valid instance of this node.
190	///
191	/// # Errors
192	/// Should return a [`WagParseError`] if the parsing fails.
193	fn parse(lexer: &mut LexerBridge) -> ParseResult<Self> where Self: Sized;
194
195	/// Parse multiple instances of this node, separated by a [`Tokens`].
196	///
197	/// # Errors
198	/// Should return a [`WagParseError`] if the parsing fails.
199	fn parse_sep(lexer: &mut LexerBridge, join: Tokens) -> ParseResult<Vec<Self>> where Self: Sized {
200		let mut res = Vec::new();
201		res.push(Self::parse(lexer)?);
202		while lexer.next_if(|x| x.as_ref() == Ok(&join)).is_some() {
203			res.push(Self::parse(lexer)?);
204		}
205		Ok(res)
206	}
207
208	/// Parse multiple instances of this node, separated by a [`Tokens`] end ended by a (possibly different) [`Tokens`].
209	///
210	/// # Errors
211	/// Should return a [`WagParseError`] if the parsing fails.
212	fn parse_sep_end(lexer: &mut LexerBridge, join: Tokens, end: Tokens) -> ParseResult<Vec<Self>> where Self: Sized {
213		let mut res = Vec::new();
214		res.push(Self::parse(lexer)?);
215		let mut done = false;
216		while !done {
217			if lexer.next_if(|x| x.as_ref() == Ok(&join)).is_some() {
218				if lexer.next_if(|x| x.as_ref() == Ok(&end)).is_some() {
219					done = true;
220				} else {
221					res.push(Self::parse(lexer)?);
222				}
223			} else if lexer.next_if(|x| x.as_ref() == Ok(&end)).is_some() {
224				done = true;
225			} else {
226				return Err(WagParseError::Unexpected{ offender: lexer.next_result()?, expected: string_vec![join, end], span: lexer.span()})
227			}
228		}
229		Ok(res)
230	}
231}
232
/// Optionally parse the node.
///
/// Sometimes, we want to try parsing a node, but don't care if we fail in some ways, but do care in others. 
/// In that case, we should implement this trait and return `Ok(None)` if the failure doesn't matter and `Err` if it does.
trait ParseOption {

	/// Try to parse this node, returning `Ok(Some(node))` on success,
	/// `Ok(None)` on an acceptable failure, and `Err` on one that matters.
	fn parse_option(lexer: &mut LexerBridge) -> ParseResult<Option<Self>> where Self: Sized;
}
241
#[cfg(test)]
mod tests {

    use wagon_lexer::math::Math;
	use std::collections::BTreeMap;

    use wagon_lexer::productions::EbnfType;
	
	use super::Parse;
	use super::LexerBridge;
	use super::sum::SumP;
	use ordered_float::NotNan;
	use wagon_macros::unspanned_tree;
	
	use super::assignment::Assignment;
    use super::atom::Atom;
    use super::comp::Comparison;
    use super::conjunct::Conjunct;
    use super::disjunct::Disjunct;
    use super::expression::Expression;
    use super::factor::Factor;
    use super::inverse::Inverse;
    use super::term::Term;
    use super::{Parser, Wag, metadata::Metadata, rule::Rule, rhs::Rhs, chunk::Chunk, symbol::Symbol, terminal::Terminal, sum::Sum};
    use wagon_lexer::Tokens;
    use wagon_ident::Ident;
    use super::chunk::ChunkP;
    use super::string_vec;

    use super::Span;
    use pretty_assertions::assert_eq;

    // End-to-end parse of a full example WAG — metadata block, analytic (`->`)
    // and generate (`=>`) rules, EBNF operators and weights — compared against
    // the expected AST built with `unspanned_tree!` (which erases span info).
    #[test]
    #[allow(clippy::too_many_lines)]
	fn test_example_wag() {
		let input = r#"
		include activities::other;
		type: "conversational";
		====================

		start -> setup activity* 'stop'; /* a comment */
		setup -> greet? getname | ;
		greet -> ('hello' {hello = true;})+ | "good morning";
		greet => 'greetings human!' 
		| [0.3] "What is your name? ";
		getname -> ;
		"#;
		let mut lexer = LexerBridge::new(input);
		let output = Wag::parse(&mut lexer);
		let expected = unspanned_tree!(Wag { 
			metadata: Metadata {
				includes: vec!["activities::other".to_string()],
				mappings: BTreeMap::from([("type".to_string(), Atom::LitString("conversational".to_string()))])
			}, 
			grammar: vec![
				Rule::Analytic("start".to_string(), Vec::new(), vec![
					Rhs { 
						weight: None,
						chunks: vec![
							Chunk { 
								chunk: ChunkP::Unit(Symbol::simple_ident("setup")),
								ebnf: None 
							},
							Chunk {
								chunk: ChunkP::Unit(Symbol::simple_ident("activity")),
								ebnf: Some(EbnfType::Many)
							},
							Chunk {
								chunk: ChunkP::Unit(Symbol::simple_terminal("stop")),
								ebnf: None
							}
						]
					}
				]),
				Rule::Analytic("setup".to_string(), Vec::new(), vec![
					Rhs {
						weight: None,
						chunks: vec![
							Chunk {
								chunk: ChunkP::Unit(Symbol::simple_ident("greet")),
								ebnf: Some(EbnfType::Maybe)
							},
							Chunk {
								chunk: ChunkP::Unit(Symbol::simple_ident("getname")),
								ebnf: None
							}
						]
					},
					Rhs::empty()
				]),
				Rule::Analytic("greet".to_string(), Vec::new(), vec![
					Rhs {
						weight: None,
						chunks: vec![
							Chunk {
								// `('hello' {hello = true;})+` — a group with an
								// inline assignment, repeated one-or-more times.
								chunk: ChunkP::Group(vec![
									Chunk { chunk: ChunkP::Unit(Symbol::Terminal(Terminal::LitString("hello".to_string()))), ebnf: None },
									Chunk { chunk: ChunkP::Unit(Symbol::Assignment(vec![
										Assignment { 
											ident: Ident::Unknown("hello".to_string()), 
											expr: Expression::Disjunct(
												Disjunct(vec![
													Conjunct(vec![
														Inverse::Comparison(
															Comparison {
																sum: Sum {
																	left: Term { 
																		left: Factor::Primary(
																			Atom::LitBool(true)
																		), 
																		cont: None 
																	},
																	cont: None
																},
																comp: None
															}
														)
													])
												])
											)
										}
									])), ebnf: None
									}
								]),
								ebnf: Some(EbnfType::Some)
							}
						]
					},
					Rhs {
						weight: None,
						chunks: vec![
							Chunk {
								chunk: ChunkP::Unit(Symbol::Terminal(Terminal::LitString("good morning".to_string()))),
								ebnf: None
							}
						]
					}
				]),
				Rule::Generate("greet".to_string(), Vec::new(), vec![
					Rhs {
						weight: None,
						chunks: vec![
							Chunk {
								chunk: ChunkP::Unit(Symbol::Terminal(Terminal::LitString("greetings human!".to_string()))),
								ebnf: None
							}
						]
					},
					Rhs {
						// `[0.3]` — a weight expression attached to the alternative.
						weight: Some(
							Expression::Disjunct(
								Disjunct(vec![
									Conjunct(vec![
										Inverse::Comparison(
											Comparison {
												sum: Sum {
													left: Term { 
														left: Factor::Primary(
															Atom::LitFloat(NotNan::new(0.3).unwrap())
														), 
														cont: None 
													},
													cont: None
												},
												comp: None
											}
										)
									])
								])
							)
						),
						chunks: vec![
							Chunk {
								chunk: ChunkP::Unit(Symbol::Terminal(Terminal::LitString("What is your name? ".to_string()))),
								ebnf: None
							}
						]
					}
				]),
				Rule::Analytic("getname".to_string(), Vec::new(), vec![
					Rhs::empty()
				])
			]
		});
		assert_eq!(Ok(expected), output);
	}

	// Parses attribute syntax: `$x` (local), `*y` (inherited), `&x` (synthesized)
	// identifiers, nonterminal calls with arguments, and `+ 1` sum continuations.
	#[test]
	#[allow(clippy::too_many_lines)]
	fn test_example_wag2() {
		let input = r"
		S -> {$x = 0; $y = 0;} X<$x, $y>;
		X<*y, &x> -> 'a' {*y = *y + 1; &x = &x + 1;} B;
		B -> 'b';
		";
		let mut lexer = LexerBridge::new(input);
		let output = Wag::parse(&mut lexer);
		let expected = unspanned_tree!(Wag {
			metadata: Metadata { includes: vec![], mappings: BTreeMap::new() },
			grammar: vec![
				Rule::Analytic("S".to_string(), Vec::new(), vec![
					Rhs {
						weight: None,
						chunks: vec![
							Chunk { 
								chunk: ChunkP::Unit(Symbol::Assignment(vec![
									Assignment { 
										ident: Ident::Local("x".to_string()), 
										expr: Expression::Disjunct(
											Disjunct(vec![
												Conjunct(vec![
													Inverse::Comparison(
														Comparison {
															sum: Sum {
																left: Term { 
																	left: Factor::Primary(
																		Atom::LitNum(0)
																	), 
																	cont: None 
																},
																cont: None
															},
															comp: None
														}
													)
												])
											])
										)
									},
									Assignment { 
										ident: Ident::Local("y".to_string()), 
										expr: Expression::Disjunct(
											Disjunct(vec![
												Conjunct(vec![
													Inverse::Comparison(
														Comparison {
															sum: Sum {
																left: Term { 
																	left: Factor::Primary(
																		Atom::LitNum(0)
																	), 
																	cont: None 
																},
																cont: None
															},
															comp: None
														}
													)
												])
											])
										)
									}
								])),
								ebnf: None, 
							},
							Chunk {
								chunk: ChunkP::Unit(Symbol::NonTerminal(Ident::Unknown("X".to_string()), vec![Ident::Local("x".to_string()), Ident::Local("y".to_string())])),
								ebnf: None,
							}
						]
					}
				]),
				Rule::Analytic("X".to_string(), vec![Ident::Inherit("y".to_string()), Ident::Synth("x".to_string())], vec![
					Rhs { 
						weight: None,
						chunks: vec![
							Chunk::simple_terminal("a"),
							Chunk { 
								chunk: ChunkP::Unit(Symbol::Assignment(vec![
									Assignment { 
										ident: Ident::Inherit("y".to_string()), 
										expr: Expression::Disjunct(
											Disjunct(vec![
												Conjunct(vec![
													Inverse::Comparison(
														Comparison {
															sum: Sum {
																left: Term { 
																	left: Factor::Primary(
																		Atom::Ident(Ident::Inherit("y".to_string()))
																	), 
																	cont: None 
																},
																cont: Some(SumP { 
																	op: crate::parser::sum::Op1::Add, 
																	right: Term {
																		left: Factor::Primary(
																			Atom::LitNum(1)
																		), 
																		cont: None 
																	}, 
																	cont: None
																})
															},
															comp: None
														}
													)
												])
											])
										)
									},
									Assignment { 
										ident: Ident::Synth("x".to_string()), 
										expr: Expression::Disjunct(
											Disjunct(vec![
												Conjunct(vec![
													Inverse::Comparison(
														Comparison {
															sum: Sum {
																left: Term { 
																	left: Factor::Primary(
																		Atom::Ident(Ident::Synth("x".to_string()))
																	), 
																	cont: None 
																},
																cont: Some(SumP { 
																	op: crate::parser::sum::Op1::Add, 
																	right: Term {
																		left: Factor::Primary(
																			Atom::LitNum(1)
																		), 
																		cont: None 
																	}, 
																	cont: None
																})
															},
															comp: None
														}
													)
												])
											])
										)
									}
								])),
								ebnf: None, 
							},
							Chunk::simple_ident("B")
						] 
					}
				]),
				Rule::Analytic("B".to_string(), Vec::new(), vec![
					Rhs::simple_terminal("b")
				])
			]
		});
		assert_eq!(Ok(expected), output);
	}

	// An empty alternative (`| ;`) must parse as `Rhs::empty()`.
	#[test]
	fn test_simple_empty_alt() {
		let input = "S -> 'a' | ;";
		let mut lexer = LexerBridge::new(input);
		let output = Wag::parse(&mut lexer);
		let expected = unspanned_tree!(Wag {
		    metadata: Metadata { includes: vec![], mappings: BTreeMap::new() },
		    grammar: vec![
		    	Rule::Analytic("S".to_string(), Vec::new(), vec![
		    		Rhs { 
		    			weight: None,
		    			chunks: vec![Chunk::simple_terminal("a")] 
		    		},
		    		Rhs::empty()
		    	])
		    ],
		});
		assert_eq!(Ok(expected), output);
	}

	// Malformed metadata (unquoted value, missing ';') must surface as
	// `WagParseError::Unexpected` with an exact offender, span and expectation.
	#[test]
	fn test_parse_error() {
		let input = r#"
		include activities::other;
		type: conversational
		==========================

		start -> setup activity* 'stop'; /* a comment */
		setup -> greet? getname;
		greet -> ('hello' {hello = true;})+ | "good morning";
		greet => 'greetings human!' 
		| [0.3] "What is your name? ";
		"#;
		let mut parser = Parser::new(input);
		let output = parser.parse();
		let expected = Err(crate::parser::WagParseError::Unexpected { 
			span: Span {start: 55, end: 57}, 
			offender: Tokens::MathToken(Math::Eq), 
			expected: string_vec![
				Tokens::MathToken(Math::Semi)
			]
		});
		assert_eq!(expected, output);
	}

}