mini_builder_rs/parser/
mod.rs

1//! Converts a sequence of tokens into an AST (abstract syntax tree).
2//!
3//! The AST defines how tokens are interpreted and how they relate to others.
4//! For example consider the following tokens: `1 + 2 * 3`.
5//! The parser will use its hardcoded rules of order of operations to create
6//! the following tree:
7//!           +
8//!          / \
9//!         1   *
10//!            / \
11//!           2   3
12//! The same principles apply to more complicated examples as well.
13//! Keep in mind that the AST isn't necessarily a binary tree!
14//!
15//! # [Node]
16//! The nodes of the AST are defined using the [Node] struct. There are three
17//! types of nodes:
18//! - Source - The source node holds a string slice to the source text (the
19//!            text that was used to create the tokens).
20//! - Expression - The expression node holds an expression - a sequence of
21//!                operations that return a [Value].
22//! - Control Flow - Nodes that decide which nodes are executed and which are
23//!                  not.
24//!
25//! # Example
26//! ```rust
27//! use mini_builder_rs::{
28//!     tokenizer::{Tokenizer, TokenizerOptions},
29//! 	parser::Parser,
30//! };
31//!
32//! let source = "is 1 bigger than 2? {{ 1 > 2 ? 'yes' : 'no' }}";
33//! let tokens = Tokenizer::new(source, TokenizerOptions::default())
34//! 	.tokenize()
35//! 	.unwrap();
36//! let tokens = tokens.as_slice();
37//! let nodes = Parser::new(tokens).parse().unwrap();
38//! // nodes is the root of the parsed AST
39//! ```
40
41pub mod expression;
42pub mod node;
43pub mod parser_error;
44
45use crate::{
46	tokenizer::token::{Token, TokenType},
47	value::Value,
48};
49
50use self::{
51	expression::{Expression, Expressions, NamedExpressions},
52	node::{Node, Nodes},
53	parser_error::ParserError,
54};
55
56type ExpressionResult<'a> = Result<Expression<'a>, ParserError>;
57type NodeResult<'a> = Result<Node<'a>, ParserError>;
58pub type NodesResult<'a> = Result<Nodes<'a>, ParserError>;
59
60/// Creates an AST from a sequence of tokens
61pub struct Parser<'a, 'b> {
62	tokens: &'b [Token<'a>],
63	i: usize,
64}
65
66impl<'a, 'b> Parser<'a, 'b> {
67	pub fn new(tokens: &'b [Token<'a>]) -> Self {
68		Self { tokens, i: 0 }
69	}
70
71	/// Produces a `ParserError::UnexpectedToken` from the current token
72	fn unexpected_token_error(&self) -> ParserError {
73		let token = &self.tokens[self.i];
74		ParserError::UnexpectedToken(token.location, token.tt)
75	}
76
77	/// Consumes and returns a token. If there are non returns None.
78	fn have_token(&mut self) -> Option<&'b Token<'a>> {
79		let ret = self.tokens.get(self.i);
80		self.advance();
81		ret
82	}
83
84	/// Returns the type of the next token, if there are no more, returns None
85	fn peek_type(&self) -> Option<TokenType> {
86		self.tokens.get(self.i).map(|t| t.tt.clone())
87	}
88
89	/// Consumes the next token only if it is of the type `tt`
90	fn have_specific_type(&mut self, tt: TokenType) -> bool {
91		if let Some(peeked_tt) = self.peek_type() {
92			if peeked_tt == tt {
93				self.advance();
94				true
95			} else {
96				false
97			}
98		} else {
99			false
100		}
101	}
102
103	/// Returns an error if the next token is not of the expected type
104	fn expect_type(&self, tt: TokenType) -> Result<(), ParserError> {
105		if let Some(peeked_tt) = self.peek_type() {
106			if peeked_tt == tt {
107				Ok(())
108			} else {
109				Err(ParserError::ExpectedButFound(
110					format!("{:?}", tt),
111					format!("{:?}", peeked_tt),
112				))
113			}
114		} else {
115			Err(ParserError::ExpectedButReachedEnd(format!("{:?}", tt)))
116		}
117	}
118
119	/// Consumes the next token and returns its content
120	fn have_content(&mut self) -> &'a str {
121		self.i += 1;
122		self.tokens[self.i - 1].content
123	}
124
125	/// Returns the token's type, if reached the end returns an error
126	fn peek_type_expect(&self) -> Result<TokenType, ParserError> {
127		self.peek_type()
128			.ok_or(ParserError::ExpectedButReachedEnd(format!("")))
129	}
130
131	/// Consumes the next token
132	fn advance(&mut self) {
133		self.i += 1;
134	}
135
136	/// Helper function for parsing binary expression
137	fn parse_binary_expression<F, G>(
138		&mut self,
139		op: TokenType,
140		parse_sub_expression: F,
141		package: G,
142	) -> ExpressionResult<'a>
143	where
144		F: Fn(&mut Self) -> ExpressionResult<'a>,
145		G: Fn(Expressions) -> Expression,
146	{
147		// parse the current seen expression
148		let expr = parse_sub_expression(self)?;
149
150		// if the operation symbol is the next token, parse that following expression
151		// and package them as the desired expression.
152		// parsing: <expr> [<symbol> <expr>]*
153		if Some(op) == self.peek_type() {
154			let mut expressions = vec![Box::new(expr)];
155			while self.have_specific_type(op) {
156				expressions.push(Box::new(parse_sub_expression(self)?));
157			}
158			Ok(package(expressions))
159		} else {
160			Ok(expr)
161		}
162	}
163
164	/// Helper function for parsing binary expression where two operations have the same order of operations
165	fn parse_binary_expression_2<'e, F, G>(
166		&mut self,
167		op_a: TokenType,
168		op_b: TokenType,
169		parse_sub_expression: F,
170		package: G,
171	) -> ExpressionResult<'a>
172	where
173		F: Fn(&mut Self) -> ExpressionResult<'a>,
174		G: Fn(Expressions<'a>, Expressions<'a>) -> Expression<'a>,
175	{
176		// parse the current seen expression
177		let expr = parse_sub_expression(self)?;
178
179		// if the operation symbol is the next token, parse that following expression
180		// and package them as the desired expression.
181		// parsing: <expr> [<symbol> <expr>]*
182		if let Some(peeked_tt) = self.peek_type() {
183			if peeked_tt == op_a || peeked_tt == op_b {
184				let mut expressions_a = vec![Box::new(expr)];
185				let mut expressions_b = vec![];
186				// parse [<symbol> <expr>]*
187				loop {
188					match self.peek_type() {
189						Some(tt) => {
190							if tt == op_a {
191								self.advance();
192								expressions_a.push(Box::new(parse_sub_expression(self)?));
193							} else if tt == op_b {
194								self.advance();
195								expressions_b.push(Box::new(parse_sub_expression(self)?));
196							} else {
197								break;
198							}
199						}
200						_ => break,
201					}
202				}
203				Ok(package(expressions_a, expressions_b))
204			} else {
205				Ok(expr)
206			}
207		} else {
208			Ok(expr)
209		}
210	}
211
212	fn parser_named_argument(&mut self) -> Result<Box<(&'a str, Expression<'a>)>, ParserError> {
213		// the name of the argument
214		self.expect_type(TokenType::Identifier)?;
215		let name = self.have_content();
216
217		// =
218		self.expect_type(TokenType::Assign)?;
219		self.advance();
220
221		// the expression (will produce the value of the argument)
222		let expression = self.parse_expression()?;
223
224		Ok(Box::new((name, expression)))
225	}
226
227	fn parse_named_arguments(
228		&mut self,
229	) -> Result<Vec<Box<(&'a str, Expression<'a>)>>, ParserError> {
230		self.expect_type(TokenType::LBracket)?;
231		self.advance();
232
233		if self.have_specific_type(TokenType::RBracket) {
234			// empty arguments
235			Ok(Vec::new())
236		} else {
237			// allocate buffer and parse the first argument
238			let mut args: NamedExpressions = Vec::new();
239			args.push(self.parser_named_argument()?);
240
241			// parse the rest of the arguments: [,arg = value]*
242			loop {
243				if let Some(peeked_tt) = self.peek_type() {
244					match peeked_tt {
245						// ) - reached the end of the arguments
246						TokenType::RBracket => {
247							self.advance();
248							break;
249						}
250						// identifier - parse an argument
251						TokenType::Comma => {
252							self.advance();
253							args.push(self.parser_named_argument()?);
254						}
255						_ => return Err(self.unexpected_token_error()),
256					}
257				} else {
258					return Err(ParserError::ExpectedButReachedEnd(format!(
259						"identifier or ','"
260					)));
261				}
262			}
263
264			Ok(args)
265		}
266	}
267
268	fn parse_arguments(&mut self) -> Result<Vec<Box<Expression<'a>>>, ParserError> {
269		self.expect_type(TokenType::LBracket)?;
270		self.advance();
271
272		if self.have_specific_type(TokenType::RBracket) {
273			// empty arguments
274			Ok(Vec::new())
275		} else {
276			// allocate buffer and parse the first argument
277			let mut args: Expressions = Vec::new();
278			args.push(Box::new(self.parse_expression()?));
279
280			// parse the rest of the arguments (expressions)
281			loop {
282				if let Some(peeked_tt) = self.peek_type() {
283					match peeked_tt {
284						// ) - reached the end of the arguments
285						TokenType::RBracket => {
286							self.advance();
287							break;
288						}
289						// expression - parse an argument
290						TokenType::Comma => {
291							self.advance();
292							args.push(Box::new(self.parse_expression()?));
293						}
294						_ => return Err(self.unexpected_token_error()),
295					}
296				} else {
297					return Err(ParserError::ExpectedButReachedEnd(format!(
298						"expression or ','"
299					)));
300				}
301			}
302
303			Ok(args)
304		}
305	}
306
307	fn parse_builder(&mut self) -> ExpressionResult<'a> {
308		// at
309		self.expect_type(TokenType::At)?;
310		self.advance();
311
312		// builder name
313		self.expect_type(TokenType::FilePath)?;
314		let name = self.have_content();
315
316		// if see a `(`, parse arguments, else assume there are non
317		let args = if Some(TokenType::LBracket) == self.peek_type() {
318			self.parse_named_arguments()?
319		} else {
320			Vec::new()
321		};
322
323		Ok(Expression::Builder(name, args))
324	}
325
326	fn parse_function_call_or_variable(&mut self) -> ExpressionResult<'a> {
327		// name
328		self.expect_type(TokenType::Identifier)?;
329		let name = self.have_content();
330
331		// if the name is followed by an open bracket then this is a function call
332		if Some(TokenType::LBracket) == self.peek_type() {
333			// (arg0 = value0, arg1 = value1, ...)
334			let args = self.parse_arguments()?;
335			Ok(Expression::FunctionCall(name, args))
336		} else {
337			Ok(Expression::Variable(name))
338		}
339	}
340
341	fn parse_unary_expression(
342		&mut self,
343		seen_negation: bool,
344		seen_not: bool,
345	) -> ExpressionResult<'a> {
346		if let Some(peeked_tt) = self.peek_type() {
347			match peeked_tt {
348				// call to another file
349				TokenType::At => self.parse_builder(),
350				// function call or a variable
351				TokenType::Identifier => self.parse_function_call_or_variable(),
352				// negate and not
353				TokenType::Minus if !seen_negation => {
354					self.advance();
355					Ok(Expression::Negate(Box::new(
356						self.parse_unary_expression(true, seen_not)?,
357					)))
358				}
359				TokenType::Not if !seen_not => {
360					self.advance();
361					Ok(Expression::Not(Box::new(
362						self.parse_unary_expression(seen_negation, true)?,
363					)))
364				}
365				// expression inside brackets
366				TokenType::LBracket => {
367					self.advance();
368					let expression = self.parse_expression()?;
369					self.expect_type(TokenType::RBracket)?;
370					self.advance();
371					Ok(expression)
372				}
373				// list
374				TokenType::SquareLBracket => {
375					self.advance();
376					if Some(TokenType::SquareRBracket) == self.peek_type() {
377						// empty
378						self.advance();
379						Ok(Expression::ListValue(Vec::new()))
380					} else {
381						// first expression
382						let mut expressions = vec![Box::new(self.parse_expression()?)];
383
384						// rest of the expression
385						loop {
386							if Some(TokenType::SquareRBracket) == self.peek_type() {
387								self.advance();
388								break;
389							}
390							self.expect_type(TokenType::Comma)?;
391							self.advance();
392							expressions.push(Box::new(self.parse_expression()?));
393						}
394
395						Ok(Expression::ListValue(expressions))
396					}
397				}
398				// literals
399				TokenType::StringLiteral => Ok(Expression::Value(Value::Text(
400					self.have_content().to_string(),
401				))),
402				TokenType::NumericalLiteral => Ok(Expression::Value(Value::Number(
403					self.have_content().parse::<f32>().unwrap(),
404				))),
405				TokenType::NoneLiteral => {
406					self.advance();
407					Ok(Expression::Value(Value::None))
408				}
409				// if none of the above matched, then the token type is invalid
410				_ => Err(self.unexpected_token_error()),
411			}
412		} else {
413			Err(ParserError::ExpectedButReachedEnd(format!("expression")))
414		}
415	}
416
417	fn parse_index_of(&mut self) -> ExpressionResult<'a> {
418		let mut expression = self.parse_unary_expression(false, false)?;
419		while Some(TokenType::SquareLBracket) == self.peek_type() {
420			self.advance();
421			expression =
422				Expression::IndexOf(Box::new(expression), Box::new(self.parse_expression()?));
423			self.expect_type(TokenType::SquareRBracket)?;
424			self.advance();
425		}
426		Ok(expression)
427	}
428
429	fn parse_multiplicative_expression(&mut self) -> ExpressionResult<'a> {
430		self.parse_binary_expression_2(
431			TokenType::Star,
432			TokenType::Slash,
433			Self::parse_index_of,
434			|expressions_a, expressions_b| Expression::Multiplicative(expressions_a, expressions_b),
435		)
436	}
437
438	fn parse_additive_expression(&mut self) -> ExpressionResult<'a> {
439		self.parse_binary_expression_2(
440			TokenType::Plus,
441			TokenType::Minus,
442			Self::parse_multiplicative_expression,
443			|expressions_a, expressions_b| Expression::Additive(expressions_a, expressions_b),
444		)
445	}
446
447	fn parse_comparison_expression(&mut self) -> ExpressionResult<'a> {
448		// parse the current seen expression
449		let expr = self.parse_additive_expression()?;
450
451		if let Some(tt) = self.peek_type() {
452			if tt.is_comparison_token() {
453				self.advance();
454				let rhs = self.parse_additive_expression()?;
455				return Ok(Expression::Comparison(Box::new(expr), tt, Box::new(rhs)));
456			}
457		}
458
459		Ok(expr)
460	}
461
462	fn parse_or_expression(&mut self) -> ExpressionResult<'a> {
463		self.parse_binary_expression(
464			TokenType::Or,
465			Self::parse_comparison_expression,
466			|expressions| Expression::Or(expressions),
467		)
468	}
469
470	fn parse_and_expression(&mut self) -> ExpressionResult<'a> {
471		self.parse_binary_expression(TokenType::And, Self::parse_or_expression, |expressions| {
472			Expression::And(expressions)
473		})
474	}
475
476	fn parse_ternary(&mut self) -> ExpressionResult<'a> {
477		let expression = self.parse_and_expression()?;
478
479		// if the expression is followed by a question mark, then it is a ternary
480		if let Some(TokenType::QuestionMark) = self.peek_type() {
481			// consume the question mark
482			self.advance();
483
484			// parse the first expression
485			let a = self.parse_expression()?;
486
487			// parse `:`
488			self.expect_type(TokenType::Colon)?;
489			self.advance();
490
491			// prase the second expression and return
492			let b = self.parse_expression()?;
493			Ok(Expression::Ternary(
494				Box::new(expression),
495				Box::new(a),
496				Box::new(b),
497			))
498		} else {
499			Ok(expression)
500		}
501	}
502
503	fn parse_expression(&mut self) -> ExpressionResult<'a> {
504		self.parse_ternary()
505	}
506
507	fn see_assignment(&self) -> bool {
508		if let (Some(TokenType::Identifier), Some(TokenType::Assign)) = (
509			self.tokens.get(self.i).map(|t| t.tt),
510			self.tokens.get(self.i + 1).map(|t| t.tt),
511		) {
512			true
513		} else {
514			false
515		}
516	}
517
518	fn parse_assignment(&mut self) -> (&'a str, ExpressionResult<'a>) {
519		let name = self.have_content();
520		if !self.have_specific_type(TokenType::Assign) {
521			todo!("error - expected =")
522		}
523		(name, self.parse_expression())
524	}
525
526	fn parse_directive(&mut self) -> NodeResult<'a> {
527		self.expect_type(TokenType::Open)?;
528		self.advance();
529
530		// check if the directive is a function
531		let location = self.i;
532		match self.peek_type_expect()? {
533			TokenType::Pound => {
534				self.advance();
535
536				let location = self.i;
537
538				match self.peek_type_expect()? {
539					TokenType::If => {
540						// consume the `if` token
541						self.advance();
542
543						// parse the expression
544						let expression = self.parse_expression()?;
545
546						// expect and consume the `Close` token
547						self.expect_type(TokenType::Close)?;
548						self.advance();
549						Ok(Node::if_node(location, expression))
550					}
551					TokenType::Elif => {
552						// consume the `elif` token
553						self.advance();
554
555						// parse the expression
556						let expression = self.parse_expression()?;
557
558						// expect and consume the `Close` token
559						self.expect_type(TokenType::Close)?;
560						self.advance();
561
562						Ok(Node::elif(location, expression))
563					}
564					TokenType::Else => {
565						// consume the `Else` token
566						self.advance();
567
568						// expect and consume the `Close` token
569						self.expect_type(TokenType::Close)?;
570						self.advance();
571
572						Ok(Node::else_node(location))
573					}
574					TokenType::For => {
575						// consume the token
576						self.advance();
577
578						// name of the variable that will hold the list's values
579						self.expect_type(TokenType::Identifier)?;
580						let name = self.have_content();
581
582						// the 'in' symbol
583						self.expect_type(TokenType::Colon)?;
584						self.advance();
585
586						// the list
587						let l = self.parse_expression()?;
588
589						// expect and consume the `Close` token
590						self.expect_type(TokenType::Close)?;
591						self.advance();
592
593						// construct and return
594						Ok(Node::for_node(location, name, l))
595					}
596					TokenType::Close => {
597						// consume the token and return
598						self.advance();
599						Ok(Node::end(location))
600					}
601					_ => Err(self.unexpected_token_error()),
602				}
603			}
604			_ => {
605				// parse either an assignment or an expression
606				let node = if self.see_assignment() {
607					let (name, expression) = self.parse_assignment();
608					Node::assignment(location, name, expression?)
609				} else {
610					Node::expression(location, self.parse_expression()?)
611				};
612
613				// consume the `Close` token and return
614				self.expect_type(TokenType::Close)?;
615				self.advance();
616				Ok(node)
617			}
618		}
619	}
620
621	pub fn parse(mut self) -> NodesResult<'a> {
622		let mut nodes = Vec::new();
623		while let Some(tt) = self.peek_type() {
624			match tt {
625				TokenType::Source => {
626					nodes.push(Box::new(Node::source_from_token(
627						self.have_token().unwrap(),
628					)));
629				}
630				TokenType::Open => nodes.push(Box::new(self.parse_directive()?)),
631				_ => return Err(self.unexpected_token_error()),
632			}
633		}
634		Ok(nodes)
635	}
636}
637
638#[cfg(test)]
639mod tests {
640	use crate::{
641		parser::Parser,
642		tokenizer::{Tokenizer, TokenizerOptions},
643	};
644
645	#[test]
646	fn test_01() {
647		let source = r"{{#}}";
648		let tokens = Tokenizer::new(source, TokenizerOptions::default())
649			.tokenize()
650			.unwrap();
651		let tokens = tokens.as_slice();
652		let nodes = Parser::new(tokens).parse().unwrap();
653		println!("{:?}", nodes);
654	}
655
656	#[test]
657	fn test_02() {
658		let source = r"{{#if a + b - c > a * b / c || d / (e + f) < d * (e - f)}} {{#}}";
659		let tokens = Tokenizer::new(source, TokenizerOptions::default())
660			.tokenize()
661			.unwrap();
662		let tokens = tokens.as_slice();
663		let nodes = Parser::new(tokens).parse().unwrap();
664		println!("{:?}", nodes);
665	}
666
667	#[test]
668	fn test_03() {
669		let source =
670			r"{{@ file0()}} {{@ file1(arg0 = value0)}} {{@ file2(arg0 = value0, arg1 = value1)}}";
671		let tokens = Tokenizer::new(source, TokenizerOptions::default())
672			.tokenize()
673			.unwrap();
674		let tokens = tokens.as_slice();
675		let nodes = Parser::new(tokens).parse().unwrap();
676		println!("{:?}", nodes);
677	}
678
679	#[test]
680	fn test_04() {
681		let source = "{{a > b || c ? a : (a < b ? b : 0)}}";
682		let tokens = Tokenizer::new(source, TokenizerOptions::default())
683			.tokenize()
684			.unwrap();
685		let tokens = tokens.as_slice();
686		let nodes = Parser::new(tokens).parse().unwrap();
687		println!("{:?}", nodes);
688	}
689
690	#[test]
691	fn test_05() {
692		let source = "{{-a > -b || !c || !d}}";
693		let tokens = Tokenizer::new(source, TokenizerOptions::default())
694			.tokenize()
695			.unwrap();
696		let tokens = tokens.as_slice();
697		let nodes = Parser::new(tokens).parse().unwrap();
698		println!("{:?}", nodes);
699	}
700
701	#[test]
702	fn test_06() {
703		let source = "{{# if a > b}} a > b {{# elif a < b}} a < b {{# else}} a = b {{#}}";
704		let tokens = Tokenizer::new(source, TokenizerOptions::default())
705			.tokenize()
706			.unwrap();
707		let tokens = tokens.as_slice();
708		let nodes = Parser::new(tokens).parse().unwrap();
709		println!("{:?}", nodes);
710	}
711
712	#[test]
713	fn test_07() {
714		let source = "{{ a > b || b > c ? @ option_a(a = a, b = b, c = c) : @ option_b()}}";
715		let tokens = Tokenizer::new(source, TokenizerOptions::default())
716			.tokenize()
717			.unwrap();
718		let tokens = tokens.as_slice();
719		let nodes = Parser::new(tokens).parse().unwrap();
720		println!("{:?}", nodes);
721	}
722
723	#[test]
724	fn test_08() {
725		let source =
726			"{{ (a > b ? [[0, 1], [1, 2], [2, 3]] : [['a', 'b'], ['c', 'd'], ['e', 'f']])[1][0] }}";
727		let tokens = Tokenizer::new(source, TokenizerOptions::default())
728			.tokenize()
729			.unwrap();
730		let tokens = tokens.as_slice();
731		let nodes = Parser::new(tokens).parse().unwrap();
732		println!("{:?}", nodes);
733	}
734}