parser/lib.rs
1//! # Parser Module
2//!
3//! The parser module contains the implementation of the SAP language parser. The parser
4//! is responsible for taking a sequence of tokens and converting them into an abstract
5//! syntax tree (AST). The AST is then used by the interpreter to execute the program.
6//!
7//! The parser is implemented as a recursive descent parser, which is a top-down parser
8//! that starts from the root of the syntax tree and works its way down to the leaves.
9//!
10//! It is also responsible for reporting syntax errors in the input program. When
11//! a syntax error is encountered, the parser returns an error containing a message
12//! describing the error.
13use std::num::IntErrorKind;
14
15use ast::expression::{
16 Array, Binary, Expression, FunctionCall, Identifier, Index, Selection, Unary,
17};
18use ast::literal::Literal;
19use ast::statement::{
20 Display, FunctionDeclaration, RepeatForever, RepeatUntil, Return, Set, Statement,
21};
22use ast::{Program, StatementList};
23use lexer::token::{Token, TokenKind};
24use lexer::Lexer;
25use shared::error::{Error, ErrorKind};
26use shared::span::{GetSpan, Span};
27
28// Attempt to obtain the current version of the parser module.
29pub const VERSION: Option<&str> = std::option_env!("CARGO_PKG_VERSION");
30
31#[cfg(test)]
32mod test;
33
34/// The `Parser` struct is responsible for parsing a sequence of tokens into an abstract
35/// syntax tree (AST). The parser is implemented as a recursive descent parser, which is
36/// a top-down parser that starts from the root of the syntax tree and works its way down
37/// to the leaves.
38///
39/// The `Parser` struct contains a reference to a `Lexer` instance, which is used to
40/// obtain the tokens that make up the input program. The parser is also responsible for
41/// reporting syntax errors in the input program. When a syntax error is encountered, the
42/// parser returns an error containing a message describing the error.
43pub struct Parser<'lexer> {
44 lexer: Lexer<'lexer>,
45 cur_token: Token,
46}
47
48/// This macro is an expansion for creating a new `Error` instance with a syntax error
49/// kind. It's intended for use within the parser to simplify the creation of syntax
50/// errors.
51///
52/// Arguments are identical to the `format!` macro.
53macro_rules! parse_err {
54 ($($arg:tt)*) => {{
55 Err(shared::error::Error::new(&format!($($arg)*), ErrorKind::SyntaxError))
56 }}
57}
58
59/// This macro is an expansion for parsing binary expressions. It's intended for use
60/// within the parser to simplify the repetitive pattern of parsing binary operations.
61///
62/// # Arguments
63///
64/// * `self` - The parser instance, typically the `self` keyword in a method.
65/// * `downstream_parse_fn` - A function that parses the individual expressions on both
66/// sides of the binary operation. This function is called repeatedly to handle the left
67/// and right-hand sides of the binary operation.
68/// * `pattern` - A pattern matching the current token to the relevant operator(s).
69macro_rules! parse_binary_expr {
70 ($self:ident, $downstream_parse_fn:ident, $pattern:pat) => {
71 let start = $self.cur_token.span.start;
72 // Parse the left hand side
73 let mut node = $self.$downstream_parse_fn()?;
74
75 // While the current token matches the expected operator, repeatedly parse the right hand
76 // side and construct a new binary expression.
77 while matches!(&$self.cur_token.kind, $pattern) {
78 let operator = $self.cur_token.kind.clone();
79 // Move onto next token
80 $self.next_token();
81 // Parse the right hand side
82 let right = $self.$downstream_parse_fn()?;
83 let span = Span::new(start, $self.cur_token.span.end);
84 // Construct a new binary expression, placing the old `node` inside the new one.
85 node = Expression::Binary(Binary {
86 operator,
87 left: Box::new(node),
88 right: Box::new(right),
89 span,
90 });
91 }
92
93 let result: Result<Expression, Error> = Ok(node);
94 return result;
95 };
96}
97
98impl<'lexer> Parser<'lexer> {
99 pub fn new(mut lexer: Lexer<'lexer>) -> Self {
100 // Load the first token from the lexer to be used as the current token.
101 let cur = lexer.next_token();
102 Parser {
103 lexer,
104 cur_token: cur,
105 }
106 }
107
108 fn next_token(&mut self) {
109 self.cur_token = self.lexer.next_token();
110 }
111
112 /// Returns true if the current token is equal to the expected token.
113 #[inline]
114 fn cur_token_is(&self, token_kind: &TokenKind) -> bool {
115 self.cur_token.kind == *token_kind
116 }
117
118 /// Consumes the current token if it matches the expected token, otherwise returns
119 /// a syntax error.
120 ///
121 /// # Arguments
122 ///
123 /// * `expected_kind` - The expected token kind.
124 ///
125 /// # Returns
126 ///
127 /// An `Ok` containing `()` if the current token matches the expected token, otherwise
128 /// an `Err` containing a syntax error.
129 fn eat(&mut self, expected_kind: &TokenKind) -> Result<(), Error> {
130 if self.cur_token_is(expected_kind) {
131 self.next_token();
132 Ok(())
133 } else {
134 // This is essentially the default syntax error, any error which doesn't have a specific
135 // handler be caught by this.
136 parse_err!("expected {}, got {}", expected_kind, self.cur_token.kind)
137 }
138 }
139
140 /// The main entry point for the parser. This method is responsible for parsing and
141 /// lexing the entire input program, and returning the resulting AST or a syntax
142 /// error.
143 ///
144 /// # Returns
145 ///
146 /// An `Ok` containing the AST (starting with the root `Program` node) if the input
147 /// program is successfully parsed, otherwise an `Err` containing a syntax error.
148 pub fn parse_program(&mut self) -> Result<Program, Error> {
149 let mut program = Program::new();
150
151 if self.cur_token_is(&TokenKind::Eof) {
152 return Ok(program);
153 } else {
154 program.statements = self.parse_statement_list()?.statements;
155 program.span.end = self.cur_token.span.end;
156 self.eat(&TokenKind::Eof)?;
157 return Ok(program);
158 }
159 }
160
161 /// Parses a list of statements without consuming the termination token
162 /// (`End` | `Eof` | `Otherwise`)
163 ///
164 /// Note: Does not handle empty statements lists, the caller should handle this case.
165 fn parse_statement_list(&mut self) -> Result<StatementList, Error> {
166 let mut list = StatementList::new();
167
168 // Optionally consume leading NewLines
169 while matches!(self.cur_token.kind, TokenKind::NewLine) {
170 self.next_token();
171 }
172
173 loop {
174 // Parse a statement and add it to the list
175 list.statements.push(self.parse_statement()?);
176
177 // If the next token is 'End' or 'Eof' or 'Otherwise', this represents the end of the
178 // statement list, so we break out of the loop.
179 if matches!(
180 self.cur_token.kind,
181 TokenKind::End | TokenKind::Eof | TokenKind::Otherwise
182 ) {
183 break; // No separator needed before 'End' or 'Eof'
184 }
185
186 // Each statement must be separated by a `NewLine` or `Semicolon`, which is enforced
187 // here. The exception is when the next token is 'End' or 'Eof' or 'Otherwise', which is
188 // handled by the previous if statement.
189 if !matches!(
190 self.cur_token.kind,
191 TokenKind::Semicolon | TokenKind::NewLine
192 ) {
193 return parse_err!("expected ';' or newline between statements");
194 }
195 // Multiple separators are allowed, so we consume all of them.
196 while matches!(
197 self.cur_token.kind,
198 TokenKind::Semicolon | TokenKind::NewLine
199 ) {
200 self.next_token();
201 }
202
203 // If we encounter 'End' or 'Eof' after consuming separators, break out of the loop.
204 if matches!(
205 self.cur_token.kind,
206 TokenKind::End | TokenKind::Eof | TokenKind::Otherwise
207 ) {
208 break;
209 }
210 }
211
212 Ok(list)
213 }
214
215 /// Parses a single statement.
216 ///
217 /// The EBNF for a statement is:
218 /// ```ebnf
219 /// statement = set_stmt
220 /// | return_stmt
221 /// | expression
222 /// | fn_decl_stmt
223 /// | repeat_stmt
224 /// | display_stmt;
225 /// ```
226 fn parse_statement(&mut self) -> Result<Statement, Error> {
227 match self.cur_token.kind {
228 TokenKind::Set => self.parse_set_stmt(),
229 TokenKind::Return => self.parse_return_stmt(),
230 TokenKind::DefineFunction => self.parse_fn_decl_stmt(),
231 TokenKind::Repeat => self.parse_repeat_stmt(),
232 TokenKind::Display => self.parse_display_stmt(),
233 _ => self.parse_expr_stmt(),
234 }
235 }
236
237 /// Parses a `Set` (i.e. `set x = 4`) statement.
238 ///
239 /// The EBNF for a `Set` statement is:
240 /// ```ebnf
241 /// set_stmt = "set" , ident , "=" , expression;
242 /// ```
243 fn parse_set_stmt(&mut self) -> Result<Statement, Error> {
244 let start = self.cur_token.span.start;
245 self.eat(&TokenKind::Set)?;
246 let ident = self.parse_identifier()?;
247 self.eat(&TokenKind::Assign)?;
248 let expr = self.parse_expression()?;
249 let span = Span::new(start, self.cur_token.span.end);
250
251 return Ok(Statement::Set(Set { ident, expr, span }));
252 }
253
254 /// Parses a `Return` statement.
255 ///
256 /// The EBNF for a `Return` statement is:
257 /// ```ebnf
258 /// return_stmt = "return" , expression;
259 /// ```
260 fn parse_return_stmt(&mut self) -> Result<Statement, Error> {
261 let start = self.cur_token.span.start;
262 self.eat(&TokenKind::Return)?;
263 let return_value = self.parse_expression()?;
264 let span = Span::new(start, self.cur_token.span.end);
265 return Ok(Statement::Return(Return {
266 value: return_value,
267 span,
268 }));
269 }
270
271 /// Parses a function declaration statement.
272 ///
273 /// The EBNF for a function declaration statement is:
274 /// ```ebnf
275 /// fn_decl_stmt = "defineFunction" , "(" , [params] , ")" , [statements]
276 /// , "end";
277 /// params = ident , {"," , ident};
278 /// ```
279 fn parse_fn_decl_stmt(&mut self) -> Result<Statement, Error> {
280 let start = self.cur_token.span.start;
281
282 self.eat(&TokenKind::DefineFunction)?;
283
284 // Parse the function name
285 let name = self.parse_identifier()?;
286
287 // Parse the function parameters.
288 self.eat(&TokenKind::LParen)?;
289 let parameters = self.parse_fn_decl_parameters()?;
290 self.eat(&TokenKind::RParen)?;
291
292 // Parse the function body.
293 // If the function body is empty, return an empty statement list. Otherwise, parse the
294 // function body.
295 let body = if self.cur_token_is(&TokenKind::End) {
296 self.create_empty_statement_list()
297 } else {
298 self.parse_statement_list()?
299 };
300
301 let end = self.cur_token.span.end;
302 self.eat(&TokenKind::End)?;
303 let span = Span::new(start, end);
304
305 return Ok(Statement::FunctionDeclaration(FunctionDeclaration {
306 name,
307 parameters,
308 body,
309 span,
310 }));
311 }
312
313 /// Parses the parameters of a function declaration.
314 ///
315 /// The EBNF grammar for this function is:
316 /// ```ebnf
317 /// params = [ident , {"," , ident}]
318 /// ```
319 fn parse_fn_decl_parameters(&mut self) -> Result<Vec<Identifier>, Error> {
320 if self.cur_token_is(&TokenKind::RParen) {
321 // Return an empty array if there are no parameters.
322 return Ok(vec![]);
323 } else {
324 let mut parameters: Vec<Identifier> = Vec::new();
325 // Parse the first parameter
326 let param = self.parse_fn_decl_parameter()?;
327 parameters.push(param);
328
329 // Parse all subsequent parameters seperated by commas.
330 while self.cur_token_is(&TokenKind::Comma) {
331 self.eat(&TokenKind::Comma)?;
332 let param = self.parse_fn_decl_parameter()?;
333 parameters.push(param);
334 }
335 return Ok(parameters);
336 }
337 }
338
339 /// This function parses a single function parameter, the same as parsing an
340 /// identifier, except with a specialised error message.
341 fn parse_fn_decl_parameter(&mut self) -> Result<Identifier, Error> {
342 let prev_token_kind = self.cur_token.kind.clone();
343 self.parse_identifier().map_err(|mut err| {
344 err.message = format!("expected function parameter, got '{}'", prev_token_kind);
345 return err;
346 })
347 }
348
349 /// Parses a `Display` statement.
350 ///
351 /// The EBNF for a `Display` statement is:
352 /// ```ebnf
353 /// display_stmt = "display" , expression , {"," , expression};
354 /// ```
355 fn parse_display_stmt(&mut self) -> Result<Statement, Error> {
356 let start = self.cur_token.span.start;
357 self.eat(&TokenKind::Display)?;
358 let expressions = match self.cur_token.kind {
359 TokenKind::NewLine | TokenKind::Semicolon => {
360 return parse_err!("empty display statement");
361 }
362 _ => self.parse_expr_list()?,
363 };
364 let span = Span::new(start, self.cur_token.span.end);
365 return Ok(Statement::Display(Display { expressions, span }));
366 }
367
368 /// Parses a `Repeat` statement.
369 ///
370 /// The EBNF for a `Repeat` statement is:
371 /// ```ebnf
372 /// repeat_stmt = "repeat" , (repeat_n_times | repeat_until | repeat_forever);
373 /// ```
374 fn parse_repeat_stmt(&mut self) -> Result<Statement, Error> {
375 let start = self.cur_token.span.start;
376
377 self.eat(&TokenKind::Repeat)?;
378
379 let statement = match &self.cur_token.kind {
380 TokenKind::Until => self.parse_repeat_until(start)?,
381 TokenKind::Forever => self.parse_repeat_forever(start)?,
382 _ => self.parse_repeat_n_times(start)?,
383 };
384
385 return Ok(statement);
386 }
387
388 /// Parses a `RepeatUntil` statement.
389 ///
390 /// The EBNF for a `RepeatUntil` statement is:
391 /// ```ebnf
392 /// repeat_until = "until" , expression , [statements] , "end";
393 /// ```
394 fn parse_repeat_until(&mut self, start: usize) -> Result<Statement, Error> {
395 self.eat(&TokenKind::Until)?;
396
397 let expression = self.parse_expression()?;
398
399 let statements = match self.cur_token.kind {
400 TokenKind::End => self.create_empty_statement_list(),
401 _ => self.parse_statement_list()?,
402 };
403
404 let span = Span::new(start, self.cur_token.span.end);
405
406 self.eat(&TokenKind::End)?;
407
408 return Ok(Statement::RepeatUntil(RepeatUntil {
409 condition: expression,
410 body: statements,
411 span,
412 }));
413 }
414
415 /// Parses a `RepeatForever` statement.
416 ///
417 /// The EBNF for a `RepeatForever` statement is:
418 /// ```ebnf
419 /// repeat_forever = "forever" , [statements] , "end";
420 /// ```
421 fn parse_repeat_forever(&mut self, start: usize) -> Result<Statement, Error> {
422 self.eat(&TokenKind::Forever)?;
423
424 let statements = match self.cur_token.kind {
425 TokenKind::End => self.create_empty_statement_list(),
426 _ => self.parse_statement_list()?,
427 };
428
429 let span = Span::new(start, self.cur_token.span.end);
430
431 self.eat(&TokenKind::End)?;
432
433 return Ok(Statement::RepeatForever(RepeatForever {
434 body: statements,
435 span,
436 }));
437 }
438
439 /// Parses a `RepeatNTimes` statement.
440 ///
441 /// The EBNF for a `RepeatNTimes` statement is:
442 /// ```ebnf
443 /// repeat_n_times = expression , "times" , [statements] , "end";
444 /// ```
445 fn parse_repeat_n_times(&mut self, start: usize) -> Result<Statement, Error> {
446 let n = self.parse_expression()?;
447
448 self.eat(&TokenKind::Times)?;
449
450 let statements = match self.cur_token.kind {
451 TokenKind::End => self.create_empty_statement_list(),
452 _ => self.parse_statement_list()?,
453 };
454
455 let span = Span::new(start, self.cur_token.span.end);
456
457 self.eat(&TokenKind::End)?;
458
459 return Ok(Statement::RepeatNTimes(ast::statement::RepeatNTimes {
460 n,
461 body: statements,
462 span,
463 }));
464 }
465
466 /// Parses an `Expression` statement.
467 fn parse_expr_stmt(&mut self) -> Result<Statement, Error> {
468 let expr = self.parse_expression()?;
469 return Ok(Statement::Expression(expr));
470 }
471
472 /// Parses an expression (e.g. `1+1`).
473 ///
474 /// The EBNF for an expression is:
475 /// ```ebnf
476 /// expression = or_expr;
477 /// ```
478 fn parse_expression(&mut self) -> Result<Expression, Error> {
479 self.parse_or_expr()
480 }
481
482 /// Parses an or expression (e.g. `true or false`).
483 ///
484 /// The EBNF for an or expression is:
485 /// ```ebnf
486 /// or_expr = and_expr {"or" , and_expr};
487 /// ```
488 fn parse_or_expr(&mut self) -> Result<Expression, Error> {
489 // <or_expr> -> <and_expr> (`Or` <and_expr>)*
490 parse_binary_expr!(self, parse_and_expr, TokenKind::Or);
491 }
492
493 /// Parses an and expression (e.g. `x > 1 and x < 10`)
494 ///
495 /// The EBNF for an and expression is:
496 /// ```ebnf
497 /// and_expr = eq_expr {"and" , eq_expr};
498 /// ```
499 fn parse_and_expr(&mut self) -> Result<Expression, Error> {
500 parse_binary_expr!(self, parse_eq_expr, TokenKind::And);
501 }
502
503 /// Parses an equality expression (e.g. `a == b != c`)
504 ///
505 /// The EBNF for an equality expression is:
506 /// ```ebnf
507 /// eq_expr = comp_expr {("==" | "!=") , comp_expr};
508 /// ```
509 fn parse_eq_expr(&mut self) -> Result<Expression, Error> {
510 parse_binary_expr!(self, parse_comp_expr, TokenKind::Eq | TokenKind::NotEq);
511 }
512
513 /// Parses a comparison expression (e.g. `x > 4`)
514 ///
515 /// The EBNF for a comparison expression is:
516 /// ```ebnf
517 /// comp_expr = sum_expr {("<" | "<=" | ">" | ">=") , sum_expr};
518 /// ```
519 fn parse_comp_expr(&mut self) -> Result<Expression, Error> {
520 parse_binary_expr!(
521 self,
522 parse_sum_expr,
523 TokenKind::Lt | TokenKind::LtEq | TokenKind::Gt | TokenKind::GtEq
524 );
525 }
526
527 /// Parses a sum expression (e.g. `1+2-3`)
528 ///
529 /// The EBNF for a sum expression is:
530 /// ```ebnf
531 /// sum_expr = product_expr {("+" | "-") , product_expr}
532 /// ```
533 fn parse_sum_expr(&mut self) -> Result<Expression, Error> {
534 parse_binary_expr!(self, parse_product_expr, TokenKind::Plus | TokenKind::Minus);
535 }
536
537 /// Parses a product expression (e.g. `1 * 2 / 3 % 4`)
538 ///
539 /// The EBNF for a product expression is:
540 /// ```ebnf
541 /// product_expr = postfix_expr {("*" | "/" | "%") , postfix_expr};
542 /// ```
543 fn parse_product_expr(&mut self) -> Result<Expression, Error> {
544 // <product_expr> -> <postfix_expr> ((`Mult` | `Div` | `Mod`) <postfix_expr>)*
545 parse_binary_expr!(
546 self,
547 parse_postfix_expr,
548 TokenKind::Mult | TokenKind::Div | TokenKind::Mod
549 );
550 }
551
552 /// Parses a post-fix expression (e.g. `x[10]` or `print(x)`)
553 ///
554 /// The EBNF for a post-fix expression is:
555 /// ```ebnf
556 /// postfix_expr = prefix_expr {fn_call | array_index};
557 /// array_index = "[" , expression , "]"
558 /// fn_call = "(" , [args] , ")";
559 /// args = expression , {"," , expression};
560 /// ```
561 fn parse_postfix_expr(&mut self) -> Result<Expression, Error> {
562 let start = self.cur_token.span.start;
563 let mut node = self.parse_prefix_expr()?;
564
565 loop {
566 match &self.cur_token.kind {
567 // Parse array index
568 TokenKind::LBracket => {
569 self.eat(&TokenKind::LBracket)?;
570 let index_expr = self.parse_expression()?;
571 self.eat(&TokenKind::RBracket)?;
572 let span = Span::new(start, self.cur_token.span.end);
573 node = Expression::Index(Index {
574 object: Box::new(node),
575 index: Box::new(index_expr),
576 span,
577 })
578 }
579 // Parse function call
580 TokenKind::LParen => {
581 self.eat(&TokenKind::LParen)?;
582 let arguments = self
583 .parse_expr_list_maybe_empty(&TokenKind::RParen)
584 .map_err(|mut err| {
585 err.message =
586 format!("expected function parameter, got {}", self.cur_token.kind);
587 err
588 })?;
589 self.eat(&TokenKind::RParen)?;
590
591 let span = Span::new(start, self.cur_token.span.end);
592 node = Expression::FunctionCall(FunctionCall {
593 callee: Box::new(node),
594 arguments,
595 span,
596 })
597 }
598 _ => break,
599 };
600 }
601
602 return Ok(node);
603 }
604
605 /// Parse a prefix expression (e.g. `not true` or `---1`)
606 ///
607 /// The EBNF for a prefix expression is:
608 /// ```ebnf
609 /// prefix_expr = {"not" | "-"} , ident_expr;
610 /// ```
611 fn parse_prefix_expr(&mut self) -> Result<Expression, Error> {
612 // Add each prefix operation into an array. For example, if the expression was `not -1`,
613 // the array would be `[TokenKind::Not, TokenKind::Minus]`.
614 let mut prefix_operations = Vec::new();
615 while matches!(&self.cur_token.kind, TokenKind::Not | TokenKind::Minus) {
616 prefix_operations.push(self.cur_token.clone());
617 self.next_token();
618 }
619
620 // Parse the expression.
621 let mut node: Expression = self.parse_ident_expr()?;
622
623 // Now, add each operation to the syntax tree IN REVERSE, since the tree starts from the
624 // root node (the expression), and then the prefix operation closest to the expression is
625 // added to the tree first.
626 for operation in prefix_operations.into_iter().rev() {
627 let operator = operation.kind.clone();
628 let span = Span::new(operation.span.start, node.span().end);
629 node = Expression::Unary(Unary {
630 operator,
631 operand: Box::new(node),
632 span,
633 })
634 }
635
636 return Ok(node);
637 }
638
639 /// Parse an identifier expression (e.g. `my_var`).
640 ///
641 /// The EBNF for an identfier expression is:
642 /// ```ebnf
643 /// ident_expr = ident | group_expr;
644 /// ```
645 fn parse_ident_expr(&mut self) -> Result<Expression, Error> {
646 match &self.cur_token.kind.clone() {
647 TokenKind::Identifier { .. } => Ok(Expression::Identifier(self.parse_identifier()?)),
648 _ => self.parse_group_expr(),
649 }
650 }
651
652 /// Parse a group expression (e.g. `(1+1)*2`).
653 ///
654 /// The EBNF for a group expression is:
655 /// ```ebnf
656 /// group_expr = ("(" , expression , ")") | entity_expr;
657 /// ```
658 fn parse_group_expr(&mut self) -> Result<Expression, Error> {
659 if self.cur_token_is(&TokenKind::LParen) {
660 self.eat(&TokenKind::LParen)?;
661 let expr = self.parse_expression()?;
662 self.eat(&TokenKind::RParen)?;
663 return Ok(expr);
664 } else {
665 return self.parse_entity_expr();
666 }
667 }
668
669 /// Parse an entity expression. An 'entity' is a single building block of an
670 /// expression, it cannot be sliced up into smaller expressions.
671 ///
672 /// The EBNF for an entity expression is:
673 /// ```ebnf
674 /// entity_expr = selection_expr | array_expr | literal;
675 /// ```
676 fn parse_entity_expr(&mut self) -> Result<Expression, Error> {
677 match &self.cur_token.kind {
678 TokenKind::If => return self.parse_selection_expr(),
679 TokenKind::LBracket => return self.parse_array_expr(),
680 _ => {
681 let literal = self.parse_literal()?;
682 return Ok(Expression::Literal(literal));
683 }
684 }
685 }
686
687 /// Parse an array expression (e.g. `[1, 2, x, 16/4]`).
688 ///
689 /// The EBNF for an array expression is:
690 /// ```ebnf
691 /// array_expr = "[" , [elements] , "]";
692 /// elements = expression , {"," , expression};
693 /// ```
694 fn parse_array_expr(&mut self) -> Result<Expression, Error> {
695 let start = self.cur_token.span.start;
696 self.eat(&TokenKind::LBracket)?;
697 let elements = self.parse_expr_list_maybe_empty(&TokenKind::RBracket)?;
698 self.eat(&TokenKind::RBracket)?;
699 let span = Span::new(start, self.cur_token.span.end);
700
701 return Ok(Expression::Array(Array { elements, span }));
702 }
703
704 /// Parse a selection expression (e.g. `if x > 4 then return x end`)
705 ///
706 /// The EBNF for an array expression is:
707 /// ```ebnf
708 /// selection_expr = "if" , expression , "then" , [statements]
709 /// , ["otherwise" [statements]] "end";
710 /// ```
711 fn parse_selection_expr(&mut self) -> Result<Expression, Error> {
712 let start = self.cur_token.span.start;
713
714 self.eat(&TokenKind::If)?;
715 let condition_expr = self.parse_expression()?;
716 self.eat(&TokenKind::Then)?;
717 let conditional_statements = self.parse_selection_if_body()?;
718 let else_conditional_block = self.parse_selection_else_body()?;
719
720 let end = self.cur_token.span.end;
721 self.eat(&TokenKind::End)?;
722 let span = Span::new(start, end);
723
724 return Ok(Expression::Selection(Selection {
725 condition: Box::new(condition_expr),
726 conditional: conditional_statements,
727 else_conditional: else_conditional_block,
728 span,
729 }));
730 }
731
732 /// Parse the statements within the 'if' branch of a selection statement.
733 fn parse_selection_if_body(&mut self) -> Result<StatementList, Error> {
734 match matches!(self.cur_token.kind, TokenKind::End | TokenKind::Otherwise) {
735 false => self.parse_statement_list(),
736 true => Ok(self.create_empty_statement_list()),
737 }
738 }
739
740 /// Parse the statements within the 'else' branch of a selection statement (if it
741 /// exists).
742 fn parse_selection_else_body(&mut self) -> Result<Option<StatementList>, Error> {
743 if self.cur_token_is(&TokenKind::Otherwise) {
744 self.eat(&TokenKind::Otherwise)?;
745 let else_conditional_statements = match matches!(self.cur_token.kind, TokenKind::End) {
746 false => self.parse_statement_list()?,
747 true => self.create_empty_statement_list(),
748 };
749 Ok(Some(else_conditional_statements))
750 } else {
751 Ok(None)
752 }
753 }
754
755 /// Parse a literal. A literal is a hard-coded `string`, `int`, `float` or `bool`.
756 ///
757 /// The EBNF grammar for a literal is:
758 /// ```ebnf
759 /// literal = int | float | bool | string;
760 /// bool = "true" | "false";
761 /// float = int , "." , int;
762 /// int = digit , {digit};
763 /// digit = "0" | "1" | ... | "9";
764 /// string = '"', {char} , '"';
765 /// char = ? any character ? -'"';
766 /// ```
767 fn parse_literal(&mut self) -> Result<Literal, Error> {
768 let span = self.cur_token.span.clone();
769 match &self.cur_token.kind.clone() {
770 TokenKind::Int(n) => {
771 return self.parse_integer_literal(n);
772 }
773 TokenKind::Float(n) => {
774 return self.parse_float_literal(n);
775 }
776 TokenKind::True => {
777 self.next_token();
778 return Ok(Literal::Boolean { value: true, span });
779 }
780 TokenKind::False => {
781 self.next_token();
782 return Ok(Literal::Boolean { value: false, span });
783 }
784 TokenKind::String(string) => {
785 self.next_token();
786 return Ok(Literal::String {
787 value: string.to_owned(),
788 span,
789 });
790 }
791 _ => parse_err!("expected Literal, got {}", self.cur_token.kind),
792 }
793 }
794
795 /// Converts a string into an integer literal.
796 fn parse_integer_literal(&mut self, integer_to_parse: &String) -> Result<Literal, Error> {
797 let span = self.cur_token.span.clone();
798 self.next_token();
799 match integer_to_parse.parse::<i64>() {
800 Ok(n) => Ok(Literal::Integer { value: n, span }),
801 Err(err) => match err.kind() {
802 IntErrorKind::PosOverflow => Err(Error::new(
803 &format!(
804 "literal to large for type Integer, whose maximum value is `{}`",
805 i64::MAX
806 ),
807 ErrorKind::OverflowError,
808 )),
809 _ => parse_err!("failed to parse literal into Integer"),
810 },
811 }
812 }
813
814 /// Converts a string into a float literal.
815 fn parse_float_literal(&mut self, float_to_parse: &String) -> Result<Literal, Error> {
816 let span = self.cur_token.span.clone();
817 self.next_token();
818 match float_to_parse.parse::<f64>() {
819 Ok(n) => Ok(Literal::Float { value: n, span }),
820 Err(_) => {
821 return parse_err!("failed to parse literal into Float");
822 }
823 }
824 }
825
826 /// Parse an identifier (e.g. `my_var`).
827 ///
828 /// The EBNF for this function is:
829 /// ```ebnf
830 /// ident = letter , {letter | digit};
831 /// digit = "0" | "1" | ... | "9";
832 /// letter = ? any alphabetical character ? | "_";
833 /// ```
834 fn parse_identifier(&mut self) -> Result<Identifier, Error> {
835 let span = self.cur_token.span.clone();
836 let name = match &self.cur_token.kind {
837 TokenKind::Identifier { name } => name.to_string(),
838 _ => return parse_err!("expected Identifier, got {}", self.cur_token.kind),
839 };
840 // TokenKind has already been checked, so we can safely call next_token()
841 self.next_token();
842
843 Ok(Identifier { name, span })
844 }
845
846 /// Parses a potentially empty list of (comma seperated) expressions.
847 ///
848 /// The EBNF for this function is:
849 /// ```ebnf
850 /// expressions = [expression , {"," , expression}];
851 /// ```
852 fn parse_expr_list_maybe_empty(
853 &mut self,
854 ending_token: &TokenKind,
855 ) -> Result<Vec<Expression>, Error> {
856 if self.cur_token_is(ending_token) {
857 return Ok(vec![]);
858 } else {
859 return self.parse_expr_list();
860 };
861 }
862
863 /// Parses a list of (comma seperated) expressions.
864 ///
865 /// The EBNF for this function is:
866 /// ```ebnf
867 /// expressions = expression , {"," , expression};
868 /// ```
869 fn parse_expr_list(&mut self) -> Result<Vec<Expression>, Error> {
870 let mut expressions: Vec<Expression> = Vec::new();
871 expressions.push(self.parse_expression()?);
872 while self.cur_token_is(&TokenKind::Comma) {
873 self.eat(&TokenKind::Comma)?;
874 expressions.push(self.parse_expression()?);
875 }
876 return Ok(expressions);
877 }
878
879 /// Helper function to create an empty statement list at the location fo the current
880 /// token.
881 fn create_empty_statement_list(&self) -> StatementList {
882 StatementList {
883 statements: vec![],
884 span: Span::new(self.cur_token.span.start, self.cur_token.span.start),
885 }
886 }
887}
888
889/// This function takes in a string input, and produces either a valid syntax tree, or a
890/// syntax error.
891///
892/// # Arguments
893///
894/// * `input` - The input SAP program as a string.
895///
896/// # Returns
897///
898/// * If there are no syntax errors, an `Ok(Program)`, which is the root node containing the
899/// entire AST (Abstract Syntax Tree) which represents the program.
900///
901/// * If there are syntax errors, then an `Err(Error)` is returned, containing information
902/// about what went wrong.
903pub fn parse(input: &str) -> Result<Program, Error> {
904 let lexer = Lexer::new(input);
905 let mut parser = Parser::new(lexer);
906
907 return parser.parse_program();
908}