parser/
lib.rs

1//! # Parser Module
2//!
3//! The parser module contains the implementation of the SAP language parser. The parser
4//! is responsible for taking a sequence of tokens and converting them into an abstract
5//! syntax tree (AST). The AST is then used by the interpreter to execute the program.
6//!
7//! The parser is implemented as a recursive descent parser, which is a top-down parser
8//! that starts from the root of the syntax tree and works its way down to the leaves.
9//!
10//! It is also responsible for reporting syntax errors in the input program. When
11//! a syntax error is encountered, the parser returns an error containing a message
12//! describing the error.
13use std::num::IntErrorKind;
14
15use ast::expression::{
16    Array, Binary, Expression, FunctionCall, Identifier, Index, Selection, Unary,
17};
18use ast::literal::Literal;
19use ast::statement::{
20    Display, FunctionDeclaration, RepeatForever, RepeatUntil, Return, Set, Statement,
21};
22use ast::{Program, StatementList};
23use lexer::token::{Token, TokenKind};
24use lexer::Lexer;
25use shared::error::{Error, ErrorKind};
26use shared::span::{GetSpan, Span};
27
28// Attempt to obtain the current version of the parser module.
29pub const VERSION: Option<&str> = std::option_env!("CARGO_PKG_VERSION");
30
31#[cfg(test)]
32mod test;
33
34/// The `Parser` struct is responsible for parsing a sequence of tokens into an abstract
35/// syntax tree (AST). The parser is implemented as a recursive descent parser, which is
36/// a top-down parser that starts from the root of the syntax tree and works its way down
37/// to the leaves.
38///
39/// The `Parser` struct contains a reference to a `Lexer` instance, which is used to
40/// obtain the tokens that make up the input program. The parser is also responsible for
41/// reporting syntax errors in the input program. When a syntax error is encountered, the
42/// parser returns an error containing a message describing the error.
43pub struct Parser<'lexer> {
44    lexer: Lexer<'lexer>,
45    cur_token: Token,
46}
47
48/// This macro is an expansion for creating a new `Error` instance with a syntax error
49/// kind. It's intended for use within the parser to simplify the creation of syntax
50/// errors.
51///
52/// Arguments are identical to the `format!` macro.
53macro_rules! parse_err {
54    ($($arg:tt)*) => {{
55        Err(shared::error::Error::new(&format!($($arg)*), ErrorKind::SyntaxError))
56    }}
57}
58
59/// This macro is an expansion for parsing binary expressions. It's intended for use
60/// within the parser to simplify the repetitive pattern of parsing binary operations.
61///
62/// # Arguments
63///
64/// * `self` - The parser instance, typically the `self` keyword in a method.
65/// * `downstream_parse_fn` - A function that parses the individual expressions on both
66///   sides of the binary operation. This function is called repeatedly to handle the left
67///   and right-hand sides of the binary operation.
68/// * `pattern` - A pattern matching the current token to the relevant operator(s).
69macro_rules! parse_binary_expr {
70    ($self:ident, $downstream_parse_fn:ident, $pattern:pat) => {
71        let start = $self.cur_token.span.start;
72        // Parse the left hand side
73        let mut node = $self.$downstream_parse_fn()?;
74
75        // While the current token matches the expected operator, repeatedly parse the right hand
76        // side and construct a new binary expression.
77        while matches!(&$self.cur_token.kind, $pattern) {
78            let operator = $self.cur_token.kind.clone();
79            // Move onto next token
80            $self.next_token();
81            // Parse the right hand side
82            let right = $self.$downstream_parse_fn()?;
83            let span = Span::new(start, $self.cur_token.span.end);
84            // Construct a new binary expression, placing the old `node` inside the new one.
85            node = Expression::Binary(Binary {
86                operator,
87                left: Box::new(node),
88                right: Box::new(right),
89                span,
90            });
91        }
92
93        let result: Result<Expression, Error> = Ok(node);
94        return result;
95    };
96}
97
98impl<'lexer> Parser<'lexer> {
99    pub fn new(mut lexer: Lexer<'lexer>) -> Self {
100        // Load the first token from the lexer to be used as the current token.
101        let cur = lexer.next_token();
102        Parser {
103            lexer,
104            cur_token: cur,
105        }
106    }
107
108    fn next_token(&mut self) {
109        self.cur_token = self.lexer.next_token();
110    }
111
112    /// Returns true if the current token is equal to the expected token.
113    #[inline]
114    fn cur_token_is(&self, token_kind: &TokenKind) -> bool {
115        self.cur_token.kind == *token_kind
116    }
117
118    /// Consumes the current token if it matches the expected token, otherwise returns
119    /// a syntax error.
120    ///
121    /// # Arguments
122    ///
123    /// * `expected_kind` - The expected token kind.
124    ///
125    /// # Returns
126    ///
127    /// An `Ok` containing `()` if the current token matches the expected token, otherwise
128    /// an `Err` containing a syntax error.
129    fn eat(&mut self, expected_kind: &TokenKind) -> Result<(), Error> {
130        if self.cur_token_is(expected_kind) {
131            self.next_token();
132            Ok(())
133        } else {
134            // This is essentially the default syntax error, any error which doesn't have a specific
135            // handler be caught by this.
136            parse_err!("expected {}, got {}", expected_kind, self.cur_token.kind)
137        }
138    }
139
140    /// The main entry point for the parser. This method is responsible for parsing and
141    /// lexing the entire input program, and returning the resulting AST or a syntax
142    /// error.
143    ///
144    /// # Returns
145    ///
146    /// An `Ok` containing the AST (starting with the root `Program` node) if the input
147    /// program is successfully parsed, otherwise an `Err` containing a syntax error.
148    pub fn parse_program(&mut self) -> Result<Program, Error> {
149        let mut program = Program::new();
150
151        if self.cur_token_is(&TokenKind::Eof) {
152            return Ok(program);
153        } else {
154            program.statements = self.parse_statement_list()?.statements;
155            program.span.end = self.cur_token.span.end;
156            self.eat(&TokenKind::Eof)?;
157            return Ok(program);
158        }
159    }
160
161    /// Parses a list of statements without consuming the termination token
162    /// (`End` | `Eof` | `Otherwise`)
163    ///
164    /// Note: Does not handle empty statements lists, the caller should handle this case.
165    fn parse_statement_list(&mut self) -> Result<StatementList, Error> {
166        let mut list = StatementList::new();
167
168        // Optionally consume leading NewLines
169        while matches!(self.cur_token.kind, TokenKind::NewLine) {
170            self.next_token();
171        }
172
173        loop {
174            // Parse a statement and add it to the list
175            list.statements.push(self.parse_statement()?);
176
177            // If the next token is 'End' or 'Eof' or 'Otherwise', this represents the end of the
178            // statement list, so we break out of the loop.
179            if matches!(
180                self.cur_token.kind,
181                TokenKind::End | TokenKind::Eof | TokenKind::Otherwise
182            ) {
183                break; // No separator needed before 'End' or 'Eof'
184            }
185
186            // Each statement must be separated by a `NewLine` or `Semicolon`, which is enforced
187            // here. The exception is when the next token is 'End' or 'Eof' or 'Otherwise', which is
188            // handled by the previous if statement.
189            if !matches!(
190                self.cur_token.kind,
191                TokenKind::Semicolon | TokenKind::NewLine
192            ) {
193                return parse_err!("expected ';' or newline between statements");
194            }
195            // Multiple separators are allowed, so we consume all of them.
196            while matches!(
197                self.cur_token.kind,
198                TokenKind::Semicolon | TokenKind::NewLine
199            ) {
200                self.next_token();
201            }
202
203            // If we encounter 'End' or 'Eof' after consuming separators, break out of the loop.
204            if matches!(
205                self.cur_token.kind,
206                TokenKind::End | TokenKind::Eof | TokenKind::Otherwise
207            ) {
208                break;
209            }
210        }
211
212        Ok(list)
213    }
214
215    /// Parses a single statement.
216    ///
217    /// The EBNF for a statement is:
218    /// ```ebnf
219    ///     statement = set_stmt
220    ///                 | return_stmt
221    ///                 | expression
222    ///                 | fn_decl_stmt
223    ///                 | repeat_stmt
224    ///                 | display_stmt;
225    /// ```
226    fn parse_statement(&mut self) -> Result<Statement, Error> {
227        match self.cur_token.kind {
228            TokenKind::Set => self.parse_set_stmt(),
229            TokenKind::Return => self.parse_return_stmt(),
230            TokenKind::DefineFunction => self.parse_fn_decl_stmt(),
231            TokenKind::Repeat => self.parse_repeat_stmt(),
232            TokenKind::Display => self.parse_display_stmt(),
233            _ => self.parse_expr_stmt(),
234        }
235    }
236
237    /// Parses a `Set` (i.e. `set x = 4`) statement.
238    ///
239    /// The EBNF for a `Set` statement is:
240    /// ```ebnf
241    ///     set_stmt = "set" , ident , "=" , expression;
242    /// ```
243    fn parse_set_stmt(&mut self) -> Result<Statement, Error> {
244        let start = self.cur_token.span.start;
245        self.eat(&TokenKind::Set)?;
246        let ident = self.parse_identifier()?;
247        self.eat(&TokenKind::Assign)?;
248        let expr = self.parse_expression()?;
249        let span = Span::new(start, self.cur_token.span.end);
250
251        return Ok(Statement::Set(Set { ident, expr, span }));
252    }
253
254    /// Parses a `Return` statement.
255    ///
256    /// The EBNF for a `Return` statement is:
257    /// ```ebnf
258    ///    return_stmt = "return" , expression;
259    /// ```
260    fn parse_return_stmt(&mut self) -> Result<Statement, Error> {
261        let start = self.cur_token.span.start;
262        self.eat(&TokenKind::Return)?;
263        let return_value = self.parse_expression()?;
264        let span = Span::new(start, self.cur_token.span.end);
265        return Ok(Statement::Return(Return {
266            value: return_value,
267            span,
268        }));
269    }
270
271    /// Parses a function declaration statement.
272    ///
273    /// The EBNF for a function declaration statement is:
274    /// ```ebnf
275    ///     fn_decl_stmt = "defineFunction" , "(" , [params] , ")" , [statements]
276    ///                    , "end";
277    ///     params = ident , {"," , ident};
278    /// ```
279    fn parse_fn_decl_stmt(&mut self) -> Result<Statement, Error> {
280        let start = self.cur_token.span.start;
281
282        self.eat(&TokenKind::DefineFunction)?;
283
284        // Parse the function name
285        let name = self.parse_identifier()?;
286
287        // Parse the function parameters.
288        self.eat(&TokenKind::LParen)?;
289        let parameters = self.parse_fn_decl_parameters()?;
290        self.eat(&TokenKind::RParen)?;
291
292        // Parse the function body.
293        // If the function body is empty, return an empty statement list. Otherwise, parse the
294        // function body.
295        let body = if self.cur_token_is(&TokenKind::End) {
296            self.create_empty_statement_list()
297        } else {
298            self.parse_statement_list()?
299        };
300
301        let end = self.cur_token.span.end;
302        self.eat(&TokenKind::End)?;
303        let span = Span::new(start, end);
304
305        return Ok(Statement::FunctionDeclaration(FunctionDeclaration {
306            name,
307            parameters,
308            body,
309            span,
310        }));
311    }
312
313    /// Parses the parameters of a function declaration.
314    ///
315    /// The EBNF grammar for this function is:
316    /// ```ebnf
317    ///     params = [ident , {"," , ident}]
318    /// ```
319    fn parse_fn_decl_parameters(&mut self) -> Result<Vec<Identifier>, Error> {
320        if self.cur_token_is(&TokenKind::RParen) {
321            // Return an empty array if there are no parameters.
322            return Ok(vec![]);
323        } else {
324            let mut parameters: Vec<Identifier> = Vec::new();
325            // Parse the first parameter
326            let param = self.parse_fn_decl_parameter()?;
327            parameters.push(param);
328
329            // Parse all subsequent parameters seperated by commas.
330            while self.cur_token_is(&TokenKind::Comma) {
331                self.eat(&TokenKind::Comma)?;
332                let param = self.parse_fn_decl_parameter()?;
333                parameters.push(param);
334            }
335            return Ok(parameters);
336        }
337    }
338
339    /// This function parses a single function parameter, the same as parsing an
340    /// identifier, except with a specialised error message.
341    fn parse_fn_decl_parameter(&mut self) -> Result<Identifier, Error> {
342        let prev_token_kind = self.cur_token.kind.clone();
343        self.parse_identifier().map_err(|mut err| {
344            err.message = format!("expected function parameter, got '{}'", prev_token_kind);
345            return err;
346        })
347    }
348
349    /// Parses a `Display` statement.
350    ///
351    /// The EBNF for a `Display` statement is:
352    /// ```ebnf
353    ///     display_stmt = "display" , expression , {"," , expression};
354    /// ```
355    fn parse_display_stmt(&mut self) -> Result<Statement, Error> {
356        let start = self.cur_token.span.start;
357        self.eat(&TokenKind::Display)?;
358        let expressions = match self.cur_token.kind {
359            TokenKind::NewLine | TokenKind::Semicolon => {
360                return parse_err!("empty display statement");
361            }
362            _ => self.parse_expr_list()?,
363        };
364        let span = Span::new(start, self.cur_token.span.end);
365        return Ok(Statement::Display(Display { expressions, span }));
366    }
367
368    /// Parses a `Repeat` statement.
369    ///
370    /// The EBNF for a `Repeat` statement is:
371    /// ```ebnf
372    ///     repeat_stmt = "repeat" , (repeat_n_times | repeat_until | repeat_forever);
373    /// ```
374    fn parse_repeat_stmt(&mut self) -> Result<Statement, Error> {
375        let start = self.cur_token.span.start;
376
377        self.eat(&TokenKind::Repeat)?;
378
379        let statement = match &self.cur_token.kind {
380            TokenKind::Until => self.parse_repeat_until(start)?,
381            TokenKind::Forever => self.parse_repeat_forever(start)?,
382            _ => self.parse_repeat_n_times(start)?,
383        };
384
385        return Ok(statement);
386    }
387
388    /// Parses a `RepeatUntil` statement.
389    ///
390    /// The EBNF for a `RepeatUntil` statement is:
391    /// ```ebnf
392    ///    repeat_until = "until" , expression , [statements] , "end";
393    /// ```
394    fn parse_repeat_until(&mut self, start: usize) -> Result<Statement, Error> {
395        self.eat(&TokenKind::Until)?;
396
397        let expression = self.parse_expression()?;
398
399        let statements = match self.cur_token.kind {
400            TokenKind::End => self.create_empty_statement_list(),
401            _ => self.parse_statement_list()?,
402        };
403
404        let span = Span::new(start, self.cur_token.span.end);
405
406        self.eat(&TokenKind::End)?;
407
408        return Ok(Statement::RepeatUntil(RepeatUntil {
409            condition: expression,
410            body: statements,
411            span,
412        }));
413    }
414
415    /// Parses a `RepeatForever` statement.
416    ///
417    /// The EBNF for a `RepeatForever` statement is:
418    /// ```ebnf
419    ///   repeat_forever = "forever" , [statements] , "end";
420    /// ```
421    fn parse_repeat_forever(&mut self, start: usize) -> Result<Statement, Error> {
422        self.eat(&TokenKind::Forever)?;
423
424        let statements = match self.cur_token.kind {
425            TokenKind::End => self.create_empty_statement_list(),
426            _ => self.parse_statement_list()?,
427        };
428
429        let span = Span::new(start, self.cur_token.span.end);
430
431        self.eat(&TokenKind::End)?;
432
433        return Ok(Statement::RepeatForever(RepeatForever {
434            body: statements,
435            span,
436        }));
437    }
438
439    /// Parses a `RepeatNTimes` statement.
440    ///
441    /// The EBNF for a `RepeatNTimes` statement is:
442    /// ```ebnf
443    ///    repeat_n_times = expression , "times" , [statements] , "end";
444    /// ```
445    fn parse_repeat_n_times(&mut self, start: usize) -> Result<Statement, Error> {
446        let n = self.parse_expression()?;
447
448        self.eat(&TokenKind::Times)?;
449
450        let statements = match self.cur_token.kind {
451            TokenKind::End => self.create_empty_statement_list(),
452            _ => self.parse_statement_list()?,
453        };
454
455        let span = Span::new(start, self.cur_token.span.end);
456
457        self.eat(&TokenKind::End)?;
458
459        return Ok(Statement::RepeatNTimes(ast::statement::RepeatNTimes {
460            n,
461            body: statements,
462            span,
463        }));
464    }
465
466    /// Parses an `Expression` statement.
467    fn parse_expr_stmt(&mut self) -> Result<Statement, Error> {
468        let expr = self.parse_expression()?;
469        return Ok(Statement::Expression(expr));
470    }
471
472    /// Parses an expression (e.g. `1+1`).
473    ///
474    /// The EBNF for an expression is:
475    /// ```ebnf
476    ///     expression = or_expr;
477    /// ```
478    fn parse_expression(&mut self) -> Result<Expression, Error> {
479        self.parse_or_expr()
480    }
481
482    /// Parses an or expression (e.g. `true or false`).
483    ///
484    /// The EBNF for an or expression is:
485    /// ```ebnf
486    ///     or_expr = and_expr {"or" , and_expr};
487    /// ```
488    fn parse_or_expr(&mut self) -> Result<Expression, Error> {
489        // <or_expr> -> <and_expr> (`Or` <and_expr>)*
490        parse_binary_expr!(self, parse_and_expr, TokenKind::Or);
491    }
492
493    /// Parses an and expression (e.g. `x > 1 and x < 10`)
494    ///
495    /// The EBNF for an and expression is:
496    /// ```ebnf
497    ///     and_expr = eq_expr {"and" , eq_expr};
498    /// ```
499    fn parse_and_expr(&mut self) -> Result<Expression, Error> {
500        parse_binary_expr!(self, parse_eq_expr, TokenKind::And);
501    }
502
503    /// Parses an equality expression (e.g. `a == b != c`)
504    ///
505    /// The EBNF for an equality expression is:
506    /// ```ebnf
507    ///     eq_expr = comp_expr {("==" | "!=") , comp_expr};
508    /// ```
509    fn parse_eq_expr(&mut self) -> Result<Expression, Error> {
510        parse_binary_expr!(self, parse_comp_expr, TokenKind::Eq | TokenKind::NotEq);
511    }
512
513    /// Parses a comparison expression (e.g. `x > 4`)
514    ///
515    /// The EBNF for a comparison expression is:
516    /// ```ebnf
517    ///     comp_expr = sum_expr {("<" | "<=" | ">" | ">=") , sum_expr};
518    /// ```
519    fn parse_comp_expr(&mut self) -> Result<Expression, Error> {
520        parse_binary_expr!(
521            self,
522            parse_sum_expr,
523            TokenKind::Lt | TokenKind::LtEq | TokenKind::Gt | TokenKind::GtEq
524        );
525    }
526
527    /// Parses a sum expression (e.g. `1+2-3`)
528    ///
529    /// The EBNF for a sum expression is:
530    /// ```ebnf
531    ///     sum_expr = product_expr {("+" | "-") , product_expr}
532    /// ```
533    fn parse_sum_expr(&mut self) -> Result<Expression, Error> {
534        parse_binary_expr!(self, parse_product_expr, TokenKind::Plus | TokenKind::Minus);
535    }
536
537    /// Parses a product expression (e.g. `1 * 2 / 3 % 4`)
538    ///
539    /// The EBNF for a product expression is:
540    /// ```ebnf
541    ///     product_expr = postfix_expr {("*" | "/" | "%") , postfix_expr};
542    /// ```
543    fn parse_product_expr(&mut self) -> Result<Expression, Error> {
544        // <product_expr> -> <postfix_expr> ((`Mult` | `Div` | `Mod`) <postfix_expr>)*
545        parse_binary_expr!(
546            self,
547            parse_postfix_expr,
548            TokenKind::Mult | TokenKind::Div | TokenKind::Mod
549        );
550    }
551
552    /// Parses a post-fix expression (e.g. `x[10]` or `print(x)`)
553    ///
554    /// The EBNF for a post-fix expression is:
555    /// ```ebnf
556    ///     postfix_expr = prefix_expr {fn_call | array_index};
557    ///         array_index = "[" , expression , "]"
558    ///         fn_call = "(" , [args] , ")";
559    ///         args = expression , {"," , expression};
560    /// ```
561    fn parse_postfix_expr(&mut self) -> Result<Expression, Error> {
562        let start = self.cur_token.span.start;
563        let mut node = self.parse_prefix_expr()?;
564
565        loop {
566            match &self.cur_token.kind {
567                // Parse array index
568                TokenKind::LBracket => {
569                    self.eat(&TokenKind::LBracket)?;
570                    let index_expr = self.parse_expression()?;
571                    self.eat(&TokenKind::RBracket)?;
572                    let span = Span::new(start, self.cur_token.span.end);
573                    node = Expression::Index(Index {
574                        object: Box::new(node),
575                        index: Box::new(index_expr),
576                        span,
577                    })
578                }
579                // Parse function call
580                TokenKind::LParen => {
581                    self.eat(&TokenKind::LParen)?;
582                    let arguments = self
583                        .parse_expr_list_maybe_empty(&TokenKind::RParen)
584                        .map_err(|mut err| {
585                            err.message =
586                                format!("expected function parameter, got {}", self.cur_token.kind);
587                            err
588                        })?;
589                    self.eat(&TokenKind::RParen)?;
590
591                    let span = Span::new(start, self.cur_token.span.end);
592                    node = Expression::FunctionCall(FunctionCall {
593                        callee: Box::new(node),
594                        arguments,
595                        span,
596                    })
597                }
598                _ => break,
599            };
600        }
601
602        return Ok(node);
603    }
604
605    /// Parse a prefix expression (e.g. `not true` or `---1`)
606    ///
607    /// The EBNF for a prefix expression is:
608    /// ```ebnf
609    ///     prefix_expr = {"not" | "-"} , ident_expr;
610    /// ```
611    fn parse_prefix_expr(&mut self) -> Result<Expression, Error> {
612        // Add each prefix operation into an array. For example, if the expression was `not -1`,
613        // the array would be `[TokenKind::Not, TokenKind::Minus]`.
614        let mut prefix_operations = Vec::new();
615        while matches!(&self.cur_token.kind, TokenKind::Not | TokenKind::Minus) {
616            prefix_operations.push(self.cur_token.clone());
617            self.next_token();
618        }
619
620        // Parse the expression.
621        let mut node: Expression = self.parse_ident_expr()?;
622
623        // Now, add each operation to the syntax tree IN REVERSE, since the tree starts from the
624        // root node (the expression), and then the prefix operation closest to the expression is
625        // added to the tree first.
626        for operation in prefix_operations.into_iter().rev() {
627            let operator = operation.kind.clone();
628            let span = Span::new(operation.span.start, node.span().end);
629            node = Expression::Unary(Unary {
630                operator,
631                operand: Box::new(node),
632                span,
633            })
634        }
635
636        return Ok(node);
637    }
638
639    /// Parse an identifier expression (e.g. `my_var`).
640    ///
641    /// The EBNF for an identfier expression is:
642    /// ```ebnf
643    ///     ident_expr = ident | group_expr;
644    /// ```
645    fn parse_ident_expr(&mut self) -> Result<Expression, Error> {
646        match &self.cur_token.kind.clone() {
647            TokenKind::Identifier { .. } => Ok(Expression::Identifier(self.parse_identifier()?)),
648            _ => self.parse_group_expr(),
649        }
650    }
651
652    /// Parse a group expression (e.g. `(1+1)*2`).
653    ///
654    /// The EBNF for a group expression is:
655    /// ```ebnf
656    ///     group_expr = ("(" , expression , ")") | entity_expr;
657    /// ```
658    fn parse_group_expr(&mut self) -> Result<Expression, Error> {
659        if self.cur_token_is(&TokenKind::LParen) {
660            self.eat(&TokenKind::LParen)?;
661            let expr = self.parse_expression()?;
662            self.eat(&TokenKind::RParen)?;
663            return Ok(expr);
664        } else {
665            return self.parse_entity_expr();
666        }
667    }
668
669    /// Parse an entity expression. An 'entity' is a single building block of an
670    /// expression, it cannot be sliced up into smaller expressions.
671    ///
672    /// The EBNF for an entity expression is:
673    /// ```ebnf
674    ///     entity_expr = selection_expr | array_expr | literal;
675    /// ```
676    fn parse_entity_expr(&mut self) -> Result<Expression, Error> {
677        match &self.cur_token.kind {
678            TokenKind::If => return self.parse_selection_expr(),
679            TokenKind::LBracket => return self.parse_array_expr(),
680            _ => {
681                let literal = self.parse_literal()?;
682                return Ok(Expression::Literal(literal));
683            }
684        }
685    }
686
687    /// Parse an array expression (e.g. `[1, 2, x, 16/4]`).
688    ///
689    /// The EBNF for an array expression is:
690    /// ```ebnf
691    ///     array_expr = "[" , [elements] , "]";
692    ///         elements = expression , {"," , expression};
693    /// ```
694    fn parse_array_expr(&mut self) -> Result<Expression, Error> {
695        let start = self.cur_token.span.start;
696        self.eat(&TokenKind::LBracket)?;
697        let elements = self.parse_expr_list_maybe_empty(&TokenKind::RBracket)?;
698        self.eat(&TokenKind::RBracket)?;
699        let span = Span::new(start, self.cur_token.span.end);
700
701        return Ok(Expression::Array(Array { elements, span }));
702    }
703
704    /// Parse a selection expression (e.g. `if x > 4 then return x end`)
705    ///
706    /// The EBNF for an array expression is:
707    /// ```ebnf
708    ///     selection_expr = "if" , expression , "then" , [statements]
709    ///                      , ["otherwise" [statements]] "end";
710    /// ```
711    fn parse_selection_expr(&mut self) -> Result<Expression, Error> {
712        let start = self.cur_token.span.start;
713
714        self.eat(&TokenKind::If)?;
715        let condition_expr = self.parse_expression()?;
716        self.eat(&TokenKind::Then)?;
717        let conditional_statements = self.parse_selection_if_body()?;
718        let else_conditional_block = self.parse_selection_else_body()?;
719
720        let end = self.cur_token.span.end;
721        self.eat(&TokenKind::End)?;
722        let span = Span::new(start, end);
723
724        return Ok(Expression::Selection(Selection {
725            condition: Box::new(condition_expr),
726            conditional: conditional_statements,
727            else_conditional: else_conditional_block,
728            span,
729        }));
730    }
731
732    /// Parse the statements within the 'if' branch of a selection statement.
733    fn parse_selection_if_body(&mut self) -> Result<StatementList, Error> {
734        match matches!(self.cur_token.kind, TokenKind::End | TokenKind::Otherwise) {
735            false => self.parse_statement_list(),
736            true => Ok(self.create_empty_statement_list()),
737        }
738    }
739
740    /// Parse the statements within the 'else' branch of a selection statement (if it
741    /// exists).
742    fn parse_selection_else_body(&mut self) -> Result<Option<StatementList>, Error> {
743        if self.cur_token_is(&TokenKind::Otherwise) {
744            self.eat(&TokenKind::Otherwise)?;
745            let else_conditional_statements = match matches!(self.cur_token.kind, TokenKind::End) {
746                false => self.parse_statement_list()?,
747                true => self.create_empty_statement_list(),
748            };
749            Ok(Some(else_conditional_statements))
750        } else {
751            Ok(None)
752        }
753    }
754
755    /// Parse a literal. A literal is a hard-coded `string`, `int`, `float` or `bool`.
756    ///
757    /// The EBNF grammar for a literal is:
758    /// ```ebnf
759    ///     literal = int | float | bool | string;
760    ///         bool = "true" | "false";
761    ///         float = int , "." , int;
762    ///         int = digit , {digit};
763    ///             digit = "0" | "1" | ... | "9";
764    ///         string = '"', {char} , '"';
765    ///             char = ? any character ? -'"';
766    /// ```
767    fn parse_literal(&mut self) -> Result<Literal, Error> {
768        let span = self.cur_token.span.clone();
769        match &self.cur_token.kind.clone() {
770            TokenKind::Int(n) => {
771                return self.parse_integer_literal(n);
772            }
773            TokenKind::Float(n) => {
774                return self.parse_float_literal(n);
775            }
776            TokenKind::True => {
777                self.next_token();
778                return Ok(Literal::Boolean { value: true, span });
779            }
780            TokenKind::False => {
781                self.next_token();
782                return Ok(Literal::Boolean { value: false, span });
783            }
784            TokenKind::String(string) => {
785                self.next_token();
786                return Ok(Literal::String {
787                    value: string.to_owned(),
788                    span,
789                });
790            }
791            _ => parse_err!("expected Literal, got {}", self.cur_token.kind),
792        }
793    }
794
795    /// Converts a string into an integer literal.
796    fn parse_integer_literal(&mut self, integer_to_parse: &String) -> Result<Literal, Error> {
797        let span = self.cur_token.span.clone();
798        self.next_token();
799        match integer_to_parse.parse::<i64>() {
800            Ok(n) => Ok(Literal::Integer { value: n, span }),
801            Err(err) => match err.kind() {
802                IntErrorKind::PosOverflow => Err(Error::new(
803                    &format!(
804                        "literal to large for type Integer, whose maximum value is `{}`",
805                        i64::MAX
806                    ),
807                    ErrorKind::OverflowError,
808                )),
809                _ => parse_err!("failed to parse literal into Integer"),
810            },
811        }
812    }
813
814    /// Converts a string into a float literal.
815    fn parse_float_literal(&mut self, float_to_parse: &String) -> Result<Literal, Error> {
816        let span = self.cur_token.span.clone();
817        self.next_token();
818        match float_to_parse.parse::<f64>() {
819            Ok(n) => Ok(Literal::Float { value: n, span }),
820            Err(_) => {
821                return parse_err!("failed to parse literal into Float");
822            }
823        }
824    }
825
826    /// Parse an identifier (e.g. `my_var`).
827    ///
828    /// The EBNF for this function is:
829    /// ```ebnf
830    ///     ident = letter , {letter | digit};
831    ///         digit = "0" | "1" | ... | "9";
832    ///         letter = ? any alphabetical character ? | "_";
833    /// ```
834    fn parse_identifier(&mut self) -> Result<Identifier, Error> {
835        let span = self.cur_token.span.clone();
836        let name = match &self.cur_token.kind {
837            TokenKind::Identifier { name } => name.to_string(),
838            _ => return parse_err!("expected Identifier, got {}", self.cur_token.kind),
839        };
840        // TokenKind has already been checked, so we can safely call next_token()
841        self.next_token();
842
843        Ok(Identifier { name, span })
844    }
845
846    /// Parses a potentially empty list of (comma seperated) expressions.
847    ///
848    /// The EBNF for this function is:
849    /// ```ebnf
850    ///     expressions = [expression , {"," , expression}];
851    /// ```
852    fn parse_expr_list_maybe_empty(
853        &mut self,
854        ending_token: &TokenKind,
855    ) -> Result<Vec<Expression>, Error> {
856        if self.cur_token_is(ending_token) {
857            return Ok(vec![]);
858        } else {
859            return self.parse_expr_list();
860        };
861    }
862
863    /// Parses a list of (comma seperated) expressions.
864    ///
865    /// The EBNF for this function is:
866    /// ```ebnf
867    ///     expressions = expression , {"," , expression};
868    /// ```
869    fn parse_expr_list(&mut self) -> Result<Vec<Expression>, Error> {
870        let mut expressions: Vec<Expression> = Vec::new();
871        expressions.push(self.parse_expression()?);
872        while self.cur_token_is(&TokenKind::Comma) {
873            self.eat(&TokenKind::Comma)?;
874            expressions.push(self.parse_expression()?);
875        }
876        return Ok(expressions);
877    }
878
879    /// Helper function to create an empty statement list at the location fo the current
880    /// token.
881    fn create_empty_statement_list(&self) -> StatementList {
882        StatementList {
883            statements: vec![],
884            span: Span::new(self.cur_token.span.start, self.cur_token.span.start),
885        }
886    }
887}
888
889/// This function takes in a string input, and produces either a valid syntax tree, or a
890/// syntax error.
891///
892/// # Arguments
893///
894/// * `input` - The input SAP program as a string.
895///
896/// # Returns
897///
898/// * If there are no syntax errors, an `Ok(Program)`, which is the root node containing the
899/// entire AST (Abstract Syntax Tree) which represents the program.
900///
901/// * If there are syntax errors, then an `Err(Error)` is returned, containing information
902/// about what went wrong.
903pub fn parse(input: &str) -> Result<Program, Error> {
904    let lexer = Lexer::new(input);
905    let mut parser = Parser::new(lexer);
906
907    return parser.parse_program();
908}
parser/lib.rs

parser/
lib.rs