maolang_core/parser/
ast.rs

1//! AST definition and parser rules implemented
2
3use std::{error::Error, fmt::Display};
4
5use crate::tokenizer::{Token, TokenTag, keyword::Keyword};
6
7use super::Parser;
8
9/// A node in the abstract syntax tree, represents all possible operations that can occur
10#[derive(Debug, PartialEq, Clone)]
11pub enum Expr<'src> {
12    /// A for loop
13    ForLoop {
14        /// An inistialization
15        init: Box<Expr<'src>>,
16        /// The conditional check
17        cond: Box<Expr<'src>>,
18        /// The update after each run
19        inc: Box<Expr<'src>>,
20        /// What is run each time
21        exec: Box<Expr<'src>>,
22    },
23    /// A while loop
24    WhileLoop {
25        /// The condition checked before running
26        condition: Box<Expr<'src>>,
27        /// What runs each time around
28        eval: Box<Expr<'src>>,
29    },
30    /// A conditional executor
31    Conditional {
32        /// The condition being checked
33        condition: Box<Expr<'src>>,
34        /// What is executed if the condition is true
35        true_branch: Box<Expr<'src>>,
36        /// What is optionally executed if else
37        else_branch: Option<Box<Expr<'src>>>,
38    },
39    /// A block to be executed
40    Block(Vec<Expr<'src>>),
41    /// Print an expression's literal result
42    Print(Box<Expr<'src>>),
43    /// A variable reference
44    Variable(&'src str),
45    /// An assignment from an identifier to an expression
46    Assignment(&'src str, Box<Expr<'src>>),
47    /// A binary operation between two expressions
48    Binary {
49        /// The operator
50        op: BinaryOp,
51        /// Left hand side
52        left: Box<Expr<'src>>,
53        /// Right hand side
54        right: Box<Expr<'src>>,
55    },
56    /// A unary operation on a single expression
57    Unary {
58        /// The operator
59        op: UnaryOp,
60        /// The expression being acted on
61        node: Box<Expr<'src>>,
62    },
63    /// ( `expr` )
64    Grouping(Box<Expr<'src>>),
65    /// A literal
66    Literal(Literal<'src>),
67}
68
69/// A literal type
70#[derive(Debug, PartialEq, Clone, Copy)]
71pub enum Literal<'src> {
72    /// String
73    String(&'src str),
74    /// Real number
75    Number(f64),
76    /// Boolean
77    Bool(bool),
78    /// Null, nil, None, etc
79    Null,
80}
81
82impl Display for Literal<'_> {
83    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
84        match self {
85            Self::Bool(b) => write!(f, "{b}"),
86            Self::String(s) => write!(f, "{s}"),
87            Self::Number(n) => write!(f, "{n}"),
88            Self::Null => write!(f, "null"),
89        }
90    }
91}
92
93/// All operations that can occur between two targets
94#[derive(Debug, PartialEq, Clone, Copy)]
95pub enum UnaryOp {
96    /// Number negation
97    Neg,
98    /// Boolean not-ing
99    Not,
100}
101
102/// All operations that can occur between two targets
103#[derive(Debug, PartialEq, Clone, Copy)]
104pub enum BinaryOp {
105    /// Add two expressions
106    Add,
107    /// Subtract two expressions
108    Sub,
109    /// Multiply
110    Mul,
111    /// Divide
112    Div,
113    /// Equality
114    Eq,
115    /// Inequality
116    Neq,
117    /// Greater than
118    Gt,
119    /// Greater than or equal to
120    Gte,
121    /// Less than
122    Lt,
123    /// Less than or equal to
124    Lte,
125}
126
127/// An error that occurs whilst parsing
128#[derive(Clone, PartialEq, Debug, Default)]
129pub struct ParseError {
130    /// The error message
131    pub message: String,
132    /// Error token's line
133    pub line: usize,
134    /// Error token's column
135    pub col: usize,
136    /// Error token's len
137    pub len: usize,
138}
139
140impl Display for ParseError {
141    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142        write!(f, "parser :(")
143    }
144}
145
146impl Error for ParseError {}
147
148impl<'src> Parser<'src> {
149    /// Parse a series of statements
150    pub fn parse(&mut self) -> Result<Vec<Expr<'src>>, ParseError> {
151        let mut statements = vec![];
152
153        while self.peek() != TokenTag::EOF {
154            statements.push(self.statement()?);
155        }
156
157        Ok(statements)
158    }
159
160    /// Parse a singular statement
161    pub fn statement(&mut self) -> Result<Expr<'src>, ParseError> {
162        match self.peek() {
163            TokenTag::Keyword(Keyword::Print) => {
164                self.advance();
165                self.consume_open_paren_if_necessary()?;
166                let next = self.expression()?;
167                self.consume_close_paren_if_necessary()?;
168                self.consume_end()?;
169                Ok(Expr::Print(Box::new(next)))
170            }
171            TokenTag::Keyword(Keyword::OpenBrace) => self.block(),
172            TokenTag::Keyword(Keyword::ConditionalCheck) => self.if_statement(),
173            TokenTag::Keyword(Keyword::WhileLoopInit) => self.while_loop(),
174            TokenTag::Keyword(Keyword::ForLoopInit) => self.for_loop(),
175            _ => {
176                let res = self.expression()?;
177                self.consume_end()?;
178                Ok(res)
179            }
180        }
181    }
182
183    /// A for loop is `for` (statment; expression; statement) block
184    fn for_loop(&mut self) -> Result<Expr<'src>, ParseError> {
185        self.consume(&TokenTag::Keyword(Keyword::ForLoopInit))?;
186        self.consume_open_paren_if_necessary()?;
187
188        let init = Box::new(self.statement()?);
189        self.consume_end()?;
190
191        let cond = Box::new(self.expression()?);
192        self.consume_end()?;
193
194        let inc = Box::new(self.statement()?);
195
196        self.consume_close_paren_if_necessary()?;
197
198        let exec = Box::new(self.block()?);
199
200        Ok(Expr::ForLoop {
201            init,
202            cond,
203            inc,
204            exec,
205        })
206    }
207
208    /// A while loop is `while` (expression) block
209    fn while_loop(&mut self) -> Result<Expr<'src>, ParseError> {
210        self.consume(&TokenTag::Keyword(Keyword::WhileLoopInit))?;
211        self.consume_open_paren_if_necessary()?;
212        let condition = Box::new(self.expression()?);
213        self.consume_close_paren_if_necessary()?;
214        let eval = Box::new(self.block()?);
215
216        Ok(Expr::WhileLoop { condition, eval })
217    }
218
219    /// A block is `{ (expression)* }`
220    fn block(&mut self) -> Result<Expr<'src>, ParseError> {
221        self.consume(&TokenTag::Keyword(Keyword::OpenBrace))?;
222        let mut block_items = vec![];
223
224        while self.peek() != TokenTag::Keyword(Keyword::CloseBrace) {
225            let expr = self.statement()?;
226            block_items.push(expr);
227        }
228
229        self.consume(&TokenTag::Keyword(Keyword::CloseBrace))?;
230        Ok(Expr::Block(block_items))
231    }
232
233    /// An If statement is:
234    /// `if ( equality ) { `block` } ( else { `block` })?`
235    fn if_statement(&mut self) -> Result<Expr<'src>, ParseError> {
236        self.consume(&TokenTag::Keyword(Keyword::ConditionalCheck))?;
237        self.consume_open_paren_if_necessary()?;
238        let check = self.equality()?;
239        self.consume_close_paren_if_necessary()?;
240        let block = self.block()?;
241
242        let mut else_branch = None;
243
244        if self.peek() == TokenTag::Keyword(Keyword::ConditionalElse) {
245            self.advance();
246            else_branch = Some(Box::new(self.statement()?));
247        }
248
249        Ok(Expr::Conditional {
250            condition: Box::new(check),
251            true_branch: Box::new(block),
252            else_branch,
253        })
254    }
255
256    /// an expression is equality  | var ident = equality
257    fn expression(&mut self) -> Result<Expr<'src>, ParseError> {
258        match self.peek() {
259            TokenTag::Keyword(Keyword::VariableDeclaration) => {
260                self.advance();
261                if let TokenTag::Identifier(name) = self.advance() {
262                    self.consume(&TokenTag::Keyword(Keyword::Equal))?;
263                    let assignment = self.equality()?;
264
265                    Ok(Expr::Assignment(name, Box::new(assignment)))
266                } else {
267                    let Token {
268                        tag,
269                        line,
270                        col,
271                        len,
272                    } = self.peek_token();
273                    Err(ParseError {
274                        message: format!("Expected a variable expression, found `{tag:?}`"),
275                        line,
276                        col,
277                        len,
278                    })
279                }
280            }
281
282            _ => self.equality(),
283        }
284    }
285
286    /// An equality is comparison ( (!= | ==) comparison)*
287    fn equality(&mut self) -> Result<Expr<'src>, ParseError> {
288        let mut expr = self.comparison()?;
289
290        while matches!(
291            self.peek(),
292            TokenTag::Keyword(Keyword::EqualEqual) | TokenTag::Keyword(Keyword::BangEqual)
293        ) {
294            let op = match self.advance() {
295                TokenTag::Keyword(Keyword::EqualEqual) => BinaryOp::Eq,
296                TokenTag::Keyword(Keyword::BangEqual) => BinaryOp::Neq,
297                _ => unreachable!(),
298            };
299
300            let right = Box::new(self.comparison()?);
301
302            expr = Expr::Binary {
303                op,
304                left: Box::new(expr),
305                right,
306            }
307        }
308
309        Ok(expr)
310    }
311
312    /// A comparison is term ( ( ">" | ">=" | "<" | "<=" ) term )*
313    fn comparison(&mut self) -> Result<Expr<'src>, ParseError> {
314        let mut expr = self.term()?;
315
316        while matches!(
317            self.peek(),
318            TokenTag::Keyword(Keyword::Greater)
319                | TokenTag::Keyword(Keyword::GreaterEqual)
320                | TokenTag::Keyword(Keyword::Less)
321                | TokenTag::Keyword(Keyword::LessEqual)
322        ) {
323            let op = match self.advance() {
324                TokenTag::Keyword(Keyword::Greater) => BinaryOp::Gt,
325                TokenTag::Keyword(Keyword::GreaterEqual) => BinaryOp::Gte,
326                TokenTag::Keyword(Keyword::Less) => BinaryOp::Lt,
327                TokenTag::Keyword(Keyword::LessEqual) => BinaryOp::Lte,
328                _ => unreachable!(),
329            };
330
331            let right = Box::new(self.term()?);
332
333            expr = Expr::Binary {
334                op,
335                left: Box::new(expr),
336                right,
337            }
338        }
339
340        Ok(expr)
341    }
342
343    /// A term is factor ( ( "+" | "-" ) factor )*
344    fn term(&mut self) -> Result<Expr<'src>, ParseError> {
345        let mut expr = self.factor()?;
346
347        while matches!(self.peek(), TokenTag::Plus | TokenTag::Minus) {
348            let op = match self.advance() {
349                TokenTag::Plus => BinaryOp::Add,
350                TokenTag::Minus => BinaryOp::Sub,
351                _ => unreachable!(),
352            };
353
354            let right = Box::new(self.factor()?);
355
356            expr = Expr::Binary {
357                op,
358                left: Box::new(expr),
359                right,
360            };
361        }
362
363        Ok(expr)
364    }
365
366    /// A factor is unary ( ( "*" | "/" ) unary )*
367    fn factor(&mut self) -> Result<Expr<'src>, ParseError> {
368        let mut expr = self.unary()?;
369
370        while matches!(self.peek(), TokenTag::Star | TokenTag::Slash) {
371            let op = match self.advance() {
372                TokenTag::Star => BinaryOp::Mul,
373                TokenTag::Slash => BinaryOp::Div,
374                _ => unreachable!(),
375            };
376
377            let right = Box::new(self.unary()?);
378
379            expr = Expr::Binary {
380                op,
381                left: Box::new(expr),
382                right,
383            };
384        }
385
386        Ok(expr)
387    }
388
389    /// A unary expression is either (! | -) unary | primary
390    fn unary(&mut self) -> Result<Expr<'src>, ParseError> {
391        if matches!(
392            self.peek(),
393            TokenTag::Keyword(Keyword::Bang) | TokenTag::Minus
394        ) {
395            let op = match self.advance() {
396                TokenTag::Keyword(Keyword::Bang) => UnaryOp::Not,
397                TokenTag::Minus => UnaryOp::Neg,
398                _ => unreachable!(),
399            };
400
401            let unary = self.unary()?;
402
403            Ok(Expr::Unary {
404                op,
405                node: Box::new(unary),
406            })
407        } else {
408            self.primary()
409        }
410    }
411
412    /// Variable | NUMBER | STRING | "true" | "false" | "nil" | "(" expression ")"
413    fn primary(&mut self) -> Result<Expr<'src>, ParseError> {
414        let advance = self.advance();
415        let prim = match advance {
416            TokenTag::Number(n) => Expr::Literal(Literal::Number(n)),
417            TokenTag::Keyword(Keyword::True) => Expr::Literal(Literal::Bool(true)),
418            TokenTag::Keyword(Keyword::False) => Expr::Literal(Literal::Bool(false)),
419            TokenTag::Keyword(Keyword::EmptyValue) => Expr::Literal(Literal::Null),
420            TokenTag::Identifier(ident) => Expr::Variable(ident),
421            TokenTag::String(s) => Expr::Literal(Literal::String(s)),
422            TokenTag::OpenParen => {
423                let expr = self.expression()?;
424                self.consume(&TokenTag::CloseParen)?;
425
426                Expr::Grouping(Box::new(expr))
427            }
428            _ => {
429                let Token {
430                    tag,
431                    line,
432                    col,
433                    len,
434                } = self.peek_token();
435                return Err(ParseError {
436                    message: format!("Expected primary statement, found `{tag:?}`"),
437                    line,
438                    col,
439                    len,
440                });
441            }
442        };
443
444        Ok(prim)
445    }
446}
447
448/** GOOD TO KNOW
449* These are the mappings for when the seed is 42:
450*
451* Lexer:
452*
453*   "undefined" EmptyValue,
454*   "not" Bang,
455*   "each" ForLoopInit,
456*   "and" And,
457*   "lte" LessEqual,
458*   "(": False,
459*   "true" True,
460*   "while" WhileLoopInit,
461*   "println" Print,
462*   "$" VariableDeclaration,
463*   "<" Less,
464*   "equals" EqualEqual,
465*   "inequal" BangEqual,
466*   "\\/" Or,
467*   "case" ConditionalCheck,
468*   "end" CloseBrace,
469*   "gte" GreaterEqual,
470*   "then" ConditionalElse,
471*   "def" FuncDec,
472*   "=" Equal,
473*   "{" OpenBrace,
474*   ">" Greater
475*
476*/
477
478#[cfg(test)]
479mod tests {
480    use rand::SeedableRng;
481    use rand_chacha::ChaCha8Rng;
482
483    use crate::{
484        parser::{
485            Parser,
486            ast::{BinaryOp, Expr, Literal, UnaryOp},
487        },
488        tokenizer::Tokenizable,
489    };
490
491    #[test]
492    fn while_loop() {
493        let mut rng = ChaCha8Rng::seed_from_u64(42);
494        let stream = r#"
495while true {
496    println "yay" ;
497end
498            "#
499        .tokenize(&mut rng)
500        .expect("Valid tokenization");
501        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
502
503        let ast = parser.parse().expect("Parse to AST");
504
505        let expected = [Expr::WhileLoop {
506            condition: Box::new(Expr::Literal(Literal::Bool(true))),
507            eval: Box::new(Expr::Block(vec![Expr::Print(Box::new(Expr::Literal(
508                Literal::String("yay"),
509            )))])),
510        }];
511
512        assert_eq!(ast, expected);
513    }
514
515    #[test]
516    fn if_statement() {
517        let mut rng = ChaCha8Rng::seed_from_u64(42);
518        let stream = r#"
519case true {
520    $ foo = 10 ;
521end
522            "#
523        .tokenize(&mut rng)
524        .expect("Valid tokenization");
525        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
526
527        let ast = parser.parse().expect("Parse to AST");
528
529        let expected = [Expr::Conditional {
530            condition: Box::new(Expr::Literal(Literal::Bool(true))),
531            true_branch: Box::new(Expr::Block(vec![Expr::Assignment(
532                "foo",
533                Box::new(Expr::Literal(Literal::Number(10.0))),
534            )])),
535            else_branch: None,
536        }];
537
538        assert_eq!(ast, expected);
539    }
540
541    #[test]
542    fn variable_assignment() {
543        let mut rng = ChaCha8Rng::seed_from_u64(42);
544        let stream = "$ i = 0 ;".tokenize(&mut rng).expect("Valid tokenization");
545        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
546
547        let ast = &parser.parse().expect("Parse to AST")[0];
548
549        assert_eq!(
550            ast,
551            &Expr::Assignment("i", Box::new(Expr::Literal(Literal::Number(0.0))))
552        )
553    }
554
555    #[test]
556    fn print_statement() {
557        let mut rng = ChaCha8Rng::seed_from_u64(42);
558        let stream = "println 42 ;"
559            .tokenize(&mut rng)
560            .expect("Valid tokenization");
561        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
562
563        let ast = &parser.parse().expect("Parse to AST")[0];
564
565        assert_eq!(
566            ast,
567            &Expr::Print(Box::new(Expr::Literal(Literal::Number(42.0))))
568        );
569    }
570
571    #[test]
572    fn binary_operations() {
573        let mut rng = ChaCha8Rng::seed_from_u64(42);
574        let stream = "1 + 2 * 3 ;"
575            .tokenize(&mut rng)
576            .expect("Valid tokenization");
577        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
578
579        let ast = &parser.parse().expect("Parse to AST")[0];
580
581        assert_eq!(
582            ast,
583            &Expr::Binary {
584                op: BinaryOp::Add,
585                left: Box::new(Expr::Literal(Literal::Number(1.0))),
586                right: Box::new(Expr::Binary {
587                    op: BinaryOp::Mul,
588                    left: Box::new(Expr::Literal(Literal::Number(2.0))),
589                    right: Box::new(Expr::Literal(Literal::Number(3.0))),
590                }),
591            }
592        );
593    }
594
595    #[test]
596    fn comparison_operations() {
597        let mut rng = ChaCha8Rng::seed_from_u64(42);
598        let stream = "1 < 2 ;".tokenize(&mut rng).expect("Valid tokenization");
599        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
600
601        let ast = &parser.parse().expect("Parse to AST")[0];
602
603        assert_eq!(
604            ast,
605            &Expr::Binary {
606                op: BinaryOp::Lt,
607                left: Box::new(Expr::Literal(Literal::Number(1.0))),
608                right: Box::new(Expr::Literal(Literal::Number(2.0))),
609            }
610        );
611    }
612
613    #[test]
614    fn equality_operations() {
615        let mut rng = ChaCha8Rng::seed_from_u64(42);
616        let stream = "1 equals 1 ;"
617            .tokenize(&mut rng)
618            .expect("Valid tokenization");
619        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
620
621        let ast = &parser.parse().expect("Parse to AST")[0];
622
623        assert_eq!(
624            ast,
625            &Expr::Binary {
626                op: BinaryOp::Eq,
627                left: Box::new(Expr::Literal(Literal::Number(1.0))),
628                right: Box::new(Expr::Literal(Literal::Number(1.0))),
629            }
630        );
631    }
632
633    #[test]
634    fn unary_operations() {
635        let mut rng = ChaCha8Rng::seed_from_u64(42);
636        let stream = "not true ;".tokenize(&mut rng).expect("Valid tokenization");
637        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
638
639        let ast = &parser.parse().expect("Parse to AST")[0];
640
641        assert_eq!(
642            ast,
643            &Expr::Unary {
644                op: UnaryOp::Not,
645                node: Box::new(Expr::Literal(Literal::Bool(true))),
646            }
647        );
648    }
649
650    #[test]
651    fn grouping() {
652        let mut rng = ChaCha8Rng::seed_from_u64(42);
653        let stream = "(1 + 2) * 3 ;"
654            .tokenize(&mut rng)
655            .expect("Valid tokenization");
656        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
657
658        let ast = &parser.parse().expect("Parse to AST")[0];
659        let expected = Expr::Binary {
660            op: BinaryOp::Mul,
661            left: Box::new(Expr::Grouping(Box::new(Expr::Binary {
662                op: BinaryOp::Add,
663                left: Box::new(Expr::Literal(Literal::Number(1.0))),
664                right: Box::new(Expr::Literal(Literal::Number(2.0))),
665            }))),
666            right: Box::new(Expr::Literal(Literal::Number(3.0))),
667        };
668
669        assert_eq!(ast, &expected);
670    }
671
672    #[test]
673    fn variable_reference() {
674        let mut rng = ChaCha8Rng::seed_from_u64(42);
675        let stream = "x ;".tokenize(&mut rng).expect("Valid tokenization");
676        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
677
678        let ast = &parser.parse().expect("Parse to AST")[0];
679
680        assert_eq!(ast, &Expr::Variable("x"));
681    }
682
683    #[test]
684    fn complex_expression() {
685        let mut rng = ChaCha8Rng::seed_from_u64(42);
686        let stream = "$ x = 5 * (3 + 2) ; println x < 10 ;"
687            .tokenize(&mut rng)
688            .expect("Valid tokenization");
689        let mut parser = Parser::from_rng(&mut rng).with_tokens(&stream);
690
691        let ast = parser.parse().expect("Parse to AST");
692
693        assert_eq!(
694            ast,
695            vec![
696                Expr::Assignment(
697                    "x",
698                    Box::new(Expr::Binary {
699                        op: BinaryOp::Mul,
700                        left: Box::new(Expr::Literal(Literal::Number(5.0))),
701                        right: Box::new(Expr::Grouping(Box::new(Expr::Binary {
702                            op: BinaryOp::Add,
703                            left: Box::new(Expr::Literal(Literal::Number(3.0))),
704                            right: Box::new(Expr::Literal(Literal::Number(2.0))),
705                        }))),
706                    }),
707                ),
708                Expr::Print(Box::new(Expr::Binary {
709                    op: BinaryOp::Lt,
710                    left: Box::new(Expr::Variable("x")),
711                    right: Box::new(Expr::Literal(Literal::Number(10.0))),
712                })),
713            ]
714        );
715    }
716}