nexus_lib/parser/
mod.rs

1pub mod ast;
2mod tests;
3
4use std::{error::Error, fmt::Display, mem::swap};
5
6use ast::*;
7use crate::lexer::{
8    tokens::*,
9    Lexer,
10};
11
12pub struct Parser<'a> {
13    lexer: &'a mut Lexer,
14
15    cur_tok: Token,
16    peek_tok: Token,
17}
18
19#[repr(u8)]
20#[derive(Debug, PartialEq, Eq, Clone, Copy)]
21enum Precedence {
22    /// default value
23    LOWEST,
24    /// Assign new value to variable
25    ASSIGN,
26    /// Check if i is in list/range
27    ///
28    /// `i in 0..10`
29    CONTAINS,
30    /// Range of numbers
31    /// 0..1000
32    RANGE,
33    /// Check if value is equivalent
34    /// to other value
35    ///
36    /// `x == 0`
37    EQUALS,
38    /// Lesser and greater
39    /// comparison operations
40    LESSGREATER,
41    /// Lesser or equal and greater or equal
42    /// comparsion operations
43    LESSGREATEROREQUAL,
44    /// Sum of two numbers
45    SUM,
46    /// Product of two numbers
47    PRODUCT,
48    /// Prefix operators like +, -, !
49    PREFIX,
50    /// Call a function
51    CALL,
52    /// Convert types using `as`
53    CONVERSION,
54    /// Index a list
55    /// `myList[1]`
56    INDEX,
57}
58
59impl PartialOrd for Precedence {
60    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
61        let iself = *self as u8;
62        let iother = *other as u8;
63        iself.partial_cmp(&iother)
64    }
65}
66
67impl Ord for Precedence {
68    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
69        let iself = *self as u8;
70        let iother = *other as u8;
71        iself.cmp(&iother)
72    }
73}
74
75impl<'a> Parser<'a> {
76    pub fn new(lexer: &'a mut Lexer) -> Self {
77        let cur_tok = lexer.tokenize();
78        let peek_tok = lexer.tokenize();
79        Self {
80            lexer,
81            cur_tok,
82            peek_tok,
83        }
84    }
85
86    pub fn parse_stmt(&mut self) -> Result<Statement, EofError> {
87        Ok(match self.cur_tok {
88            Token::Use => todo!(),
89            Token::Var => self.parse_variable(false),
90            Token::Const => self.parse_variable(true),
91            Token::Break => {
92                let label = match self.peek_tok {
93                    Token::Ident(_) => {
94                        self.next_token();
95                        Some(Ident(self.cur_tok.to_string()))
96                    }
97                    _ => None,
98                };
99                Statement::Break(BreakStmt { label })
100            }
101            Token::Return => {
102                let val = match self.peek_tok {
103                    Token::Eol => None,
104                    _ => {
105                        self.next_token();
106                        Some(self.parse_expr(Precedence::LOWEST))
107                    }
108                };
109                Statement::Return(ReturnStmt { val })
110            }
111            Token::Local => {
112                if self.peek_tok == Token::Local {
113                    panic!("Cannot stack multiple `local` statements")
114                }
115                self.next_token();
116                let stmt = self.parse_stmt().expect(
117                    "Encountered End of file instead of a statement after the local statement",
118                );
119                Statement::Local(LocalStmt {
120                    val: Box::new(stmt),
121                })
122            }
123            Token::Eol => {
124                self.next_token();
125                return self.parse_stmt();
126            }
127            Token::Eof => return Err(EofError),
128            _ => {
129                if let Token::Ident(_) = self.cur_tok {
130                    if self.peek_tok == Token::VarAssign {
131                        return Ok(self.parse_quick_assign());
132                    }
133                    match self.peek_tok {
134                        Token::Colon | Token::ConstAssign | Token::VarAssign => {
135                            return Ok(self.parse_quick_assign())
136                        }
137                        _ => (),
138                    }
139                }
140                Statement::Expression(self.parse_expr(Precedence::LOWEST))
141            }
142        })
143    }
144
145    fn parse_expr(&mut self, precedence: Precedence) -> Expression {
146        let prefix = self.parse_prefix();
147        if prefix.is_none() {
148            panic!("No prefix parse found for: {}", self.cur_tok)
149        }
150
151        let prefix = prefix.unwrap();
152        let mut left_expression = prefix;
153
154        while !self.peek_is_end() && precedence < self.get_precedence(&self.peek_tok) {
155            self.next_token();
156            // Unwrap here might not be safe. Observe this
157            left_expression = match self.parse_infix(left_expression) {
158                Some(expr) => expr,
159                None => panic!("Invalid infix expression"),
160            };
161        }
162
163        left_expression
164    }
165
166    fn parse_prefix(&mut self) -> Option<Expression> {
167        Some(match self.cur_tok {
168            Token::Ident(_) => Expression::Ident(Ident(self.cur_tok.to_string())),
169            Token::Literal(Literal::Bool(ref bool)) => Expression::Literal(Literal::Bool(*bool)),
170            Token::Literal(Literal::Num(ref lit)) => Expression::Literal(Literal::Num(*lit)),
171            Token::Literal(Literal::Str(_)) => self.parse_str_lit(),
172            Token::LSquare => self.parse_list_lit(),
173            // TODO: add hashes
174            /*
175            Token::LCURLY => self.parse_hash_literal(),
176            */
177            // Token::NONE => Expression::NONE(NoneLiteral),
178            Token::LParent => self.parse_grouped_expr(),
179            Token::Func => self.parse_func_expr(),
180            Token::If => self.parse_if_expr(IfType::If),
181            Token::Loop => self.parse_loop_expr(),
182            Token::When => self.parse_when_expr(),
183            Token::ExclamMark
184            | Token::Operator(Operator::Plus)
185            | Token::Operator(Operator::Minus) => self.parse_prefix_expr(),
186            // Token::ANNOTATION => self.parse_annotation(),
187            _ => return None,
188        })
189    }
190
191    fn parse_infix(&mut self, left: Expression) -> Option<Expression> {
192        Some(match self.cur_tok {
193            Token::Operator(ref op) => match op {
194                Operator::Equals
195                | Operator::NotEquals
196                | Operator::Greater
197                | Operator::Lesser
198                | Operator::GreaterEquals
199                | Operator::LesserEquals
200                | Operator::Plus
201                | Operator::Minus
202                | Operator::Asterisk
203                | Operator::Slash => self.parse_infix_expr(left),
204            },
205            Token::LParent => self.parse_call_expr(left),
206            // Token::LSquare => self.parse_index_expr(left),
207            _ => return None,
208        })
209    }
210
211    fn parse_str_lit(&mut self) -> Expression {
212        // TODO: Parse string interpolation
213        Expression::Literal(Literal::Str(self.cur_tok.to_string()))
214    }
215
216    fn parse_list_lit(&mut self) -> Expression {
217        todo!()
218    }
219
220    fn parse_grouped_expr(&mut self) -> Expression {
221        todo!()
222    }
223
224    fn parse_func_expr(&mut self) -> Expression {
225        self.expect_peek(Token::LParent);
226        self.next_token();
227        let args = self.parse_ident_list(Token::RParent);
228        let ret_type = match self.peek_tok {
229            Token::Colon => {
230                self.next_token();
231                self.next_token();
232                Some(Ident(self.cur_tok.to_string()))
233            }
234            Token::LCurly => None,
235            _ => panic!(),
236        };
237        self.next_token();
238        let block = self.parse_block_stmt();
239        Expression::Func(FuncExpr {
240            ret_type,
241            args,
242            block,
243        })
244    }
245
246    fn parse_if_expr(&mut self, _type: IfType) -> Expression {
247        match _type {
248            // Current token needs to be `if`
249            IfType::If => {
250                self.next_token();
251                let cond = self.parse_expr(Precedence::LOWEST);
252                self.expect_peek(Token::LCurly);
253                self.next_token();
254                let block = self.parse_block_stmt();
255                let alt = match self.peek_tok {
256                    Token::Else => {
257                        self.next_token();
258                        Some(Box::from(match self.peek_tok {
259                            Token::If => match self.parse_if_expr(IfType::ElseIf) {
260                                Expression::If(_if) => _if,
261                                _ => panic!("UNREACHABLE"),
262                            },
263                            Token::LCurly => match self.parse_if_expr(IfType::Else) {
264                                Expression::If(_if) => _if,
265                                _ => panic!("UNREACHABLE"),
266                            },
267                            ref other => {
268                                panic!("Exptected `block` or `if` after else, got `{other:?}`")
269                            }
270                        }))
271                    }
272                    _ => None,
273                };
274                Expression::If(IfExpr {
275                    cond: Some(Box::from(cond)),
276                    block,
277                    _type,
278                    alt,
279                })
280            }
281            // Current token needs to be `else`
282            IfType::ElseIf => {
283                self.next_token();
284                let _if = match self.parse_if_expr(IfType::If) {
285                    Expression::If(_if) => _if,
286                    other => panic!("Unreachable: Got {other:?} instead of if expression"),
287                };
288                Expression::If(IfExpr { _type, .._if })
289            }
290            // Current token needs to be `else`
291            IfType::Else => {
292                self.next_token();
293                let block = self.parse_block_stmt();
294                Expression::If(IfExpr {
295                    _type,
296                    cond: None,
297                    block,
298                    alt: None,
299                })
300            }
301        }
302    }
303
304    // TODO: parse else branches
305    fn parse_loop_expr(&mut self) -> Expression {
306        self.next_token();
307        let cond = self.parse_expr(Precedence::LOWEST);
308        self.expect_peek(Token::LCurly);
309        self.next_token();
310        let block = self.parse_block_stmt();
311        Expression::Loop(LoopExpr {
312            _type: LoopType::While,
313            cond: Some(Box::from(cond)),
314            block,
315            alt: None,
316        })
317    }
318
319    fn parse_when_expr(&mut self) -> Expression {
320        todo!()
321    }
322
323    fn parse_infix_expr(&mut self, left_expr: Expression) -> Expression {
324        let op = match self.cur_tok {
325            Token::Operator(_) => self.cur_tok_to_in_op(),
326            ref other => panic!("Missing operator, got {other} instead"),
327        };
328        let prec = self.get_precedence(&self.cur_tok);
329        self.next_token();
330        let right_expr = self.parse_expr(prec);
331        Expression::Infix(InfixExpr {
332            left: Box::from(left_expr),
333            right: Box::from(right_expr),
334            op,
335        })
336    }
337
338    fn parse_prefix_expr(&mut self) -> Expression {
339        let op = match &self.cur_tok {
340            Token::Operator(op) => Self::reg_op_to_pre_op(op),
341            Token::ExclamMark => PrefixOp::Not,
342            other => panic!("Expected operator, got: {other} instead"),
343        };
344        self.next_token();
345        let val = Box::from(self.parse_expr(Precedence::PREFIX));
346        Expression::Prefix(PrefixExpr { op, val })
347    }
348
349    fn cur_tok_to_in_op(&self) -> InfixOp {
350        match &self.cur_tok {
351            Token::Operator(op) => Self::reg_op_to_in_op(op),
352            _ => todo!(),
353        }
354    }
355
356    fn reg_op_to_pre_op(op: &Operator) -> PrefixOp {
357        match op {
358            Operator::Plus => PrefixOp::Pos,
359            Operator::Minus => PrefixOp::Neg,
360            other => panic!("Cannot convert operator: {other} to pre op"),
361        }
362    }
363
364    fn reg_op_to_in_op(op: &Operator) -> InfixOp {
365        match op {
366            Operator::Equals => InfixOp::Eq,
367            Operator::NotEquals => InfixOp::NEq,
368            Operator::Greater => InfixOp::GT,
369            Operator::Lesser => InfixOp::LT,
370            Operator::GreaterEquals => InfixOp::GTEq,
371            Operator::LesserEquals => InfixOp::LTEq,
372            Operator::Plus => InfixOp::Add,
373            Operator::Minus => InfixOp::Sub,
374            Operator::Asterisk => InfixOp::Mul,
375            Operator::Slash => InfixOp::Div,
376        }
377    }
378
379    /// First token needs to be the begin_token like `(` or `{` for example
380    /// This function sets cur_tok to the end_tok
381    fn parse_ident_list(&mut self, end_tok: Token) -> Vec<OptionallyTypedIdent> {
382        if self.peek_tok == end_tok {
383            self.next_token();
384            return Vec::new();
385        }
386
387        self.next_token();
388
389        let mut items = Vec::new();
390
391        let first_item = self.parse_typed_ident();
392
393        items.push(first_item);
394
395        if self.peek_tok == Token::Comma {
396            self.next_token();
397        }
398
399        while self.peek_tok != end_tok {
400            self.next_token();
401            let ident = self.parse_typed_ident();
402            items.push(ident);
403            if self.peek_tok == Token::Comma {
404                self.next_token();
405            }
406        }
407
408        self.next_token();
409        items
410    }
411
412    /// First token needs to be the begin_token like `(` or `{` for example
413    fn parse_raw_list(&mut self, end_tok: Token) -> Vec<Expression> {
414        if self.peek_tok == Token::RParent {
415            self.next_token();
416            return Vec::new();
417        }
418
419        self.next_token();
420
421        let mut items = Vec::new();
422
423        let first_item = self.parse_expr(Precedence::LOWEST);
424
425        items.push(first_item);
426
427        if self.peek_tok == Token::Comma {
428            self.next_token();
429        }
430
431        while self.peek_tok != end_tok {
432            self.next_token();
433            let ident = self.parse_expr(Precedence::LOWEST);
434            items.push(ident);
435            if self.peek_tok == Token::Comma {
436                self.next_token();
437            }
438        }
439
440        self.next_token();
441        items
442    }
443
444    /// First token needs to be a left curly `{`
445    fn parse_block_stmt(&mut self) -> BlockStmt {
446        let mut stmts = Vec::new();
447
448        self.next_token();
449        while self.peek_tok != Token::RCurly {
450            self.next_token();
451            let stmt = self
452                .parse_stmt()
453                .expect("Found eof even though the blockstatement was not yet fully parsed");
454            stmts.push(stmt);
455            self.next_token();
456        }
457        self.next_token();
458        BlockStmt { stmts }
459    }
460
461    fn parse_typed_ident(&mut self) -> OptionallyTypedIdent {
462        let ident = Ident(self.cur_tok.to_string());
463        let _type = match self.peek_tok {
464            Token::Colon => {
465                self.next_token();
466                self.next_token();
467                Some(Ident(self.cur_tok.to_string()))
468            }
469            _ => None,
470        };
471        OptionallyTypedIdent { ident, _type }
472    }
473
474    fn parse_variable(&mut self, is_const: bool) -> Statement {
475        let name = Ident(match self.peek_tok {
476            Token::Ident(_) => self.peek_tok.to_string(),
477            _ => panic!("Expected an identifier, received: {}", self.peek_tok),
478        });
479
480        self.next_token();
481
482        let _type = match self.peek_tok {
483            Token::Colon => {
484                self.next_token();
485                let ident = Ident(self.peek_tok.to_string());
486                self.next_token();
487                self.expect_peek(Token::Assign);
488                self.next_token();
489                Some(ident)
490            }
491            Token::Assign => {
492                self.next_token();
493                None
494            }
495            _ => panic!("Expected Assign, received: {}", self.peek_tok),
496        };
497
498        self.next_token();
499
500        let val = self.parse_expr(Precedence::LOWEST);
501
502        Statement::Variable(VarStmt {
503            name: OptionallyTypedIdent { ident: name, _type },
504            val,
505            is_const,
506        })
507    }
508
509    fn parse_quick_assign(&mut self) -> Statement {
510        let name = Ident(match self.cur_tok {
511            Token::Ident(_) => self.cur_tok.to_string(),
512            // unreachable
513            _ => panic!("Expected an identifier, received: {}", self.cur_tok),
514        });
515
516        let is_const;
517
518        let _type = match self.peek_tok {
519            Token::Colon => {
520                self.next_token();
521                let ident = Ident(self.peek_tok.to_string());
522                self.next_token();
523                match self.peek_tok {
524                    Token::ConstAssign => is_const = true,
525                    Token::VarAssign => is_const = false,
526                    _ => panic!(
527                        "Expected ConstAssign or VarAssign, received: {}",
528                        self.peek_tok
529                    ),
530                }
531                Some(ident)
532            }
533            Token::ConstAssign => {
534                is_const = true;
535                None
536            }
537            Token::VarAssign => {
538                is_const = false;
539                None
540            }
541            _ => panic!("Expected Assign, received: {}", self.peek_tok),
542        };
543
544        self.next_token();
545        self.next_token();
546
547        let val = self.parse_expr(Precedence::LOWEST);
548
549        Statement::Variable(VarStmt {
550            name: OptionallyTypedIdent { ident: name, _type },
551            val,
552            is_const,
553        })
554    }
555
556    fn parse_call_expr(&mut self, func: Expression) -> Expression {
557        let args = self.parse_raw_list(Token::RParent);
558        Expression::Call(CallExpr {
559            ident: Box::from(func),
560            args,
561        })
562    }
563
564    fn expect_peek(&self, expected: Token) {
565        if self.peek_tok != expected {
566            panic!("Expected: {}, received: {}", expected, self.peek_tok)
567        }
568    }
569
570    fn peek_is_end(&self) -> bool {
571        match self.peek_tok {
572            Token::Eol | Token::Eof => true,
573            _ => false,
574        }
575    }
576
577    pub fn next_token(&mut self) {
578        swap(&mut self.cur_tok, &mut self.peek_tok);
579        self.peek_tok = self.lexer.tokenize();
580    }
581
582    fn get_precedence(&self, token: &Token) -> Precedence {
583        match token {
584            Token::Assign => Precedence::ASSIGN,
585            Token::Operator(op) => match op {
586                Operator::Equals | Operator::NotEquals => Precedence::EQUALS,
587                Operator::Greater | Operator::Lesser => Precedence::LESSGREATER,
588                Operator::GreaterEquals | Operator::LesserEquals => Precedence::LESSGREATEROREQUAL,
589                Operator::Plus | Operator::Minus => Precedence::SUM,
590                Operator::Asterisk | Operator::Slash => Precedence::PRODUCT,
591            },
592            Token::LParent => Precedence::CALL,
593            Token::LSquare => Precedence::INDEX,
594            _ => Precedence::LOWEST,
595        }
596    }
597}
598
599#[derive(Debug)]
600pub struct EofError;
601
602impl Error for EofError {}
603
604impl Display for EofError {
605    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
606        f.write_str("Encountered end of file")
607    }
608}