parser/
lib.rs

1pub mod ast;
2mod ast_tree_test;
3mod parser_test;
4mod precedences;
5
6pub extern crate lexer;
7
8use crate::ast::{
9    Array, BinaryExpression, BlockStatement, Boolean, Expression, FunctionCall,
10    FunctionDeclaration, Hash, Index, Integer, Let, Literal, Node, Program, ReturnStatement,
11    Statement, StringType, UnaryExpression, IDENTIFIER, IF,
12};
13use crate::precedences::{get_token_precedence, Precedence};
14use lexer::token::{Span, Token, TokenKind};
15use lexer::Lexer;
16
17type ParseError = String;
18type ParseErrors = Vec<ParseError>;
19
20pub struct Parser<'a> {
21    lexer: Lexer<'a>,
22    current_token: Token,
23    peek_token: Token,
24    errors: ParseErrors,
25}
26
27impl<'a> Parser<'a> {
28    pub fn new(mut lexer: Lexer<'a>) -> Parser<'a> {
29        let cur = lexer.next_token();
30        let next = lexer.next_token();
31        let errors = Vec::new();
32        // in strict sense, rust can be as classic go pattern, but it requires more work
33        // so let's just use pattern matching
34        // ```rust
35        // type PrefixParseFn = fn() -> Result<Expression, ParseError>;
36        // type InfixParseFn = fn(Expression) -> Result<Expression, ParseError>;
37        // let prefix_parse_fns = HashMap::new();
38        // let infix_parse_fns = HashMap::new();
39        // ```
40
41        let p = Parser { lexer, current_token: cur, peek_token: next, errors };
42
43        return p;
44    }
45
46    fn next_token(&mut self) {
47        self.current_token = self.peek_token.clone();
48        self.peek_token = self.lexer.next_token();
49    }
50
51    fn current_token_is(&mut self, token: &TokenKind) -> bool {
52        self.current_token.kind == *token
53    }
54
55    fn peek_token_is(&mut self, token: &TokenKind) -> bool {
56        self.peek_token.kind == *token
57    }
58
59    fn expect_peek(&mut self, token: &TokenKind) -> Result<(), ParseError> {
60        self.next_token();
61        if self.current_token.kind == *token {
62            Ok(())
63        } else {
64            let e = format!("expected token: {} got: {}", token, self.current_token);
65            Err(e)
66        }
67    }
68
69    pub fn parse_program(&mut self) -> Result<Program, ParseErrors> {
70        let mut program = Program::new();
71        while !self.current_token_is(&TokenKind::EOF) {
72            match self.parse_statement() {
73                Ok(stmt) => program.body.push(stmt),
74                Err(e) => self.errors.push(e),
75            }
76            self.next_token();
77        }
78        program.span.end = self.current_token.span.end;
79
80        if self.errors.is_empty() {
81            return Ok(program);
82        } else {
83            return Err(self.errors.clone());
84        }
85    }
86
87    fn parse_statement(&mut self) -> Result<Statement, ParseError> {
88        match self.current_token.kind {
89            TokenKind::LET => self.parse_let_statement(),
90            TokenKind::RETURN => self.parse_return_statement(),
91            _ => self.parse_expression_statement(),
92        }
93    }
94
95    fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
96        let start = self.current_token.span.start;
97        self.next_token();
98
99        let name = self.current_token.clone();
100        let mut identifier_name = "".to_string();
101        match &self.current_token.kind {
102            TokenKind::IDENTIFIER { name } => {
103                identifier_name = name.to_string();
104            }
105            _ => return Err(format!("{} not an identifier", self.current_token)),
106        };
107
108        self.expect_peek(&TokenKind::ASSIGN)?;
109        self.next_token();
110
111        let mut value = self.parse_expression(Precedence::LOWEST)?.0;
112        match value {
113            Expression::FUNCTION(ref mut f) => {
114                f.name = identifier_name;
115            }
116            _ => {}
117        }
118
119        if self.peek_token_is(&TokenKind::SEMICOLON) {
120            self.next_token();
121        }
122
123        let end = self.current_token.span.end;
124
125        return Ok(Statement::Let(Let {
126            identifier: name,
127            expr: value,
128            span: Span { start, end },
129        }));
130    }
131
132    fn parse_return_statement(&mut self) -> Result<Statement, ParseError> {
133        let start = self.current_token.span.start;
134        self.next_token();
135
136        let value = self.parse_expression(Precedence::LOWEST)?.0;
137
138        if self.peek_token_is(&TokenKind::SEMICOLON) {
139            self.next_token();
140        }
141        let end = self.current_token.span.end;
142
143        return Ok(Statement::Return(ReturnStatement {
144            argument: value,
145            span: Span { start, end },
146        }));
147    }
148
149    fn parse_expression_statement(&mut self) -> Result<Statement, ParseError> {
150        let expr = self.parse_expression(Precedence::LOWEST)?.0;
151        if self.peek_token_is(&TokenKind::SEMICOLON) {
152            self.next_token();
153        }
154
155        Ok(Statement::Expr(expr))
156    }
157
158    fn parse_expression(
159        &mut self,
160        precedence: Precedence,
161    ) -> Result<(Expression, Span), ParseError> {
162        let mut left_start = self.current_token.span.start;
163        let mut left = self.parse_prefix_expression()?;
164        while self.peek_token.kind != TokenKind::SEMICOLON
165            && precedence < get_token_precedence(&self.peek_token.kind)
166        {
167            match self.parse_infix_expression(&left, left_start) {
168                Some(infix) => {
169                    left = infix?;
170                    if let Expression::INFIX(b) = left.clone() {
171                        left_start = b.span.start;
172                    }
173                }
174                None => {
175                    return Ok((left, Span { start: left_start, end: self.current_token.span.end }))
176                }
177            }
178        }
179
180        let end = self.current_token.span.end;
181
182        Ok((left, Span { start: left_start, end }))
183    }
184
185    fn parse_prefix_expression(&mut self) -> Result<Expression, ParseError> {
186        // this is prefix fn map :)
187        match &self.current_token.kind {
188            TokenKind::IDENTIFIER { name } => {
189                return Ok(Expression::IDENTIFIER(IDENTIFIER {
190                    name: name.clone(),
191                    span: self.current_token.clone().span,
192                }))
193            }
194            TokenKind::INT(i) => {
195                return Ok(Expression::LITERAL(Literal::Integer(Integer {
196                    raw: *i,
197                    span: self.current_token.clone().span,
198                })))
199            }
200            TokenKind::STRING(s) => {
201                return Ok(Expression::LITERAL(Literal::String(StringType {
202                    raw: s.to_string(),
203                    span: self.current_token.clone().span,
204                })))
205            }
206            b @ TokenKind::TRUE | b @ TokenKind::FALSE => {
207                return Ok(Expression::LITERAL(Literal::Boolean(Boolean {
208                    raw: *b == TokenKind::TRUE,
209                    span: self.current_token.clone().span,
210                })))
211            }
212            TokenKind::BANG | TokenKind::MINUS => {
213                let start = self.current_token.span.start;
214                let prefix_op = self.current_token.clone();
215                self.next_token();
216                let (expr, span) = self.parse_expression(Precedence::PREFIX)?;
217                return Ok(Expression::PREFIX(UnaryExpression {
218                    op: prefix_op,
219                    operand: Box::new(expr),
220                    span: Span { start, end: span.end },
221                }));
222            }
223            TokenKind::LPAREN => {
224                self.next_token();
225                let expr = self.parse_expression(Precedence::LOWEST)?.0;
226                self.expect_peek(&TokenKind::RPAREN)?;
227                return Ok(expr);
228            }
229            TokenKind::IF => self.parse_if_expression(),
230            TokenKind::FUNCTION => self.parse_fn_expression(),
231            TokenKind::LBRACKET => {
232                let (elements, span) = self.parse_expression_list(&TokenKind::RBRACKET)?;
233                return Ok(Expression::LITERAL(Literal::Array(Array { elements, span })));
234            }
235            TokenKind::LBRACE => self.parse_hash_expression(),
236            _ => Err(format!("no prefix function for token: {}", self.current_token)),
237        }
238    }
239
240    fn parse_infix_expression(
241        &mut self,
242        left: &Expression,
243        left_start: usize,
244    ) -> Option<Result<Expression, ParseError>> {
245        match self.peek_token.kind {
246            TokenKind::PLUS
247            | TokenKind::MINUS
248            | TokenKind::ASTERISK
249            | TokenKind::SLASH
250            | TokenKind::EQ
251            | TokenKind::NotEq
252            | TokenKind::LT
253            | TokenKind::GT => {
254                self.next_token();
255                let infix_op = self.current_token.clone();
256                let precedence_value = get_token_precedence(&self.current_token.kind);
257                self.next_token();
258                let (right, span) = self.parse_expression(precedence_value).unwrap();
259                return Some(Ok(Expression::INFIX(BinaryExpression {
260                    op: infix_op,
261                    left: Box::new(left.clone()),
262                    right: Box::new(right),
263                    span: Span { start: left_start, end: span.end },
264                })));
265            }
266            TokenKind::LPAREN => {
267                self.next_token();
268                return Some(self.parse_fn_call_expression(left.clone()));
269            }
270            TokenKind::LBRACKET => {
271                self.next_token();
272                return Some(self.parse_index_expression(left.clone()));
273            }
274            _ => None,
275        }
276    }
277
278    fn parse_if_expression(&mut self) -> Result<Expression, ParseError> {
279        let start = self.current_token.span.start;
280        self.expect_peek(&TokenKind::LPAREN)?;
281        self.next_token();
282
283        let condition = self.parse_expression(Precedence::LOWEST)?.0;
284        self.expect_peek(&TokenKind::RPAREN)?;
285        self.expect_peek(&TokenKind::LBRACE)?;
286
287        let consequent = self.parse_block_statement()?;
288
289        let alternate = if self.peek_token_is(&TokenKind::ELSE) {
290            self.next_token();
291            self.expect_peek(&TokenKind::LBRACE)?;
292            Some(self.parse_block_statement()?)
293        } else {
294            None
295        };
296
297        let end = self.current_token.span.end;
298
299        return Ok(Expression::IF(IF {
300            condition: Box::new(condition),
301            consequent,
302            alternate,
303            span: Span { start, end },
304        }));
305    }
306
307    fn parse_block_statement(&mut self) -> Result<BlockStatement, ParseError> {
308        let start = self.current_token.span.start;
309        self.next_token();
310        let mut block_statement = Vec::new();
311
312        while !self.current_token_is(&TokenKind::RBRACE) && !self.current_token_is(&TokenKind::EOF)
313        {
314            if let Ok(statement) = self.parse_statement() {
315                block_statement.push(statement)
316            }
317
318            self.next_token();
319        }
320
321        let end = self.current_token.span.end;
322
323        Ok(BlockStatement { body: block_statement, span: Span { start, end } })
324    }
325
326    fn parse_fn_expression(&mut self) -> Result<Expression, ParseError> {
327        let start = self.current_token.span.start;
328        self.expect_peek(&TokenKind::LPAREN)?;
329
330        let params = self.parse_fn_parameters()?;
331
332        self.expect_peek(&TokenKind::LBRACE)?;
333
334        let function_body = self.parse_block_statement()?;
335
336        let end = self.current_token.span.end;
337
338        Ok(Expression::FUNCTION(FunctionDeclaration {
339            params,
340            body: function_body,
341            span: Span { start, end },
342            name: "".to_string(),
343        }))
344    }
345
346    fn parse_fn_parameters(&mut self) -> Result<Vec<IDENTIFIER>, ParseError> {
347        let mut params = Vec::new();
348        if self.peek_token_is(&TokenKind::RPAREN) {
349            self.next_token();
350            return Ok(params);
351        }
352
353        self.next_token();
354
355        match &self.current_token.kind {
356            TokenKind::IDENTIFIER { name } => params
357                .push(IDENTIFIER { name: name.clone(), span: self.current_token.span.clone() }),
358            token => {
359                return Err(format!("expected function params  to be an identifier, got {}", token))
360            }
361        }
362
363        while self.peek_token_is(&TokenKind::COMMA) {
364            self.next_token();
365            self.next_token();
366            match &self.current_token.kind {
367                TokenKind::IDENTIFIER { name } => params
368                    .push(IDENTIFIER { name: name.clone(), span: self.current_token.span.clone() }),
369                token => {
370                    return Err(format!(
371                        "expected function params  to be an identifier, got {}",
372                        token
373                    ))
374                }
375            }
376        }
377
378        self.expect_peek(&TokenKind::RPAREN)?;
379
380        return Ok(params);
381    }
382
383    fn parse_fn_call_expression(&mut self, expr: Expression) -> Result<Expression, ParseError> {
384        // fake positive
385        #[allow(unused_assignments)]
386        let mut start = self.current_token.span.start;
387        let (arguments, ..) = self.parse_expression_list(&TokenKind::RPAREN)?;
388        let end = self.current_token.span.end;
389        match &expr {
390            Expression::IDENTIFIER(i) => start = i.span.start,
391            Expression::FUNCTION(f) => start = f.span.start,
392            _ => return Err(format!("expected function")),
393        }
394        let callee = Box::new(expr);
395
396        Ok(Expression::FunctionCall(FunctionCall { callee, arguments, span: Span { start, end } }))
397    }
398
399    fn parse_expression_list(
400        &mut self,
401        end: &TokenKind,
402    ) -> Result<(Vec<Expression>, Span), ParseError> {
403        let start = self.current_token.span.start;
404        let mut expr_list = Vec::new();
405        if self.peek_token_is(end) {
406            self.next_token();
407            let end = self.current_token.span.end;
408            return Ok((expr_list, Span { start, end }));
409        }
410
411        self.next_token();
412
413        expr_list.push(self.parse_expression(Precedence::LOWEST)?.0);
414
415        while self.peek_token_is(&TokenKind::COMMA) {
416            self.next_token();
417            self.next_token();
418            expr_list.push(self.parse_expression(Precedence::LOWEST)?.0);
419        }
420
421        self.expect_peek(end)?;
422        let end = self.current_token.span.end;
423
424        return Ok((expr_list, Span { start, end }));
425    }
426
427    fn parse_index_expression(&mut self, left: Expression) -> Result<Expression, ParseError> {
428        let start = self.current_token.span.start;
429        self.next_token();
430        let index = self.parse_expression(Precedence::LOWEST)?.0;
431
432        self.expect_peek(&TokenKind::RBRACKET)?;
433
434        let end = self.current_token.span.end;
435
436        return Ok(Expression::Index(Index {
437            object: Box::new(left),
438            index: Box::new(index),
439            span: Span { start, end },
440        }));
441    }
442
443    fn parse_hash_expression(&mut self) -> Result<Expression, ParseError> {
444        let mut map = Vec::new();
445        let start = self.current_token.span.start;
446        while !self.peek_token_is(&TokenKind::RBRACE) {
447            self.next_token();
448
449            let key = self.parse_expression(Precedence::LOWEST)?.0;
450
451            self.expect_peek(&TokenKind::COLON)?;
452
453            self.next_token();
454            let value = self.parse_expression(Precedence::LOWEST)?.0;
455
456            map.push((key, value));
457
458            if !self.peek_token_is(&TokenKind::RBRACE) {
459                self.expect_peek(&TokenKind::COMMA)?;
460            }
461        }
462
463        self.expect_peek(&TokenKind::RBRACE)?;
464        let end = self.current_token.span.end;
465
466        Ok(Expression::LITERAL(Literal::Hash(Hash { elements: map, span: Span { start, end } })))
467    }
468}
469
470pub fn parse(input: &str) -> Result<Node, ParseErrors> {
471    let lexer = Lexer::new(input);
472    let mut parser = Parser::new(lexer);
473    let program = parser.parse_program()?;
474
475    Ok(Node::Program(program))
476}
477
478pub fn parse_ast_json_string(input: &str) -> Result<String, ParseErrors> {
479    let ast = match parse(input) {
480        Ok(node) => serde_json::to_string_pretty(&node).unwrap(),
481        Err(e) => return Err(e),
482    };
483
484    return Ok(ast);
485}