1pub mod ast;
2mod ast_tree_test;
3mod parser_test;
4mod precedences;
5
6pub extern crate lexer;
7
8use crate::ast::{
9 Array, BinaryExpression, BlockStatement, Boolean, Expression, FunctionCall,
10 FunctionDeclaration, Hash, Index, Integer, Let, Literal, Node, Program, ReturnStatement,
11 Statement, StringType, UnaryExpression, IDENTIFIER, IF,
12};
13use crate::precedences::{get_token_precedence, Precedence};
14use lexer::token::{Span, Token, TokenKind};
15use lexer::Lexer;
16
17type ParseError = String;
18type ParseErrors = Vec<ParseError>;
19
20pub struct Parser<'a> {
21 lexer: Lexer<'a>,
22 current_token: Token,
23 peek_token: Token,
24 errors: ParseErrors,
25}
26
27impl<'a> Parser<'a> {
28 pub fn new(mut lexer: Lexer<'a>) -> Parser<'a> {
29 let cur = lexer.next_token();
30 let next = lexer.next_token();
31 let errors = Vec::new();
32 let p = Parser { lexer, current_token: cur, peek_token: next, errors };
42
43 return p;
44 }
45
46 fn next_token(&mut self) {
47 self.current_token = self.peek_token.clone();
48 self.peek_token = self.lexer.next_token();
49 }
50
51 fn current_token_is(&mut self, token: &TokenKind) -> bool {
52 self.current_token.kind == *token
53 }
54
55 fn peek_token_is(&mut self, token: &TokenKind) -> bool {
56 self.peek_token.kind == *token
57 }
58
59 fn expect_peek(&mut self, token: &TokenKind) -> Result<(), ParseError> {
60 self.next_token();
61 if self.current_token.kind == *token {
62 Ok(())
63 } else {
64 let e = format!("expected token: {} got: {}", token, self.current_token);
65 Err(e)
66 }
67 }
68
69 pub fn parse_program(&mut self) -> Result<Program, ParseErrors> {
70 let mut program = Program::new();
71 while !self.current_token_is(&TokenKind::EOF) {
72 match self.parse_statement() {
73 Ok(stmt) => program.body.push(stmt),
74 Err(e) => self.errors.push(e),
75 }
76 self.next_token();
77 }
78 program.span.end = self.current_token.span.end;
79
80 if self.errors.is_empty() {
81 return Ok(program);
82 } else {
83 return Err(self.errors.clone());
84 }
85 }
86
87 fn parse_statement(&mut self) -> Result<Statement, ParseError> {
88 match self.current_token.kind {
89 TokenKind::LET => self.parse_let_statement(),
90 TokenKind::RETURN => self.parse_return_statement(),
91 _ => self.parse_expression_statement(),
92 }
93 }
94
95 fn parse_let_statement(&mut self) -> Result<Statement, ParseError> {
96 let start = self.current_token.span.start;
97 self.next_token();
98
99 let name = self.current_token.clone();
100 let mut identifier_name = "".to_string();
101 match &self.current_token.kind {
102 TokenKind::IDENTIFIER { name } => {
103 identifier_name = name.to_string();
104 }
105 _ => return Err(format!("{} not an identifier", self.current_token)),
106 };
107
108 self.expect_peek(&TokenKind::ASSIGN)?;
109 self.next_token();
110
111 let mut value = self.parse_expression(Precedence::LOWEST)?.0;
112 match value {
113 Expression::FUNCTION(ref mut f) => {
114 f.name = identifier_name;
115 }
116 _ => {}
117 }
118
119 if self.peek_token_is(&TokenKind::SEMICOLON) {
120 self.next_token();
121 }
122
123 let end = self.current_token.span.end;
124
125 return Ok(Statement::Let(Let {
126 identifier: name,
127 expr: value,
128 span: Span { start, end },
129 }));
130 }
131
132 fn parse_return_statement(&mut self) -> Result<Statement, ParseError> {
133 let start = self.current_token.span.start;
134 self.next_token();
135
136 let value = self.parse_expression(Precedence::LOWEST)?.0;
137
138 if self.peek_token_is(&TokenKind::SEMICOLON) {
139 self.next_token();
140 }
141 let end = self.current_token.span.end;
142
143 return Ok(Statement::Return(ReturnStatement {
144 argument: value,
145 span: Span { start, end },
146 }));
147 }
148
149 fn parse_expression_statement(&mut self) -> Result<Statement, ParseError> {
150 let expr = self.parse_expression(Precedence::LOWEST)?.0;
151 if self.peek_token_is(&TokenKind::SEMICOLON) {
152 self.next_token();
153 }
154
155 Ok(Statement::Expr(expr))
156 }
157
158 fn parse_expression(
159 &mut self,
160 precedence: Precedence,
161 ) -> Result<(Expression, Span), ParseError> {
162 let mut left_start = self.current_token.span.start;
163 let mut left = self.parse_prefix_expression()?;
164 while self.peek_token.kind != TokenKind::SEMICOLON
165 && precedence < get_token_precedence(&self.peek_token.kind)
166 {
167 match self.parse_infix_expression(&left, left_start) {
168 Some(infix) => {
169 left = infix?;
170 if let Expression::INFIX(b) = left.clone() {
171 left_start = b.span.start;
172 }
173 }
174 None => {
175 return Ok((left, Span { start: left_start, end: self.current_token.span.end }))
176 }
177 }
178 }
179
180 let end = self.current_token.span.end;
181
182 Ok((left, Span { start: left_start, end }))
183 }
184
185 fn parse_prefix_expression(&mut self) -> Result<Expression, ParseError> {
186 match &self.current_token.kind {
188 TokenKind::IDENTIFIER { name } => {
189 return Ok(Expression::IDENTIFIER(IDENTIFIER {
190 name: name.clone(),
191 span: self.current_token.clone().span,
192 }))
193 }
194 TokenKind::INT(i) => {
195 return Ok(Expression::LITERAL(Literal::Integer(Integer {
196 raw: *i,
197 span: self.current_token.clone().span,
198 })))
199 }
200 TokenKind::STRING(s) => {
201 return Ok(Expression::LITERAL(Literal::String(StringType {
202 raw: s.to_string(),
203 span: self.current_token.clone().span,
204 })))
205 }
206 b @ TokenKind::TRUE | b @ TokenKind::FALSE => {
207 return Ok(Expression::LITERAL(Literal::Boolean(Boolean {
208 raw: *b == TokenKind::TRUE,
209 span: self.current_token.clone().span,
210 })))
211 }
212 TokenKind::BANG | TokenKind::MINUS => {
213 let start = self.current_token.span.start;
214 let prefix_op = self.current_token.clone();
215 self.next_token();
216 let (expr, span) = self.parse_expression(Precedence::PREFIX)?;
217 return Ok(Expression::PREFIX(UnaryExpression {
218 op: prefix_op,
219 operand: Box::new(expr),
220 span: Span { start, end: span.end },
221 }));
222 }
223 TokenKind::LPAREN => {
224 self.next_token();
225 let expr = self.parse_expression(Precedence::LOWEST)?.0;
226 self.expect_peek(&TokenKind::RPAREN)?;
227 return Ok(expr);
228 }
229 TokenKind::IF => self.parse_if_expression(),
230 TokenKind::FUNCTION => self.parse_fn_expression(),
231 TokenKind::LBRACKET => {
232 let (elements, span) = self.parse_expression_list(&TokenKind::RBRACKET)?;
233 return Ok(Expression::LITERAL(Literal::Array(Array { elements, span })));
234 }
235 TokenKind::LBRACE => self.parse_hash_expression(),
236 _ => Err(format!("no prefix function for token: {}", self.current_token)),
237 }
238 }
239
240 fn parse_infix_expression(
241 &mut self,
242 left: &Expression,
243 left_start: usize,
244 ) -> Option<Result<Expression, ParseError>> {
245 match self.peek_token.kind {
246 TokenKind::PLUS
247 | TokenKind::MINUS
248 | TokenKind::ASTERISK
249 | TokenKind::SLASH
250 | TokenKind::EQ
251 | TokenKind::NotEq
252 | TokenKind::LT
253 | TokenKind::GT => {
254 self.next_token();
255 let infix_op = self.current_token.clone();
256 let precedence_value = get_token_precedence(&self.current_token.kind);
257 self.next_token();
258 let (right, span) = self.parse_expression(precedence_value).unwrap();
259 return Some(Ok(Expression::INFIX(BinaryExpression {
260 op: infix_op,
261 left: Box::new(left.clone()),
262 right: Box::new(right),
263 span: Span { start: left_start, end: span.end },
264 })));
265 }
266 TokenKind::LPAREN => {
267 self.next_token();
268 return Some(self.parse_fn_call_expression(left.clone()));
269 }
270 TokenKind::LBRACKET => {
271 self.next_token();
272 return Some(self.parse_index_expression(left.clone()));
273 }
274 _ => None,
275 }
276 }
277
278 fn parse_if_expression(&mut self) -> Result<Expression, ParseError> {
279 let start = self.current_token.span.start;
280 self.expect_peek(&TokenKind::LPAREN)?;
281 self.next_token();
282
283 let condition = self.parse_expression(Precedence::LOWEST)?.0;
284 self.expect_peek(&TokenKind::RPAREN)?;
285 self.expect_peek(&TokenKind::LBRACE)?;
286
287 let consequent = self.parse_block_statement()?;
288
289 let alternate = if self.peek_token_is(&TokenKind::ELSE) {
290 self.next_token();
291 self.expect_peek(&TokenKind::LBRACE)?;
292 Some(self.parse_block_statement()?)
293 } else {
294 None
295 };
296
297 let end = self.current_token.span.end;
298
299 return Ok(Expression::IF(IF {
300 condition: Box::new(condition),
301 consequent,
302 alternate,
303 span: Span { start, end },
304 }));
305 }
306
307 fn parse_block_statement(&mut self) -> Result<BlockStatement, ParseError> {
308 let start = self.current_token.span.start;
309 self.next_token();
310 let mut block_statement = Vec::new();
311
312 while !self.current_token_is(&TokenKind::RBRACE) && !self.current_token_is(&TokenKind::EOF)
313 {
314 if let Ok(statement) = self.parse_statement() {
315 block_statement.push(statement)
316 }
317
318 self.next_token();
319 }
320
321 let end = self.current_token.span.end;
322
323 Ok(BlockStatement { body: block_statement, span: Span { start, end } })
324 }
325
326 fn parse_fn_expression(&mut self) -> Result<Expression, ParseError> {
327 let start = self.current_token.span.start;
328 self.expect_peek(&TokenKind::LPAREN)?;
329
330 let params = self.parse_fn_parameters()?;
331
332 self.expect_peek(&TokenKind::LBRACE)?;
333
334 let function_body = self.parse_block_statement()?;
335
336 let end = self.current_token.span.end;
337
338 Ok(Expression::FUNCTION(FunctionDeclaration {
339 params,
340 body: function_body,
341 span: Span { start, end },
342 name: "".to_string(),
343 }))
344 }
345
346 fn parse_fn_parameters(&mut self) -> Result<Vec<IDENTIFIER>, ParseError> {
347 let mut params = Vec::new();
348 if self.peek_token_is(&TokenKind::RPAREN) {
349 self.next_token();
350 return Ok(params);
351 }
352
353 self.next_token();
354
355 match &self.current_token.kind {
356 TokenKind::IDENTIFIER { name } => params
357 .push(IDENTIFIER { name: name.clone(), span: self.current_token.span.clone() }),
358 token => {
359 return Err(format!("expected function params to be an identifier, got {}", token))
360 }
361 }
362
363 while self.peek_token_is(&TokenKind::COMMA) {
364 self.next_token();
365 self.next_token();
366 match &self.current_token.kind {
367 TokenKind::IDENTIFIER { name } => params
368 .push(IDENTIFIER { name: name.clone(), span: self.current_token.span.clone() }),
369 token => {
370 return Err(format!(
371 "expected function params to be an identifier, got {}",
372 token
373 ))
374 }
375 }
376 }
377
378 self.expect_peek(&TokenKind::RPAREN)?;
379
380 return Ok(params);
381 }
382
383 fn parse_fn_call_expression(&mut self, expr: Expression) -> Result<Expression, ParseError> {
384 #[allow(unused_assignments)]
386 let mut start = self.current_token.span.start;
387 let (arguments, ..) = self.parse_expression_list(&TokenKind::RPAREN)?;
388 let end = self.current_token.span.end;
389 match &expr {
390 Expression::IDENTIFIER(i) => start = i.span.start,
391 Expression::FUNCTION(f) => start = f.span.start,
392 _ => return Err(format!("expected function")),
393 }
394 let callee = Box::new(expr);
395
396 Ok(Expression::FunctionCall(FunctionCall { callee, arguments, span: Span { start, end } }))
397 }
398
399 fn parse_expression_list(
400 &mut self,
401 end: &TokenKind,
402 ) -> Result<(Vec<Expression>, Span), ParseError> {
403 let start = self.current_token.span.start;
404 let mut expr_list = Vec::new();
405 if self.peek_token_is(end) {
406 self.next_token();
407 let end = self.current_token.span.end;
408 return Ok((expr_list, Span { start, end }));
409 }
410
411 self.next_token();
412
413 expr_list.push(self.parse_expression(Precedence::LOWEST)?.0);
414
415 while self.peek_token_is(&TokenKind::COMMA) {
416 self.next_token();
417 self.next_token();
418 expr_list.push(self.parse_expression(Precedence::LOWEST)?.0);
419 }
420
421 self.expect_peek(end)?;
422 let end = self.current_token.span.end;
423
424 return Ok((expr_list, Span { start, end }));
425 }
426
427 fn parse_index_expression(&mut self, left: Expression) -> Result<Expression, ParseError> {
428 let start = self.current_token.span.start;
429 self.next_token();
430 let index = self.parse_expression(Precedence::LOWEST)?.0;
431
432 self.expect_peek(&TokenKind::RBRACKET)?;
433
434 let end = self.current_token.span.end;
435
436 return Ok(Expression::Index(Index {
437 object: Box::new(left),
438 index: Box::new(index),
439 span: Span { start, end },
440 }));
441 }
442
443 fn parse_hash_expression(&mut self) -> Result<Expression, ParseError> {
444 let mut map = Vec::new();
445 let start = self.current_token.span.start;
446 while !self.peek_token_is(&TokenKind::RBRACE) {
447 self.next_token();
448
449 let key = self.parse_expression(Precedence::LOWEST)?.0;
450
451 self.expect_peek(&TokenKind::COLON)?;
452
453 self.next_token();
454 let value = self.parse_expression(Precedence::LOWEST)?.0;
455
456 map.push((key, value));
457
458 if !self.peek_token_is(&TokenKind::RBRACE) {
459 self.expect_peek(&TokenKind::COMMA)?;
460 }
461 }
462
463 self.expect_peek(&TokenKind::RBRACE)?;
464 let end = self.current_token.span.end;
465
466 Ok(Expression::LITERAL(Literal::Hash(Hash { elements: map, span: Span { start, end } })))
467 }
468}
469
470pub fn parse(input: &str) -> Result<Node, ParseErrors> {
471 let lexer = Lexer::new(input);
472 let mut parser = Parser::new(lexer);
473 let program = parser.parse_program()?;
474
475 Ok(Node::Program(program))
476}
477
478pub fn parse_ast_json_string(input: &str) -> Result<String, ParseErrors> {
479 let ast = match parse(input) {
480 Ok(node) => serde_json::to_string_pretty(&node).unwrap(),
481 Err(e) => return Err(e),
482 };
483
484 return Ok(ast);
485}