mica_language/
parser.rs

1//! The parser.
2
3use std::rc::Rc;
4
5use crate::ast::{Ast, NodeId, NodeKind};
6use crate::common::{Error, ErrorKind};
7use crate::lexer::{Lexer, Token, TokenKind};
8
9/// The parser's state.
10pub struct Parser {
11   lexer: Lexer,
12   ast: Ast,
13}
14
15impl Parser {
16   /// Constructs a new parser from a lexer.
17   pub fn new(lexer: Lexer) -> Self {
18      Self {
19         ast: Ast::new(Rc::clone(&lexer.module_name)),
20         lexer,
21      }
22   }
23
24   /// Constructs a compilation error located at the given token.
25   fn error(&self, token: &Token, kind: ErrorKind) -> Error {
26      Error::Compile {
27         module_name: Rc::clone(&self.lexer.module_name),
28         kind,
29         location: token.location,
30      }
31   }
32
33   /// Returns an error if the next token is not of the given kind.
34   fn expect(
35      &mut self,
36      kind: TokenKind,
37      error: impl FnOnce(&Token) -> ErrorKind,
38   ) -> Result<Token, Error> {
39      let next_token = self.lexer.peek_token()?;
40      if next_token.kind == kind {
41         Ok(self.lexer.next_token()?)
42      } else {
43         Err(self.error(&next_token, error(&next_token)))
44      }
45   }
46
47   /// If the next token's kind is equal to `kind`, advances to the next token and returns the
48   /// token. Otherwise returns `None`.
49   fn try_next(&mut self, kind: TokenKind) -> Result<Option<Token>, Error> {
50      let next_token = self.lexer.peek_token()?;
51      Ok(if next_token.kind == kind {
52         Some(self.lexer.next_token()?)
53      } else {
54         None
55      })
56   }
57
58   /// Returns the precedence level of the given token kind.
59   fn precedence(kind: &TokenKind) -> i8 {
60      match kind {
61         TokenKind::Or => 1,
62         TokenKind::And => 2,
63         TokenKind::Assign => 3,
64         | TokenKind::Equal
65         | TokenKind::NotEqual
66         | TokenKind::Less
67         | TokenKind::Greater
68         | TokenKind::LessEqual
69         | TokenKind::GreaterEqual => 4,
70         TokenKind::Plus | TokenKind::Minus => 5,
71         TokenKind::Star | TokenKind::Slash => 6,
72         TokenKind::LeftParen | TokenKind::Dot | TokenKind::Impl => 7,
73         _ => 0,
74      }
75   }
76
77   /// Returns the associativity of the given token kind.
78   fn associativity(kind: &TokenKind) -> Associativity {
79      match kind {
80         TokenKind::Assign => Associativity::Right,
81         _ => Associativity::Left,
82      }
83   }
84
85   /// Parses a "unit literal". This is used for all literals that are uniquely identified by a
86   /// single token's kind (such as `nil`.)
87   fn parse_unit(&mut self, token: Token, kind: NodeKind) -> NodeId {
88      self.ast.build_node(kind, ()).with_location(token.location).done()
89   }
90
91   /// Parses a number literal.
92   fn parse_number(&mut self, token: Token) -> NodeId {
93      if let &TokenKind::Number(x) = &token.kind {
94         self
95            .ast
96            .build_node(NodeKind::Number, ())
97            .with_location(token.location)
98            .with_number(x)
99            .done()
100      } else {
101         panic!("next token must be a number");
102      }
103   }
104
105   /// Parses a string literal.
106   fn parse_string(&mut self, token: Token) -> NodeId {
107      if let TokenKind::String(s) = token.kind {
108         self
109            .ast
110            .build_node(NodeKind::String, ())
111            .with_location(token.location)
112            .with_string(s)
113            .done()
114      } else {
115         panic!("next token must be a string");
116      }
117   }
118
119   /// Parses a sequence of long string literals.
120   fn parse_long_string(&mut self, first: Token) -> Result<NodeId, Error> {
121      let mut content = String::new();
122      if let TokenKind::LongString(s) = first.kind {
123         content.push_str(&s);
124      } else {
125         panic!("first token must be a long string")
126      }
127      while let TokenKind::LongString(_) = self.lexer.peek_token()?.kind {
128         let s = match self.lexer.next_token()?.kind {
129            TokenKind::LongString(s) => s,
130            _ => unreachable!(),
131         };
132         content.push('\n');
133         content.push_str(&s);
134      }
135      Ok(self
136         .ast
137         .build_node(NodeKind::String, ())
138         .with_location(first.location)
139         .with_string(Rc::from(content))
140         .done())
141   }
142
143   /// Parses an identifier.
144   fn parse_identifier(&mut self, token: Token) -> Result<NodeId, Error> {
145      if let TokenKind::Identifier(i) = token.kind {
146         Ok(self
147            .ast
148            .build_node(NodeKind::Identifier, ())
149            .with_location(token.location)
150            .with_string(i)
151            .done())
152      } else {
153         Err(self.error(&token, ErrorKind::IdentifierExpected))
154      }
155   }
156
157   /// Parses a unary operator.
158   fn unary_operator(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
159      let right = self.parse_expression(Self::precedence(&TokenKind::Star))?;
160      Ok(self.ast.build_node(kind, right).with_location(token.location).done())
161   }
162
163   /// Parses a terminated block. Typically blocks are terminated with `end`.
164   fn parse_terminated_block(
165      &mut self,
166      token: &Token,
167      children: &mut Vec<NodeId>,
168      is_terminator: impl Fn(&TokenKind) -> bool,
169   ) -> Result<(), Error> {
170      while !is_terminator(&self.lexer.peek_token()?.kind) {
171         if self.lexer.peek_token()?.kind == TokenKind::Eof {
172            return Err(self.error(token, ErrorKind::MissingEnd));
173         }
174         children.push(self.parse_item()?);
175      }
176      Ok(())
177   }
178
179   /// Parses a comma-separated list.
180   fn parse_comma_separated(
181      &mut self,
182      dest: &mut Vec<NodeId>,
183      end: TokenKind,
184      mut next: impl FnMut(&mut Self) -> Result<NodeId, Error>,
185   ) -> Result<(), Error> {
186      loop {
187         let token = self.lexer.peek_token()?;
188         match &token.kind {
189            TokenKind::Eof => return Err(self.error(&token, ErrorKind::UnexpectedEof)),
190            kind if *kind == end => {
191               self.lexer.next_token()?;
192               return Ok(());
193            }
194            _ => (),
195         }
196         dest.push(next(self)?);
197         match self.lexer.next_token()? {
198            Token {
199               kind: TokenKind::Comma,
200               ..
201            } => (),
202            t if t.kind == end => return Ok(()),
203            token => return Err(self.error(&token, ErrorKind::CommaExpected)),
204         }
205      }
206   }
207
208   /// Parses a list or dict literal.
209   fn parse_list_or_dict(&mut self, token: Token) -> Result<NodeId, Error> {
210      #[derive(Clone, Copy, PartialEq, Eq)]
211      enum Mode {
212         Unknown,
213         Dict,
214         List,
215      }
216
217      let mut elements = Vec::new();
218      let mut mode = Mode::Unknown;
219      if self.lexer.peek_token()?.kind == TokenKind::Colon {
220         self.lexer.next_token()?;
221         self.expect(TokenKind::RightBracket, |_| {
222            ErrorKind::RightBracketExpectedToCloseEmptyDict
223         })?;
224         mode = Mode::Dict;
225      } else {
226         self.parse_comma_separated(&mut elements, TokenKind::RightBracket, |p| match mode {
227            Mode::Unknown => {
228               let key = p.parse_expression(0)?;
229               if p.lexer.peek_token()?.kind == TokenKind::Colon {
230                  mode = Mode::Dict;
231                  let colon = p.lexer.next_token()?;
232                  let value = p.parse_expression(0)?;
233                  Ok(p
234                     .ast
235                     .build_node(NodeKind::DictPair, (key, value))
236                     .with_location(colon.location)
237                     .done())
238               } else {
239                  mode = Mode::List;
240                  Ok(key)
241               }
242            }
243            Mode::Dict => {
244               let key = p.parse_expression(0)?;
245               let colon = p.expect(TokenKind::Colon, |_| ErrorKind::ColonExpectedAfterDictKey)?;
246               let value = p.parse_expression(0)?;
247               Ok(p
248                  .ast
249                  .build_node(NodeKind::DictPair, (key, value))
250                  .with_location(colon.location)
251                  .done())
252            }
253            Mode::List => p.parse_expression(0),
254         })?;
255      }
256
257      Ok(self
258         .ast
259         .build_node(
260            match mode {
261               Mode::Unknown | Mode::List => NodeKind::List,
262               Mode::Dict => NodeKind::Dict,
263            },
264            (),
265         )
266         .with_location(token.location)
267         .with_children(elements)
268         .done())
269   }
270
271   /// Parses a `do` block.
272   fn parse_do_block(&mut self, token: Token) -> Result<NodeId, Error> {
273      let mut children = Vec::new();
274      self.parse_terminated_block(&token, &mut children, |k| *k == TokenKind::End)?;
275      let _end = self.lexer.next_token();
276      Ok(self
277         .ast
278         .build_node(NodeKind::Do, ())
279         .with_location(token.location)
280         .with_children(children)
281         .done())
282   }
283
284   /// Parses an `if` expression.
285   fn parse_if_expression(&mut self, if_token: Token) -> Result<NodeId, Error> {
286      let mut branches = Vec::new();
287      let mut else_token = None;
288
289      loop {
290         let (condition, do_token) = if let Some(token) = else_token.clone() {
291            (None, token)
292         } else {
293            let condition = self.parse_expression(0)?;
294            let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
295            (Some(condition), do_token)
296         };
297         let mut branch = Vec::new();
298         self.parse_terminated_block(&do_token, &mut branch, |k| {
299            matches!(k, TokenKind::Elif | TokenKind::Else | TokenKind::End)
300         })?;
301         branches.push(
302            if let Some(condition) = condition {
303               self.ast.build_node(NodeKind::IfBranch, condition).with_children(branch)
304            } else {
305               self.ast.build_node(NodeKind::ElseBranch, ()).with_children(branch)
306            }
307            .with_location(do_token.location)
308            .done(),
309         );
310
311         let next_token = self.lexer.next_token()?;
312         match &next_token.kind {
313            TokenKind::Elif => {
314               if else_token.is_some() {
315                  return Err(self.error(&next_token, ErrorKind::BranchAfterElse));
316               }
317            }
318            TokenKind::Else => {
319               else_token = Some(next_token);
320            }
321            TokenKind::Eof => return Err(self.error(&do_token, ErrorKind::MissingEnd)),
322            TokenKind::End => break,
323            _ => return Err(self.error(&next_token, ErrorKind::InvalidIfBranchToken)),
324         }
325      }
326
327      Ok(self
328         .ast
329         .build_node(NodeKind::If, ())
330         .with_location(if_token.location)
331         .with_children(branches)
332         .done())
333   }
334
335   /// Parses a `while` expression.
336   fn parse_while_expression(&mut self, token: Token) -> Result<NodeId, Error> {
337      let condition = self.parse_expression(0)?;
338      let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
339      let mut body = Vec::new();
340      self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
341      let _end = self.lexer.next_token();
342      Ok(self
343         .ast
344         .build_node(NodeKind::While, condition)
345         .with_location(token.location)
346         .with_children(body)
347         .done())
348   }
349
350   /// Parses a `for` expression.
351   fn parse_for_expression(&mut self, token: Token) -> Result<NodeId, Error> {
352      let binding = self.parse_expression(0)?;
353      let _in_token = self.expect(TokenKind::In, |_| ErrorKind::InExpectedAfterForBinding)?;
354      let iterator = self.parse_expression(0)?;
355      let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
356      let mut body = Vec::new();
357      self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
358      let _end = self.lexer.next_token();
359      Ok(self
360         .ast
361         .build_node(NodeKind::For, (binding, iterator))
362         .with_location(token.location)
363         .with_children(body)
364         .done())
365   }
366
367   /// Parses a function. `anonymous` decides if the function has a name or not.
368   fn parse_function(&mut self, func_token: Token, anonymous: bool) -> Result<NodeId, Error> {
369      let name = if !anonymous {
370         let name = self.lexer.next_token()?;
371         self.parse_identifier(name)?
372      } else {
373         NodeId::EMPTY
374      };
375
376      let left_paren = self.expect(TokenKind::LeftParen, |_| ErrorKind::LeftParenExpected)?;
377      let mut parameters = Vec::new();
378      self.parse_comma_separated(&mut parameters, TokenKind::RightParen, |p| {
379         let name = p.lexer.next_token()?;
380         p.parse_identifier(name)
381      })?;
382
383      // We allow either `constructor` or `static`, but not both.
384      let kind = if let Some(token) = self.try_next(TokenKind::Constructor)? {
385         self.ast.build_node(NodeKind::Constructor, ()).with_location(token.location).done()
386      } else if let Some(token) = self.try_next(TokenKind::Static)? {
387         self.ast.build_node(NodeKind::Static, ()).with_location(token.location).done()
388      } else {
389         NodeId::EMPTY
390      };
391
392      let parameters = self
393         .ast
394         .build_node(NodeKind::Parameters, kind)
395         .with_location(left_paren.location)
396         .with_children(parameters)
397         .done();
398      let name_location = self.ast.location(name);
399      let head = self
400         .ast
401         .build_node(NodeKind::FunctionHead, (name, parameters))
402         .with_location(name_location)
403         .done();
404
405      let body = if self.lexer.peek_token()?.kind == TokenKind::Assign {
406         let _equals = self.lexer.next_token();
407         self.parse_expression(0)?
408      } else {
409         NodeId::EMPTY
410      };
411
412      Ok(self
413         .ast
414         .build_node(NodeKind::Func, (head, body))
415         .with_location(func_token.location)
416         .done())
417   }
418
419   /// Parses a "break-like" expression. This includes `break` and `return`.
420   ///
421   /// A break-like expression is a token followed followed by an optional value on the same line as
422   /// that token.
423   fn parse_break_like(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
424      let next_token = self.lexer.peek_token()?;
425      let result = if next_token.location.line > token.location.line
426         || matches!(next_token.kind, TokenKind::End)
427      {
428         NodeId::EMPTY
429      } else {
430         self.parse_expression(0)?
431      };
432      Ok(self.ast.build_node(kind, result).with_location(token.location).done())
433   }
434
435   /// Parses a struct declaration.
436   fn parse_struct(&mut self, struct_token: Token) -> Result<NodeId, Error> {
437      let name = self.lexer.next_token()?;
438      let name = self.parse_identifier(name)?;
439      Ok(self.ast.build_node(NodeKind::Struct, name).with_location(struct_token.location).done())
440   }
441
442   /// Parses an `as` block.
443   fn parse_as(&mut self, token: Token) -> Result<NodeId, Error> {
444      let implementee = self.parse_expression(0)?;
445      let mut items = Vec::new();
446      // Note that we parse any type of item inside of the `impl` block.
447      // The codegen phase is the thing that ensures the items declared are valid.
448      self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
449      let _end = self.lexer.next_token()?;
450      Ok(self
451         .ast
452         .build_node(NodeKind::ImplAs, implementee)
453         .with_location(token.location)
454         .with_children(items)
455         .done())
456   }
457
458   /// Parses a trait declaration.
459   fn parse_trait(&mut self, trait_token: Token) -> Result<NodeId, Error> {
460      let name = self.lexer.next_token()?;
461      let name = self.parse_identifier(name)?;
462      let mut items = Vec::new();
463      // Just like with `impl`, we allow for any item here, and the codegen phase ensures they're
464      // functions.
465      self.parse_terminated_block(&trait_token, &mut items, |k| k == &TokenKind::End)?;
466      let _end = self.lexer.next_token()?;
467      Ok(self
468         .ast
469         .build_node(NodeKind::Trait, name)
470         .with_location(trait_token.location)
471         .with_children(items)
472         .done())
473   }
474
475   /// Parses a prefix expression.
476   fn parse_prefix(&mut self, token: Token) -> Result<NodeId, Error> {
477      match &token.kind {
478         TokenKind::Nil => Ok(self.parse_unit(token, NodeKind::Nil)),
479         TokenKind::False => Ok(self.parse_unit(token, NodeKind::False)),
480         TokenKind::True => Ok(self.parse_unit(token, NodeKind::True)),
481         TokenKind::Number(_) => Ok(self.parse_number(token)),
482         TokenKind::String(_) => Ok(self.parse_string(token)),
483         TokenKind::LongString(_) => self.parse_long_string(token),
484         TokenKind::Identifier(_) => self.parse_identifier(token),
485
486         TokenKind::Minus => self.unary_operator(token, NodeKind::Negate),
487         TokenKind::Bang => self.unary_operator(token, NodeKind::Not),
488
489         TokenKind::At => {
490            let name = self.lexer.next_token()?;
491            let name = self.parse_identifier(name)?;
492            Ok(self.ast.build_node(NodeKind::Field, name).with_location(token.location).done())
493         }
494
495         TokenKind::LeftParen => {
496            let inner = self.parse_expression(0)?;
497            if !matches!(self.lexer.next_token()?.kind, TokenKind::RightParen) {
498               return Err(self.error(&token, ErrorKind::MissingRightParen));
499            }
500            Ok(inner)
501         }
502         TokenKind::LeftBracket => self.parse_list_or_dict(token),
503
504         TokenKind::Do => self.parse_do_block(token),
505         TokenKind::If => self.parse_if_expression(token),
506         TokenKind::While => self.parse_while_expression(token),
507         TokenKind::For => self.parse_for_expression(token),
508
509         TokenKind::Break => self.parse_break_like(token, NodeKind::Break),
510         TokenKind::Return => self.parse_break_like(token, NodeKind::Return),
511
512         TokenKind::Func => self.parse_function(token, true),
513         TokenKind::Struct => self.parse_struct(token),
514         TokenKind::As => self.parse_as(token),
515         TokenKind::Trait => self.parse_trait(token),
516
517         _ => Err(self.error(&token, ErrorKind::InvalidPrefixToken)),
518      }
519   }
520
521   /// Parses a binary operator.
522   fn binary_operator(
523      &mut self,
524      left: NodeId,
525      token: Token,
526      kind: NodeKind,
527   ) -> Result<NodeId, Error> {
528      let precedence = Self::precedence(&token.kind)
529         - (Self::associativity(&token.kind) == Associativity::Right) as i8;
530      let right = self.parse_expression(precedence)?;
531      Ok(self.ast.build_node(kind, (left, right)).with_location(token.location).done())
532   }
533
534   /// Parses a function call.
535   fn function_call(&mut self, left: NodeId, left_paren: Token) -> Result<NodeId, Error> {
536      let mut arguments = Vec::new();
537      self.parse_comma_separated(&mut arguments, TokenKind::RightParen, |p| {
538         p.parse_expression(0)
539      })?;
540      Ok(self
541         .ast
542         .build_node(NodeKind::Call, left)
543         .with_location(left_paren.location)
544         .with_children(arguments)
545         .done())
546   }
547
548   /// Parses an `impl` block.
549   fn parse_impl(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
550      let mut items = Vec::new();
551      // Note that we parse any type of item inside of the `impl` block.
552      // The codegen phase is the thing that ensures the items declared are valid.
553      self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
554      let _end = self.lexer.next_token()?;
555      Ok(self
556         .ast
557         .build_node(NodeKind::Impl, left)
558         .with_location(token.location)
559         .with_children(items)
560         .done())
561   }
562
563   /// Parses an infix token.
564   fn parse_infix(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
565      match &token.kind {
566         TokenKind::Plus => self.binary_operator(left, token, NodeKind::Add),
567         TokenKind::Minus => self.binary_operator(left, token, NodeKind::Subtract),
568         TokenKind::Star => self.binary_operator(left, token, NodeKind::Multiply),
569         TokenKind::Slash => self.binary_operator(left, token, NodeKind::Divide),
570
571         TokenKind::And => self.binary_operator(left, token, NodeKind::And),
572         TokenKind::Or => self.binary_operator(left, token, NodeKind::Or),
573         TokenKind::Equal => self.binary_operator(left, token, NodeKind::Equal),
574         TokenKind::NotEqual => self.binary_operator(left, token, NodeKind::NotEqual),
575         TokenKind::Less => self.binary_operator(left, token, NodeKind::Less),
576         TokenKind::Greater => self.binary_operator(left, token, NodeKind::Greater),
577         TokenKind::LessEqual => self.binary_operator(left, token, NodeKind::LessEqual),
578         TokenKind::GreaterEqual => self.binary_operator(left, token, NodeKind::GreaterEqual),
579
580         TokenKind::Assign => self.binary_operator(left, token, NodeKind::Assign),
581         TokenKind::Dot => self.binary_operator(left, token, NodeKind::Dot),
582
583         TokenKind::LeftParen => self.function_call(left, token),
584
585         TokenKind::Impl => self.parse_impl(left, token),
586
587         _ => Err(self.error(&token, ErrorKind::InvalidInfixToken)),
588      }
589   }
590
591   /// Returns whether an infix token is not allowed to be carried over to the next line.
592   fn is_invalid_continuation_token(token: &TokenKind) -> bool {
593      matches!(token, TokenKind::LeftParen)
594   }
595
596   /// Parses an expression.
597   fn parse_expression(&mut self, precedence: i8) -> Result<NodeId, Error> {
598      let mut token = self.lexer.next_token()?;
599      let mut left = self.parse_prefix(token)?;
600
601      while precedence < Self::precedence(&self.lexer.peek_token()?.kind) {
602         let next_token = self.lexer.peek_token()?;
603         if Self::is_invalid_continuation_token(&next_token.kind)
604            && next_token.location.line > self.ast.location(left).line
605         {
606            break;
607         }
608         token = self.lexer.next_token()?;
609         left = self.parse_infix(left, token)?;
610      }
611
612      Ok(left)
613   }
614
615   /// Parses a single item.
616   fn parse_item(&mut self) -> Result<NodeId, Error> {
617      let token = self.lexer.peek_token()?;
618      match &token.kind {
619         TokenKind::Func => {
620            let func_token = self.lexer.next_token()?;
621            self.parse_function(func_token, false)
622         }
623         _ => self.parse_expression(0),
624      }
625   }
626
627   /// Parses a Mica program.
628   pub fn parse(mut self) -> Result<(Ast, NodeId), Error> {
629      let first_token = self.lexer.peek_token()?;
630      let mut main = Vec::new();
631      loop {
632         if self.lexer.peek_token()?.kind == TokenKind::Eof {
633            let main = self
634               .ast
635               .build_node(NodeKind::Main, ())
636               .with_location(first_token.location)
637               .with_children(main)
638               .done();
639            return Ok((self.ast, main));
640         }
641         let item = self.parse_item()?;
642         main.push(item);
643      }
644   }
645}
646
647/// The associativity of an infix token.
648#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
649#[repr(u8)]
650enum Associativity {
651   Left,
652   Right,
653}