1use std::rc::Rc;
4
5use crate::ast::{Ast, NodeId, NodeKind};
6use crate::common::{Error, ErrorKind};
7use crate::lexer::{Lexer, Token, TokenKind};
8
9pub struct Parser {
11 lexer: Lexer,
12 ast: Ast,
13}
14
15impl Parser {
16 pub fn new(lexer: Lexer) -> Self {
18 Self {
19 ast: Ast::new(Rc::clone(&lexer.module_name)),
20 lexer,
21 }
22 }
23
24 fn error(&self, token: &Token, kind: ErrorKind) -> Error {
26 Error::Compile {
27 module_name: Rc::clone(&self.lexer.module_name),
28 kind,
29 location: token.location,
30 }
31 }
32
33 fn expect(
35 &mut self,
36 kind: TokenKind,
37 error: impl FnOnce(&Token) -> ErrorKind,
38 ) -> Result<Token, Error> {
39 let next_token = self.lexer.peek_token()?;
40 if next_token.kind == kind {
41 Ok(self.lexer.next_token()?)
42 } else {
43 Err(self.error(&next_token, error(&next_token)))
44 }
45 }
46
47 fn try_next(&mut self, kind: TokenKind) -> Result<Option<Token>, Error> {
50 let next_token = self.lexer.peek_token()?;
51 Ok(if next_token.kind == kind {
52 Some(self.lexer.next_token()?)
53 } else {
54 None
55 })
56 }
57
58 fn precedence(kind: &TokenKind) -> i8 {
60 match kind {
61 TokenKind::Or => 1,
62 TokenKind::And => 2,
63 TokenKind::Assign => 3,
64 | TokenKind::Equal
65 | TokenKind::NotEqual
66 | TokenKind::Less
67 | TokenKind::Greater
68 | TokenKind::LessEqual
69 | TokenKind::GreaterEqual => 4,
70 TokenKind::Plus | TokenKind::Minus => 5,
71 TokenKind::Star | TokenKind::Slash => 6,
72 TokenKind::LeftParen | TokenKind::Dot | TokenKind::Impl => 7,
73 _ => 0,
74 }
75 }
76
77 fn associativity(kind: &TokenKind) -> Associativity {
79 match kind {
80 TokenKind::Assign => Associativity::Right,
81 _ => Associativity::Left,
82 }
83 }
84
85 fn parse_unit(&mut self, token: Token, kind: NodeKind) -> NodeId {
88 self.ast.build_node(kind, ()).with_location(token.location).done()
89 }
90
91 fn parse_number(&mut self, token: Token) -> NodeId {
93 if let &TokenKind::Number(x) = &token.kind {
94 self
95 .ast
96 .build_node(NodeKind::Number, ())
97 .with_location(token.location)
98 .with_number(x)
99 .done()
100 } else {
101 panic!("next token must be a number");
102 }
103 }
104
105 fn parse_string(&mut self, token: Token) -> NodeId {
107 if let TokenKind::String(s) = token.kind {
108 self
109 .ast
110 .build_node(NodeKind::String, ())
111 .with_location(token.location)
112 .with_string(s)
113 .done()
114 } else {
115 panic!("next token must be a string");
116 }
117 }
118
119 fn parse_long_string(&mut self, first: Token) -> Result<NodeId, Error> {
121 let mut content = String::new();
122 if let TokenKind::LongString(s) = first.kind {
123 content.push_str(&s);
124 } else {
125 panic!("first token must be a long string")
126 }
127 while let TokenKind::LongString(_) = self.lexer.peek_token()?.kind {
128 let s = match self.lexer.next_token()?.kind {
129 TokenKind::LongString(s) => s,
130 _ => unreachable!(),
131 };
132 content.push('\n');
133 content.push_str(&s);
134 }
135 Ok(self
136 .ast
137 .build_node(NodeKind::String, ())
138 .with_location(first.location)
139 .with_string(Rc::from(content))
140 .done())
141 }
142
143 fn parse_identifier(&mut self, token: Token) -> Result<NodeId, Error> {
145 if let TokenKind::Identifier(i) = token.kind {
146 Ok(self
147 .ast
148 .build_node(NodeKind::Identifier, ())
149 .with_location(token.location)
150 .with_string(i)
151 .done())
152 } else {
153 Err(self.error(&token, ErrorKind::IdentifierExpected))
154 }
155 }
156
157 fn unary_operator(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
159 let right = self.parse_expression(Self::precedence(&TokenKind::Star))?;
160 Ok(self.ast.build_node(kind, right).with_location(token.location).done())
161 }
162
163 fn parse_terminated_block(
165 &mut self,
166 token: &Token,
167 children: &mut Vec<NodeId>,
168 is_terminator: impl Fn(&TokenKind) -> bool,
169 ) -> Result<(), Error> {
170 while !is_terminator(&self.lexer.peek_token()?.kind) {
171 if self.lexer.peek_token()?.kind == TokenKind::Eof {
172 return Err(self.error(token, ErrorKind::MissingEnd));
173 }
174 children.push(self.parse_item()?);
175 }
176 Ok(())
177 }
178
179 fn parse_comma_separated(
181 &mut self,
182 dest: &mut Vec<NodeId>,
183 end: TokenKind,
184 mut next: impl FnMut(&mut Self) -> Result<NodeId, Error>,
185 ) -> Result<(), Error> {
186 loop {
187 let token = self.lexer.peek_token()?;
188 match &token.kind {
189 TokenKind::Eof => return Err(self.error(&token, ErrorKind::UnexpectedEof)),
190 kind if *kind == end => {
191 self.lexer.next_token()?;
192 return Ok(());
193 }
194 _ => (),
195 }
196 dest.push(next(self)?);
197 match self.lexer.next_token()? {
198 Token {
199 kind: TokenKind::Comma,
200 ..
201 } => (),
202 t if t.kind == end => return Ok(()),
203 token => return Err(self.error(&token, ErrorKind::CommaExpected)),
204 }
205 }
206 }
207
208 fn parse_list_or_dict(&mut self, token: Token) -> Result<NodeId, Error> {
210 #[derive(Clone, Copy, PartialEq, Eq)]
211 enum Mode {
212 Unknown,
213 Dict,
214 List,
215 }
216
217 let mut elements = Vec::new();
218 let mut mode = Mode::Unknown;
219 if self.lexer.peek_token()?.kind == TokenKind::Colon {
220 self.lexer.next_token()?;
221 self.expect(TokenKind::RightBracket, |_| {
222 ErrorKind::RightBracketExpectedToCloseEmptyDict
223 })?;
224 mode = Mode::Dict;
225 } else {
226 self.parse_comma_separated(&mut elements, TokenKind::RightBracket, |p| match mode {
227 Mode::Unknown => {
228 let key = p.parse_expression(0)?;
229 if p.lexer.peek_token()?.kind == TokenKind::Colon {
230 mode = Mode::Dict;
231 let colon = p.lexer.next_token()?;
232 let value = p.parse_expression(0)?;
233 Ok(p
234 .ast
235 .build_node(NodeKind::DictPair, (key, value))
236 .with_location(colon.location)
237 .done())
238 } else {
239 mode = Mode::List;
240 Ok(key)
241 }
242 }
243 Mode::Dict => {
244 let key = p.parse_expression(0)?;
245 let colon = p.expect(TokenKind::Colon, |_| ErrorKind::ColonExpectedAfterDictKey)?;
246 let value = p.parse_expression(0)?;
247 Ok(p
248 .ast
249 .build_node(NodeKind::DictPair, (key, value))
250 .with_location(colon.location)
251 .done())
252 }
253 Mode::List => p.parse_expression(0),
254 })?;
255 }
256
257 Ok(self
258 .ast
259 .build_node(
260 match mode {
261 Mode::Unknown | Mode::List => NodeKind::List,
262 Mode::Dict => NodeKind::Dict,
263 },
264 (),
265 )
266 .with_location(token.location)
267 .with_children(elements)
268 .done())
269 }
270
271 fn parse_do_block(&mut self, token: Token) -> Result<NodeId, Error> {
273 let mut children = Vec::new();
274 self.parse_terminated_block(&token, &mut children, |k| *k == TokenKind::End)?;
275 let _end = self.lexer.next_token();
276 Ok(self
277 .ast
278 .build_node(NodeKind::Do, ())
279 .with_location(token.location)
280 .with_children(children)
281 .done())
282 }
283
284 fn parse_if_expression(&mut self, if_token: Token) -> Result<NodeId, Error> {
286 let mut branches = Vec::new();
287 let mut else_token = None;
288
289 loop {
290 let (condition, do_token) = if let Some(token) = else_token.clone() {
291 (None, token)
292 } else {
293 let condition = self.parse_expression(0)?;
294 let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
295 (Some(condition), do_token)
296 };
297 let mut branch = Vec::new();
298 self.parse_terminated_block(&do_token, &mut branch, |k| {
299 matches!(k, TokenKind::Elif | TokenKind::Else | TokenKind::End)
300 })?;
301 branches.push(
302 if let Some(condition) = condition {
303 self.ast.build_node(NodeKind::IfBranch, condition).with_children(branch)
304 } else {
305 self.ast.build_node(NodeKind::ElseBranch, ()).with_children(branch)
306 }
307 .with_location(do_token.location)
308 .done(),
309 );
310
311 let next_token = self.lexer.next_token()?;
312 match &next_token.kind {
313 TokenKind::Elif => {
314 if else_token.is_some() {
315 return Err(self.error(&next_token, ErrorKind::BranchAfterElse));
316 }
317 }
318 TokenKind::Else => {
319 else_token = Some(next_token);
320 }
321 TokenKind::Eof => return Err(self.error(&do_token, ErrorKind::MissingEnd)),
322 TokenKind::End => break,
323 _ => return Err(self.error(&next_token, ErrorKind::InvalidIfBranchToken)),
324 }
325 }
326
327 Ok(self
328 .ast
329 .build_node(NodeKind::If, ())
330 .with_location(if_token.location)
331 .with_children(branches)
332 .done())
333 }
334
335 fn parse_while_expression(&mut self, token: Token) -> Result<NodeId, Error> {
337 let condition = self.parse_expression(0)?;
338 let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
339 let mut body = Vec::new();
340 self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
341 let _end = self.lexer.next_token();
342 Ok(self
343 .ast
344 .build_node(NodeKind::While, condition)
345 .with_location(token.location)
346 .with_children(body)
347 .done())
348 }
349
350 fn parse_for_expression(&mut self, token: Token) -> Result<NodeId, Error> {
352 let binding = self.parse_expression(0)?;
353 let _in_token = self.expect(TokenKind::In, |_| ErrorKind::InExpectedAfterForBinding)?;
354 let iterator = self.parse_expression(0)?;
355 let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
356 let mut body = Vec::new();
357 self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
358 let _end = self.lexer.next_token();
359 Ok(self
360 .ast
361 .build_node(NodeKind::For, (binding, iterator))
362 .with_location(token.location)
363 .with_children(body)
364 .done())
365 }
366
367 fn parse_function(&mut self, func_token: Token, anonymous: bool) -> Result<NodeId, Error> {
369 let name = if !anonymous {
370 let name = self.lexer.next_token()?;
371 self.parse_identifier(name)?
372 } else {
373 NodeId::EMPTY
374 };
375
376 let left_paren = self.expect(TokenKind::LeftParen, |_| ErrorKind::LeftParenExpected)?;
377 let mut parameters = Vec::new();
378 self.parse_comma_separated(&mut parameters, TokenKind::RightParen, |p| {
379 let name = p.lexer.next_token()?;
380 p.parse_identifier(name)
381 })?;
382
383 let kind = if let Some(token) = self.try_next(TokenKind::Constructor)? {
385 self.ast.build_node(NodeKind::Constructor, ()).with_location(token.location).done()
386 } else if let Some(token) = self.try_next(TokenKind::Static)? {
387 self.ast.build_node(NodeKind::Static, ()).with_location(token.location).done()
388 } else {
389 NodeId::EMPTY
390 };
391
392 let parameters = self
393 .ast
394 .build_node(NodeKind::Parameters, kind)
395 .with_location(left_paren.location)
396 .with_children(parameters)
397 .done();
398 let name_location = self.ast.location(name);
399 let head = self
400 .ast
401 .build_node(NodeKind::FunctionHead, (name, parameters))
402 .with_location(name_location)
403 .done();
404
405 let body = if self.lexer.peek_token()?.kind == TokenKind::Assign {
406 let _equals = self.lexer.next_token();
407 self.parse_expression(0)?
408 } else {
409 NodeId::EMPTY
410 };
411
412 Ok(self
413 .ast
414 .build_node(NodeKind::Func, (head, body))
415 .with_location(func_token.location)
416 .done())
417 }
418
419 fn parse_break_like(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
424 let next_token = self.lexer.peek_token()?;
425 let result = if next_token.location.line > token.location.line
426 || matches!(next_token.kind, TokenKind::End)
427 {
428 NodeId::EMPTY
429 } else {
430 self.parse_expression(0)?
431 };
432 Ok(self.ast.build_node(kind, result).with_location(token.location).done())
433 }
434
435 fn parse_struct(&mut self, struct_token: Token) -> Result<NodeId, Error> {
437 let name = self.lexer.next_token()?;
438 let name = self.parse_identifier(name)?;
439 Ok(self.ast.build_node(NodeKind::Struct, name).with_location(struct_token.location).done())
440 }
441
442 fn parse_as(&mut self, token: Token) -> Result<NodeId, Error> {
444 let implementee = self.parse_expression(0)?;
445 let mut items = Vec::new();
446 self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
449 let _end = self.lexer.next_token()?;
450 Ok(self
451 .ast
452 .build_node(NodeKind::ImplAs, implementee)
453 .with_location(token.location)
454 .with_children(items)
455 .done())
456 }
457
458 fn parse_trait(&mut self, trait_token: Token) -> Result<NodeId, Error> {
460 let name = self.lexer.next_token()?;
461 let name = self.parse_identifier(name)?;
462 let mut items = Vec::new();
463 self.parse_terminated_block(&trait_token, &mut items, |k| k == &TokenKind::End)?;
466 let _end = self.lexer.next_token()?;
467 Ok(self
468 .ast
469 .build_node(NodeKind::Trait, name)
470 .with_location(trait_token.location)
471 .with_children(items)
472 .done())
473 }
474
475 fn parse_prefix(&mut self, token: Token) -> Result<NodeId, Error> {
477 match &token.kind {
478 TokenKind::Nil => Ok(self.parse_unit(token, NodeKind::Nil)),
479 TokenKind::False => Ok(self.parse_unit(token, NodeKind::False)),
480 TokenKind::True => Ok(self.parse_unit(token, NodeKind::True)),
481 TokenKind::Number(_) => Ok(self.parse_number(token)),
482 TokenKind::String(_) => Ok(self.parse_string(token)),
483 TokenKind::LongString(_) => self.parse_long_string(token),
484 TokenKind::Identifier(_) => self.parse_identifier(token),
485
486 TokenKind::Minus => self.unary_operator(token, NodeKind::Negate),
487 TokenKind::Bang => self.unary_operator(token, NodeKind::Not),
488
489 TokenKind::At => {
490 let name = self.lexer.next_token()?;
491 let name = self.parse_identifier(name)?;
492 Ok(self.ast.build_node(NodeKind::Field, name).with_location(token.location).done())
493 }
494
495 TokenKind::LeftParen => {
496 let inner = self.parse_expression(0)?;
497 if !matches!(self.lexer.next_token()?.kind, TokenKind::RightParen) {
498 return Err(self.error(&token, ErrorKind::MissingRightParen));
499 }
500 Ok(inner)
501 }
502 TokenKind::LeftBracket => self.parse_list_or_dict(token),
503
504 TokenKind::Do => self.parse_do_block(token),
505 TokenKind::If => self.parse_if_expression(token),
506 TokenKind::While => self.parse_while_expression(token),
507 TokenKind::For => self.parse_for_expression(token),
508
509 TokenKind::Break => self.parse_break_like(token, NodeKind::Break),
510 TokenKind::Return => self.parse_break_like(token, NodeKind::Return),
511
512 TokenKind::Func => self.parse_function(token, true),
513 TokenKind::Struct => self.parse_struct(token),
514 TokenKind::As => self.parse_as(token),
515 TokenKind::Trait => self.parse_trait(token),
516
517 _ => Err(self.error(&token, ErrorKind::InvalidPrefixToken)),
518 }
519 }
520
521 fn binary_operator(
523 &mut self,
524 left: NodeId,
525 token: Token,
526 kind: NodeKind,
527 ) -> Result<NodeId, Error> {
528 let precedence = Self::precedence(&token.kind)
529 - (Self::associativity(&token.kind) == Associativity::Right) as i8;
530 let right = self.parse_expression(precedence)?;
531 Ok(self.ast.build_node(kind, (left, right)).with_location(token.location).done())
532 }
533
534 fn function_call(&mut self, left: NodeId, left_paren: Token) -> Result<NodeId, Error> {
536 let mut arguments = Vec::new();
537 self.parse_comma_separated(&mut arguments, TokenKind::RightParen, |p| {
538 p.parse_expression(0)
539 })?;
540 Ok(self
541 .ast
542 .build_node(NodeKind::Call, left)
543 .with_location(left_paren.location)
544 .with_children(arguments)
545 .done())
546 }
547
548 fn parse_impl(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
550 let mut items = Vec::new();
551 self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
554 let _end = self.lexer.next_token()?;
555 Ok(self
556 .ast
557 .build_node(NodeKind::Impl, left)
558 .with_location(token.location)
559 .with_children(items)
560 .done())
561 }
562
563 fn parse_infix(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
565 match &token.kind {
566 TokenKind::Plus => self.binary_operator(left, token, NodeKind::Add),
567 TokenKind::Minus => self.binary_operator(left, token, NodeKind::Subtract),
568 TokenKind::Star => self.binary_operator(left, token, NodeKind::Multiply),
569 TokenKind::Slash => self.binary_operator(left, token, NodeKind::Divide),
570
571 TokenKind::And => self.binary_operator(left, token, NodeKind::And),
572 TokenKind::Or => self.binary_operator(left, token, NodeKind::Or),
573 TokenKind::Equal => self.binary_operator(left, token, NodeKind::Equal),
574 TokenKind::NotEqual => self.binary_operator(left, token, NodeKind::NotEqual),
575 TokenKind::Less => self.binary_operator(left, token, NodeKind::Less),
576 TokenKind::Greater => self.binary_operator(left, token, NodeKind::Greater),
577 TokenKind::LessEqual => self.binary_operator(left, token, NodeKind::LessEqual),
578 TokenKind::GreaterEqual => self.binary_operator(left, token, NodeKind::GreaterEqual),
579
580 TokenKind::Assign => self.binary_operator(left, token, NodeKind::Assign),
581 TokenKind::Dot => self.binary_operator(left, token, NodeKind::Dot),
582
583 TokenKind::LeftParen => self.function_call(left, token),
584
585 TokenKind::Impl => self.parse_impl(left, token),
586
587 _ => Err(self.error(&token, ErrorKind::InvalidInfixToken)),
588 }
589 }
590
591 fn is_invalid_continuation_token(token: &TokenKind) -> bool {
593 matches!(token, TokenKind::LeftParen)
594 }
595
596 fn parse_expression(&mut self, precedence: i8) -> Result<NodeId, Error> {
598 let mut token = self.lexer.next_token()?;
599 let mut left = self.parse_prefix(token)?;
600
601 while precedence < Self::precedence(&self.lexer.peek_token()?.kind) {
602 let next_token = self.lexer.peek_token()?;
603 if Self::is_invalid_continuation_token(&next_token.kind)
604 && next_token.location.line > self.ast.location(left).line
605 {
606 break;
607 }
608 token = self.lexer.next_token()?;
609 left = self.parse_infix(left, token)?;
610 }
611
612 Ok(left)
613 }
614
615 fn parse_item(&mut self) -> Result<NodeId, Error> {
617 let token = self.lexer.peek_token()?;
618 match &token.kind {
619 TokenKind::Func => {
620 let func_token = self.lexer.next_token()?;
621 self.parse_function(func_token, false)
622 }
623 _ => self.parse_expression(0),
624 }
625 }
626
627 pub fn parse(mut self) -> Result<(Ast, NodeId), Error> {
629 let first_token = self.lexer.peek_token()?;
630 let mut main = Vec::new();
631 loop {
632 if self.lexer.peek_token()?.kind == TokenKind::Eof {
633 let main = self
634 .ast
635 .build_node(NodeKind::Main, ())
636 .with_location(first_token.location)
637 .with_children(main)
638 .done();
639 return Ok((self.ast, main));
640 }
641 let item = self.parse_item()?;
642 main.push(item);
643 }
644 }
645}
646
647#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
649#[repr(u8)]
650enum Associativity {
651 Left,
652 Right,
653}