use std::rc::Rc;
use crate::ast::{Ast, NodeId, NodeKind};
use crate::common::{Error, ErrorKind};
use crate::lexer::{Lexer, Token, TokenKind};
pub struct Parser {
lexer: Lexer,
ast: Ast,
}
impl Parser {
pub fn new(lexer: Lexer) -> Self {
Self {
ast: Ast::new(Rc::clone(&lexer.module_name)),
lexer,
}
}
fn error(&self, token: &Token, kind: ErrorKind) -> Error {
Error::Compile {
module_name: Rc::clone(&self.lexer.module_name),
kind,
location: token.location,
}
}
fn expect(
&mut self,
kind: TokenKind,
error: impl FnOnce(&Token) -> ErrorKind,
) -> Result<Token, Error> {
let next_token = self.lexer.peek_token()?;
if next_token.kind == kind {
Ok(self.lexer.next_token()?)
} else {
Err(self.error(&next_token, error(&next_token)))
}
}
fn try_next(&mut self, kind: TokenKind) -> Result<Option<Token>, Error> {
let next_token = self.lexer.peek_token()?;
Ok(if next_token.kind == kind {
Some(self.lexer.next_token()?)
} else {
None
})
}
fn precedence(kind: &TokenKind) -> i8 {
match kind {
TokenKind::Or => 1,
TokenKind::And => 2,
TokenKind::Assign => 3,
| TokenKind::Equal
| TokenKind::NotEqual
| TokenKind::Less
| TokenKind::Greater
| TokenKind::LessEqual
| TokenKind::GreaterEqual => 4,
TokenKind::Plus | TokenKind::Minus => 5,
TokenKind::Star | TokenKind::Slash => 6,
TokenKind::LeftParen | TokenKind::Dot | TokenKind::Impl => 7,
_ => 0,
}
}
fn associativity(kind: &TokenKind) -> Associativity {
match kind {
TokenKind::Assign => Associativity::Right,
_ => Associativity::Left,
}
}
fn parse_unit(&mut self, token: Token, kind: NodeKind) -> NodeId {
self.ast.build_node(kind, ()).with_location(token.location).done()
}
fn parse_number(&mut self, token: Token) -> NodeId {
if let &TokenKind::Number(x) = &token.kind {
self
.ast
.build_node(NodeKind::Number, ())
.with_location(token.location)
.with_number(x)
.done()
} else {
panic!("next token must be a number");
}
}
fn parse_string(&mut self, token: Token) -> NodeId {
if let TokenKind::String(s) = token.kind {
self
.ast
.build_node(NodeKind::String, ())
.with_location(token.location)
.with_string(s)
.done()
} else {
panic!("next token must be a string");
}
}
fn parse_long_string(&mut self, first: Token) -> Result<NodeId, Error> {
let mut content = String::new();
if let TokenKind::LongString(s) = first.kind {
content.push_str(&s);
} else {
panic!("first token must be a long string")
}
while let TokenKind::LongString(_) = self.lexer.peek_token()?.kind {
let s = match self.lexer.next_token()?.kind {
TokenKind::LongString(s) => s,
_ => unreachable!(),
};
content.push('\n');
content.push_str(&s);
}
Ok(self
.ast
.build_node(NodeKind::String, ())
.with_location(first.location)
.with_string(Rc::from(content))
.done())
}
fn parse_identifier(&mut self, token: Token) -> Result<NodeId, Error> {
if let TokenKind::Identifier(i) = token.kind {
Ok(self
.ast
.build_node(NodeKind::Identifier, ())
.with_location(token.location)
.with_string(i)
.done())
} else {
Err(self.error(&token, ErrorKind::IdentifierExpected))
}
}
fn unary_operator(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
let right = self.parse_expression(Self::precedence(&TokenKind::Star))?;
Ok(self.ast.build_node(kind, right).with_location(token.location).done())
}
fn parse_terminated_block(
&mut self,
token: &Token,
children: &mut Vec<NodeId>,
is_terminator: impl Fn(&TokenKind) -> bool,
) -> Result<(), Error> {
while !is_terminator(&self.lexer.peek_token()?.kind) {
if self.lexer.peek_token()?.kind == TokenKind::Eof {
return Err(self.error(token, ErrorKind::MissingEnd));
}
children.push(self.parse_item()?);
}
Ok(())
}
fn parse_comma_separated(
&mut self,
dest: &mut Vec<NodeId>,
end: TokenKind,
mut next: impl FnMut(&mut Self) -> Result<NodeId, Error>,
) -> Result<(), Error> {
loop {
let token = self.lexer.peek_token()?;
match &token.kind {
TokenKind::Eof => return Err(self.error(&token, ErrorKind::UnexpectedEof)),
kind if *kind == end => {
self.lexer.next_token()?;
return Ok(());
}
_ => (),
}
dest.push(next(self)?);
match self.lexer.next_token()? {
Token {
kind: TokenKind::Comma,
..
} => (),
t if t.kind == end => return Ok(()),
token => return Err(self.error(&token, ErrorKind::CommaExpected)),
}
}
}
fn parse_list_or_dict(&mut self, token: Token) -> Result<NodeId, Error> {
#[derive(Clone, Copy, PartialEq, Eq)]
enum Mode {
Unknown,
Dict,
List,
}
let mut elements = Vec::new();
let mut mode = Mode::Unknown;
if self.lexer.peek_token()?.kind == TokenKind::Colon {
self.lexer.next_token()?;
self.expect(TokenKind::RightBracket, |_| {
ErrorKind::RightBracketExpectedToCloseEmptyDict
})?;
mode = Mode::Dict;
} else {
self.parse_comma_separated(&mut elements, TokenKind::RightBracket, |p| match mode {
Mode::Unknown => {
let key = p.parse_expression(0)?;
if p.lexer.peek_token()?.kind == TokenKind::Colon {
mode = Mode::Dict;
let colon = p.lexer.next_token()?;
let value = p.parse_expression(0)?;
Ok(p
.ast
.build_node(NodeKind::DictPair, (key, value))
.with_location(colon.location)
.done())
} else {
mode = Mode::List;
Ok(key)
}
}
Mode::Dict => {
let key = p.parse_expression(0)?;
let colon = p.expect(TokenKind::Colon, |_| ErrorKind::ColonExpectedAfterDictKey)?;
let value = p.parse_expression(0)?;
Ok(p
.ast
.build_node(NodeKind::DictPair, (key, value))
.with_location(colon.location)
.done())
}
Mode::List => p.parse_expression(0),
})?;
}
Ok(self
.ast
.build_node(
match mode {
Mode::Unknown | Mode::List => NodeKind::List,
Mode::Dict => NodeKind::Dict,
},
(),
)
.with_location(token.location)
.with_children(elements)
.done())
}
fn parse_do_block(&mut self, token: Token) -> Result<NodeId, Error> {
let mut children = Vec::new();
self.parse_terminated_block(&token, &mut children, |k| *k == TokenKind::End)?;
let _end = self.lexer.next_token();
Ok(self
.ast
.build_node(NodeKind::Do, ())
.with_location(token.location)
.with_children(children)
.done())
}
fn parse_if_expression(&mut self, if_token: Token) -> Result<NodeId, Error> {
let mut branches = Vec::new();
let mut else_token = None;
loop {
let (condition, do_token) = if let Some(token) = else_token.clone() {
(None, token)
} else {
let condition = self.parse_expression(0)?;
let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
(Some(condition), do_token)
};
let mut branch = Vec::new();
self.parse_terminated_block(&do_token, &mut branch, |k| {
matches!(k, TokenKind::Elif | TokenKind::Else | TokenKind::End)
})?;
branches.push(
if let Some(condition) = condition {
self.ast.build_node(NodeKind::IfBranch, condition).with_children(branch)
} else {
self.ast.build_node(NodeKind::ElseBranch, ()).with_children(branch)
}
.with_location(do_token.location)
.done(),
);
let next_token = self.lexer.next_token()?;
match &next_token.kind {
TokenKind::Elif => {
if else_token.is_some() {
return Err(self.error(&next_token, ErrorKind::BranchAfterElse));
}
}
TokenKind::Else => {
else_token = Some(next_token);
}
TokenKind::Eof => return Err(self.error(&do_token, ErrorKind::MissingEnd)),
TokenKind::End => break,
_ => return Err(self.error(&next_token, ErrorKind::InvalidIfBranchToken)),
}
}
Ok(self
.ast
.build_node(NodeKind::If, ())
.with_location(if_token.location)
.with_children(branches)
.done())
}
fn parse_while_expression(&mut self, token: Token) -> Result<NodeId, Error> {
let condition = self.parse_expression(0)?;
let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
let mut body = Vec::new();
self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
let _end = self.lexer.next_token();
Ok(self
.ast
.build_node(NodeKind::While, condition)
.with_location(token.location)
.with_children(body)
.done())
}
fn parse_for_expression(&mut self, token: Token) -> Result<NodeId, Error> {
let binding = self.parse_expression(0)?;
let _in_token = self.expect(TokenKind::In, |_| ErrorKind::InExpectedAfterForBinding)?;
let iterator = self.parse_expression(0)?;
let do_token = self.expect(TokenKind::Do, |_| ErrorKind::MissingDo)?;
let mut body = Vec::new();
self.parse_terminated_block(&do_token, &mut body, |k| *k == TokenKind::End)?;
let _end = self.lexer.next_token();
Ok(self
.ast
.build_node(NodeKind::For, (binding, iterator))
.with_location(token.location)
.with_children(body)
.done())
}
fn parse_function(&mut self, func_token: Token, anonymous: bool) -> Result<NodeId, Error> {
let name = if !anonymous {
let name = self.lexer.next_token()?;
self.parse_identifier(name)?
} else {
NodeId::EMPTY
};
let left_paren = self.expect(TokenKind::LeftParen, |_| ErrorKind::LeftParenExpected)?;
let mut parameters = Vec::new();
self.parse_comma_separated(&mut parameters, TokenKind::RightParen, |p| {
let name = p.lexer.next_token()?;
p.parse_identifier(name)
})?;
let kind = if let Some(token) = self.try_next(TokenKind::Constructor)? {
self.ast.build_node(NodeKind::Constructor, ()).with_location(token.location).done()
} else if let Some(token) = self.try_next(TokenKind::Static)? {
self.ast.build_node(NodeKind::Static, ()).with_location(token.location).done()
} else {
NodeId::EMPTY
};
let parameters = self
.ast
.build_node(NodeKind::Parameters, kind)
.with_location(left_paren.location)
.with_children(parameters)
.done();
let name_location = self.ast.location(name);
let head = self
.ast
.build_node(NodeKind::FunctionHead, (name, parameters))
.with_location(name_location)
.done();
let body = if self.lexer.peek_token()?.kind == TokenKind::Assign {
let _equals = self.lexer.next_token();
self.parse_expression(0)?
} else {
NodeId::EMPTY
};
Ok(self
.ast
.build_node(NodeKind::Func, (head, body))
.with_location(func_token.location)
.done())
}
fn parse_break_like(&mut self, token: Token, kind: NodeKind) -> Result<NodeId, Error> {
let next_token = self.lexer.peek_token()?;
let result = if next_token.location.line > token.location.line
|| matches!(next_token.kind, TokenKind::End)
{
NodeId::EMPTY
} else {
self.parse_expression(0)?
};
Ok(self.ast.build_node(kind, result).with_location(token.location).done())
}
fn parse_struct(&mut self, struct_token: Token) -> Result<NodeId, Error> {
let name = self.lexer.next_token()?;
let name = self.parse_identifier(name)?;
Ok(self.ast.build_node(NodeKind::Struct, name).with_location(struct_token.location).done())
}
fn parse_as(&mut self, token: Token) -> Result<NodeId, Error> {
let implementee = self.parse_expression(0)?;
let mut items = Vec::new();
self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
let _end = self.lexer.next_token()?;
Ok(self
.ast
.build_node(NodeKind::ImplAs, implementee)
.with_location(token.location)
.with_children(items)
.done())
}
fn parse_trait(&mut self, trait_token: Token) -> Result<NodeId, Error> {
let name = self.lexer.next_token()?;
let name = self.parse_identifier(name)?;
let mut items = Vec::new();
self.parse_terminated_block(&trait_token, &mut items, |k| k == &TokenKind::End)?;
let _end = self.lexer.next_token()?;
Ok(self
.ast
.build_node(NodeKind::Trait, name)
.with_location(trait_token.location)
.with_children(items)
.done())
}
fn parse_prefix(&mut self, token: Token) -> Result<NodeId, Error> {
match &token.kind {
TokenKind::Nil => Ok(self.parse_unit(token, NodeKind::Nil)),
TokenKind::False => Ok(self.parse_unit(token, NodeKind::False)),
TokenKind::True => Ok(self.parse_unit(token, NodeKind::True)),
TokenKind::Number(_) => Ok(self.parse_number(token)),
TokenKind::String(_) => Ok(self.parse_string(token)),
TokenKind::LongString(_) => self.parse_long_string(token),
TokenKind::Identifier(_) => self.parse_identifier(token),
TokenKind::Minus => self.unary_operator(token, NodeKind::Negate),
TokenKind::Bang => self.unary_operator(token, NodeKind::Not),
TokenKind::At => {
let name = self.lexer.next_token()?;
let name = self.parse_identifier(name)?;
Ok(self.ast.build_node(NodeKind::Field, name).with_location(token.location).done())
}
TokenKind::LeftParen => {
let inner = self.parse_expression(0)?;
if !matches!(self.lexer.next_token()?.kind, TokenKind::RightParen) {
return Err(self.error(&token, ErrorKind::MissingRightParen));
}
Ok(inner)
}
TokenKind::LeftBracket => self.parse_list_or_dict(token),
TokenKind::Do => self.parse_do_block(token),
TokenKind::If => self.parse_if_expression(token),
TokenKind::While => self.parse_while_expression(token),
TokenKind::For => self.parse_for_expression(token),
TokenKind::Break => self.parse_break_like(token, NodeKind::Break),
TokenKind::Return => self.parse_break_like(token, NodeKind::Return),
TokenKind::Func => self.parse_function(token, true),
TokenKind::Struct => self.parse_struct(token),
TokenKind::As => self.parse_as(token),
TokenKind::Trait => self.parse_trait(token),
_ => Err(self.error(&token, ErrorKind::InvalidPrefixToken)),
}
}
fn binary_operator(
&mut self,
left: NodeId,
token: Token,
kind: NodeKind,
) -> Result<NodeId, Error> {
let precedence = Self::precedence(&token.kind)
- (Self::associativity(&token.kind) == Associativity::Right) as i8;
let right = self.parse_expression(precedence)?;
Ok(self.ast.build_node(kind, (left, right)).with_location(token.location).done())
}
fn function_call(&mut self, left: NodeId, left_paren: Token) -> Result<NodeId, Error> {
let mut arguments = Vec::new();
self.parse_comma_separated(&mut arguments, TokenKind::RightParen, |p| {
p.parse_expression(0)
})?;
Ok(self
.ast
.build_node(NodeKind::Call, left)
.with_location(left_paren.location)
.with_children(arguments)
.done())
}
fn parse_impl(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
let mut items = Vec::new();
self.parse_terminated_block(&token, &mut items, |k| k == &TokenKind::End)?;
let _end = self.lexer.next_token()?;
Ok(self
.ast
.build_node(NodeKind::Impl, left)
.with_location(token.location)
.with_children(items)
.done())
}
fn parse_infix(&mut self, left: NodeId, token: Token) -> Result<NodeId, Error> {
match &token.kind {
TokenKind::Plus => self.binary_operator(left, token, NodeKind::Add),
TokenKind::Minus => self.binary_operator(left, token, NodeKind::Subtract),
TokenKind::Star => self.binary_operator(left, token, NodeKind::Multiply),
TokenKind::Slash => self.binary_operator(left, token, NodeKind::Divide),
TokenKind::And => self.binary_operator(left, token, NodeKind::And),
TokenKind::Or => self.binary_operator(left, token, NodeKind::Or),
TokenKind::Equal => self.binary_operator(left, token, NodeKind::Equal),
TokenKind::NotEqual => self.binary_operator(left, token, NodeKind::NotEqual),
TokenKind::Less => self.binary_operator(left, token, NodeKind::Less),
TokenKind::Greater => self.binary_operator(left, token, NodeKind::Greater),
TokenKind::LessEqual => self.binary_operator(left, token, NodeKind::LessEqual),
TokenKind::GreaterEqual => self.binary_operator(left, token, NodeKind::GreaterEqual),
TokenKind::Assign => self.binary_operator(left, token, NodeKind::Assign),
TokenKind::Dot => self.binary_operator(left, token, NodeKind::Dot),
TokenKind::LeftParen => self.function_call(left, token),
TokenKind::Impl => self.parse_impl(left, token),
_ => Err(self.error(&token, ErrorKind::InvalidInfixToken)),
}
}
fn is_invalid_continuation_token(token: &TokenKind) -> bool {
matches!(token, TokenKind::LeftParen)
}
fn parse_expression(&mut self, precedence: i8) -> Result<NodeId, Error> {
let mut token = self.lexer.next_token()?;
let mut left = self.parse_prefix(token)?;
while precedence < Self::precedence(&self.lexer.peek_token()?.kind) {
let next_token = self.lexer.peek_token()?;
if Self::is_invalid_continuation_token(&next_token.kind)
&& next_token.location.line > self.ast.location(left).line
{
break;
}
token = self.lexer.next_token()?;
left = self.parse_infix(left, token)?;
}
Ok(left)
}
fn parse_item(&mut self) -> Result<NodeId, Error> {
let token = self.lexer.peek_token()?;
match &token.kind {
TokenKind::Func => {
let func_token = self.lexer.next_token()?;
self.parse_function(func_token, false)
}
_ => self.parse_expression(0),
}
}
pub fn parse(mut self) -> Result<(Ast, NodeId), Error> {
let first_token = self.lexer.peek_token()?;
let mut main = Vec::new();
loop {
if self.lexer.peek_token()?.kind == TokenKind::Eof {
let main = self
.ast
.build_node(NodeKind::Main, ())
.with_location(first_token.location)
.with_children(main)
.done();
return Ok((self.ast, main));
}
let item = self.parse_item()?;
main.push(item);
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
enum Associativity {
Left,
Right,
}