use std::num::IntErrorKind;
use ast::expression::{
Array, Binary, Expression, FunctionCall, Identifier, Index, Selection, Unary,
};
use ast::literal::Literal;
use ast::statement::{
Display, FunctionDeclaration, RepeatForever, RepeatUntil, Return, Set, Statement,
};
use ast::{Program, StatementList};
use lexer::token::{Token, TokenKind};
use lexer::Lexer;
use shared::error::{Error, ErrorKind};
use shared::span::{GetSpan, Span};
/// Value of the `CARGO_PKG_VERSION` environment variable captured at compile time,
/// or `None` when that variable was not set during the build.
pub const VERSION: Option<&str> = std::option_env!("CARGO_PKG_VERSION");
// Tests live in the separate `test` module, which is only compiled for test builds.
#[cfg(test)]
mod test;
/// Parser that pulls tokens from a [`Lexer`] one at a time and keeps exactly one
/// token of lookahead.
pub struct Parser<'lexer> {
/// Token source; asked for the next token every time the parser advances.
lexer: Lexer<'lexer>,
/// The token currently under inspection (not yet consumed).
cur_token: Token,
}
/// Evaluates to an `Err` carrying a `SyntaxError` whose message is built from the
/// macro arguments exactly as `format!` would build it.
macro_rules! parse_err {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        Err(shared::error::Error::new(&message, ErrorKind::SyntaxError))
    }};
}
/// Expands to the body of a left-associative binary-expression parser.
///
/// * `$self` - the parser (the `self` of the enclosing method).
/// * `$downstream_parse_fn` - method used to parse each operand.
/// * `$pattern` - token-kind pattern matching the operators handled at this level.
///
/// The expansion is a sequence of statements ending in `return`, so it has to be
/// used as the whole body of a method returning `Result<Expression, Error>`.
macro_rules! parse_binary_expr {
    ($self:ident, $downstream_parse_fn:ident, $pattern:pat) => {
        let start = $self.cur_token.span.start;
        let mut node = $self.$downstream_parse_fn()?;
        while matches!(&$self.cur_token.kind, $pattern) {
            let operator = $self.cur_token.kind.clone();
            $self.next_token();
            let right = $self.$downstream_parse_fn()?;
            // End the span at the right operand. `cur_token` has already moved on to
            // the token *after* the operand, so its end would overshoot the expression.
            let span = Span::new(start, right.span().end);
            node = Expression::Binary(Binary {
                operator,
                left: Box::new(node),
                right: Box::new(right),
                span,
            });
        }
        let result: Result<Expression, Error> = Ok(node);
        return result;
    };
}
impl<'lexer> Parser<'lexer> {
pub fn new(mut lexer: Lexer<'lexer>) -> Self {
let cur = lexer.next_token();
Parser {
lexer,
cur_token: cur,
}
}
/// Advances the parser by replacing `cur_token` with the lexer's next token.
fn next_token(&mut self) {
    let upcoming = self.lexer.next_token();
    self.cur_token = upcoming;
}
/// Returns `true` when the current token's kind equals `token_kind`.
#[inline]
fn cur_token_is(&self, token_kind: &TokenKind) -> bool {
    &self.cur_token.kind == token_kind
}
/// Consumes the current token if its kind is `expected_kind`.
///
/// # Errors
/// Returns a syntax error naming both the expected and the actual token kind when
/// they differ; the current token is left untouched in that case.
fn eat(&mut self, expected_kind: &TokenKind) -> Result<(), Error> {
    if !self.cur_token_is(expected_kind) {
        return parse_err!("expected {}, got {}", expected_kind, self.cur_token.kind);
    }
    self.next_token();
    Ok(())
}
/// Parses a whole program.
///
/// When the first token is already `Eof` the freshly created `Program` is returned
/// unchanged. Otherwise the statement list is parsed, the program span is extended
/// to the end of the token that follows it, and that token must be `Eof`.
///
/// # Errors
/// Propagates any error from statement parsing, or from the final `Eof` check.
pub fn parse_program(&mut self) -> Result<Program, Error> {
    let mut program = Program::new();
    if !self.cur_token_is(&TokenKind::Eof) {
        let body = self.parse_statement_list()?;
        program.statements = body.statements;
        program.span.end = self.cur_token.span.end;
        self.eat(&TokenKind::Eof)?;
    }
    Ok(program)
}
/// Parses a sequence of statements separated by `;` and/or newlines.
///
/// Leading newlines are skipped. Parsing stops, without consuming the token, at
/// `End`, `Eof` or `Otherwise`. A list that contains nothing but newlines before one
/// of those terminators is returned as an empty list instead of being rejected, so a
/// block whose terminator sits on the following line parses.
///
/// NOTE(review): the `span` of the returned list is whatever `StatementList::new()`
/// produced; this function never updates it.
///
/// # Errors
/// Returns a syntax error when two statements are not separated by `;` or a newline,
/// and propagates any error from parsing an individual statement.
fn parse_statement_list(&mut self) -> Result<StatementList, Error> {
    let mut list = StatementList::new();
    while matches!(self.cur_token.kind, TokenKind::NewLine) {
        self.next_token();
    }
    // Checking for a terminator *before* each statement is what lets a
    // newline-only body through as an empty list.
    while !matches!(
        self.cur_token.kind,
        TokenKind::End | TokenKind::Eof | TokenKind::Otherwise
    ) {
        list.statements.push(self.parse_statement()?);
        match self.cur_token.kind {
            TokenKind::End | TokenKind::Eof | TokenKind::Otherwise => break,
            TokenKind::Semicolon | TokenKind::NewLine => {
                // Any run of separators counts as a single separator.
                while matches!(
                    self.cur_token.kind,
                    TokenKind::Semicolon | TokenKind::NewLine
                ) {
                    self.next_token();
                }
            }
            _ => return parse_err!("expected ';' or newline between statements"),
        }
    }
    Ok(list)
}
/// Parses a single statement, dispatching on the current token kind. Anything that
/// does not start with a statement keyword is parsed as an expression statement.
fn parse_statement(&mut self) -> Result<Statement, Error> {
    match self.cur_token.kind {
        TokenKind::DefineFunction => self.parse_fn_decl_stmt(),
        TokenKind::Display => self.parse_display_stmt(),
        TokenKind::Repeat => self.parse_repeat_stmt(),
        TokenKind::Return => self.parse_return_stmt(),
        TokenKind::Set => self.parse_set_stmt(),
        _ => self.parse_expr_stmt(),
    }
}
/// Parses a `Set` statement: the `Set` token, an identifier, the `Assign` token and
/// an expression.
///
/// The statement's span runs from the `Set` token to the end of the assigned
/// expression.
///
/// # Errors
/// Propagates a syntax error when any of the expected pieces is missing.
fn parse_set_stmt(&mut self) -> Result<Statement, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::Set)?;
    let ident = self.parse_identifier()?;
    self.eat(&TokenKind::Assign)?;
    let expr = self.parse_expression()?;
    // `cur_token` is already the token after the expression, so the end has to come
    // from the expression itself.
    let span = Span::new(start, expr.span().end);
    Ok(Statement::Set(Set { ident, expr, span }))
}
/// Parses a `Return` statement: the `Return` token followed by an expression.
///
/// The statement's span runs from the `Return` token to the end of the returned
/// expression.
///
/// # Errors
/// Propagates a syntax error when the keyword or the expression is missing.
fn parse_return_stmt(&mut self) -> Result<Statement, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::Return)?;
    let value = self.parse_expression()?;
    // `cur_token` is already the token after the expression, so the end has to come
    // from the expression itself.
    let span = Span::new(start, value.span().end);
    Ok(Statement::Return(Return { value, span }))
}
/// Parses a function declaration: the `DefineFunction` token, a name, a
/// parenthesised parameter list, a (possibly empty) body and the `End` token.
///
/// The span covers everything from the first token through the `End` token.
///
/// # Errors
/// Propagates a syntax error from any of the sub-parsers or token checks.
fn parse_fn_decl_stmt(&mut self) -> Result<Statement, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::DefineFunction)?;
    let name = self.parse_identifier()?;

    self.eat(&TokenKind::LParen)?;
    let parameters = self.parse_fn_decl_parameters()?;
    self.eat(&TokenKind::RParen)?;

    let body = match self.cur_token.kind {
        TokenKind::End => self.create_empty_statement_list(),
        _ => self.parse_statement_list()?,
    };

    // Remember where `End` finishes before it is consumed.
    let end = self.cur_token.span.end;
    self.eat(&TokenKind::End)?;

    let declaration = FunctionDeclaration {
        name,
        parameters,
        body,
        span: Span::new(start, end),
    };
    Ok(Statement::FunctionDeclaration(declaration))
}
/// Parses the comma-separated parameter names of a function declaration.
///
/// Returns an empty vector when the current token is already `RParen`. The closing
/// parenthesis itself is not consumed.
///
/// # Errors
/// Propagates the error from `parse_fn_decl_parameter` when a parameter is missing.
fn parse_fn_decl_parameters(&mut self) -> Result<Vec<Identifier>, Error> {
    let mut parameters = Vec::new();
    if self.cur_token_is(&TokenKind::RParen) {
        return Ok(parameters);
    }
    parameters.push(self.parse_fn_decl_parameter()?);
    while self.cur_token_is(&TokenKind::Comma) {
        // The loop condition guarantees the current token is the comma.
        self.next_token();
        parameters.push(self.parse_fn_decl_parameter()?);
    }
    Ok(parameters)
}
/// Parses one function parameter (an identifier).
///
/// # Errors
/// On failure the identifier error is kept but its message is replaced with one
/// that names the offending token as a bad function parameter.
fn parse_fn_decl_parameter(&mut self) -> Result<Identifier, Error> {
    match self.parse_identifier() {
        Ok(parameter) => Ok(parameter),
        Err(mut err) => {
            // `parse_identifier` does not advance when it fails, so the current
            // token is still the one that was rejected.
            err.message = format!(
                "expected function parameter, got '{}'",
                self.cur_token.kind
            );
            Err(err)
        }
    }
}
/// Parses a `Display` statement: the `Display` token followed by one or more
/// comma-separated expressions.
///
/// The statement's span runs from the `Display` token to the end of the last
/// expression.
///
/// # Errors
/// Returns a syntax error when the keyword is directly followed by a newline or
/// `;`, and propagates any error from parsing the expressions.
fn parse_display_stmt(&mut self) -> Result<Statement, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::Display)?;
    if matches!(
        self.cur_token.kind,
        TokenKind::NewLine | TokenKind::Semicolon
    ) {
        return parse_err!("empty display statement");
    }
    let expressions = self.parse_expr_list()?;
    // `cur_token` is already past the list, so take the end from the last parsed
    // expression. `parse_expr_list` always yields at least one element; the
    // fallback only exists to avoid an unwrap.
    let end = expressions
        .last()
        .map_or(self.cur_token.span.end, |expr| expr.span().end);
    let span = Span::new(start, end);
    Ok(Statement::Display(Display { expressions, span }))
}
/// Parses a `Repeat` statement, choosing the variant from the token that follows
/// the keyword: `Until`, `Forever`, or otherwise a repeat-N-times loop.
fn parse_repeat_stmt(&mut self) -> Result<Statement, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::Repeat)?;
    if self.cur_token_is(&TokenKind::Until) {
        self.parse_repeat_until(start)
    } else if self.cur_token_is(&TokenKind::Forever) {
        self.parse_repeat_forever(start)
    } else {
        self.parse_repeat_n_times(start)
    }
}
/// Parses the remainder of a repeat-until loop: the `Until` token, a condition, a
/// (possibly empty) body and the `End` token.
///
/// `start` is the start offset of the already consumed `Repeat` token; the span
/// runs from there through the `End` token.
fn parse_repeat_until(&mut self, start: usize) -> Result<Statement, Error> {
    self.eat(&TokenKind::Until)?;
    let condition = self.parse_expression()?;
    let body = if self.cur_token_is(&TokenKind::End) {
        self.create_empty_statement_list()
    } else {
        self.parse_statement_list()?
    };
    // Taken before `End` is consumed so the span stops at `End`.
    let span = Span::new(start, self.cur_token.span.end);
    self.eat(&TokenKind::End)?;
    let repeat = RepeatUntil {
        condition,
        body,
        span,
    };
    Ok(Statement::RepeatUntil(repeat))
}
/// Parses the remainder of a repeat-forever loop: the `Forever` token, a (possibly
/// empty) body and the `End` token.
///
/// `start` is the start offset of the already consumed `Repeat` token; the span
/// runs from there through the `End` token.
fn parse_repeat_forever(&mut self, start: usize) -> Result<Statement, Error> {
    self.eat(&TokenKind::Forever)?;
    let body = if self.cur_token_is(&TokenKind::End) {
        self.create_empty_statement_list()
    } else {
        self.parse_statement_list()?
    };
    // Taken before `End` is consumed so the span stops at `End`.
    let span = Span::new(start, self.cur_token.span.end);
    self.eat(&TokenKind::End)?;
    Ok(Statement::RepeatForever(RepeatForever { body, span }))
}
/// Parses the remainder of a repeat-N-times loop: a count expression, the `Times`
/// token, a (possibly empty) body and the `End` token.
///
/// `start` is the start offset of the already consumed `Repeat` token; the span
/// runs from there through the `End` token.
fn parse_repeat_n_times(&mut self, start: usize) -> Result<Statement, Error> {
    let n = self.parse_expression()?;
    self.eat(&TokenKind::Times)?;
    let body = if self.cur_token_is(&TokenKind::End) {
        self.create_empty_statement_list()
    } else {
        self.parse_statement_list()?
    };
    // Taken before `End` is consumed so the span stops at `End`.
    let span = Span::new(start, self.cur_token.span.end);
    self.eat(&TokenKind::End)?;
    let repeat = ast::statement::RepeatNTimes { n, body, span };
    Ok(Statement::RepeatNTimes(repeat))
}
/// Parses an expression and wraps it as an expression statement.
fn parse_expr_stmt(&mut self) -> Result<Statement, Error> {
    self.parse_expression().map(Statement::Expression)
}
/// Entry point for parsing any expression; delegates to `parse_or_expr`, the
/// outermost level of the binary-operator chain.
fn parse_expression(&mut self) -> Result<Expression, Error> {
self.parse_or_expr()
}
/// Parses a chain of `Or` operators whose operands are parsed by `parse_and_expr`.
fn parse_or_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(self, parse_and_expr, TokenKind::Or);
}
/// Parses a chain of `And` operators whose operands are parsed by `parse_eq_expr`.
fn parse_and_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(self, parse_eq_expr, TokenKind::And);
}
/// Parses a chain of `Eq` / `NotEq` operators whose operands are parsed by
/// `parse_comp_expr`.
fn parse_eq_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(self, parse_comp_expr, TokenKind::Eq | TokenKind::NotEq);
}
/// Parses a chain of `Lt` / `LtEq` / `Gt` / `GtEq` operators whose operands are
/// parsed by `parse_sum_expr`.
fn parse_comp_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(
self,
parse_sum_expr,
TokenKind::Lt | TokenKind::LtEq | TokenKind::Gt | TokenKind::GtEq
);
}
/// Parses a chain of `Plus` / `Minus` operators whose operands are parsed by
/// `parse_product_expr`.
fn parse_sum_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(self, parse_product_expr, TokenKind::Plus | TokenKind::Minus);
}
/// Parses a chain of `Mult` / `Div` / `Mod` operators whose operands are parsed by
/// `parse_postfix_expr`.
fn parse_product_expr(&mut self) -> Result<Expression, Error> {
parse_binary_expr!(
self,
parse_postfix_expr,
TokenKind::Mult | TokenKind::Div | TokenKind::Mod
);
}
/// Parses an operand followed by any number of postfix operations: indexing
/// (`[` expr `]`) and function calls (`(` args `)`).
///
/// Each resulting node's span runs from the first token of the operand to the end
/// of the closing bracket or parenthesis.
///
/// NOTE(review): the operand comes from `parse_prefix_expr`, so prefix operators
/// are applied *before* postfix ones; `-a[0]` is grouped as `(-a)[0]`. Confirm this
/// precedence is intended.
///
/// # Errors
/// Propagates syntax errors from the operand, the index expression and the closing
/// tokens. Any error raised while parsing call arguments keeps its kind but has its
/// message replaced with an "expected function parameter" message.
fn parse_postfix_expr(&mut self) -> Result<Expression, Error> {
    let start = self.cur_token.span.start;
    let mut node = self.parse_prefix_expr()?;
    loop {
        match &self.cur_token.kind {
            TokenKind::LBracket => {
                self.eat(&TokenKind::LBracket)?;
                let index_expr = self.parse_expression()?;
                // Capture the end of `]` before consuming it; afterwards
                // `cur_token` is the following token and would overshoot.
                let end = self.cur_token.span.end;
                self.eat(&TokenKind::RBracket)?;
                node = Expression::Index(Index {
                    object: Box::new(node),
                    index: Box::new(index_expr),
                    span: Span::new(start, end),
                });
            }
            TokenKind::LParen => {
                self.eat(&TokenKind::LParen)?;
                let arguments = self
                    .parse_expr_list_maybe_empty(&TokenKind::RParen)
                    .map_err(|mut err| {
                        err.message =
                            format!("expected function parameter, got {}", self.cur_token.kind);
                        err
                    })?;
                // Capture the end of `)` before consuming it, for the same reason.
                let end = self.cur_token.span.end;
                self.eat(&TokenKind::RParen)?;
                node = Expression::FunctionCall(FunctionCall {
                    callee: Box::new(node),
                    arguments,
                    span: Span::new(start, end),
                });
            }
            _ => break,
        }
    }
    Ok(node)
}
fn parse_prefix_expr(&mut self) -> Result<Expression, Error> {
let mut prefix_operations = Vec::new();
while matches!(&self.cur_token.kind, TokenKind::Not | TokenKind::Minus) {
prefix_operations.push(self.cur_token.clone());
self.next_token();
}
let mut node: Expression = self.parse_ident_expr()?;
for operation in prefix_operations.into_iter().rev() {
let operator = operation.kind.clone();
let span = Span::new(operation.span.start, node.span().end);
node = Expression::Unary(Unary {
operator,
operand: Box::new(node),
span,
})
}
return Ok(node);
}
/// Parses an identifier expression when the current token is an identifier;
/// otherwise falls through to `parse_group_expr`.
fn parse_ident_expr(&mut self) -> Result<Expression, Error> {
    if matches!(self.cur_token.kind, TokenKind::Identifier { .. }) {
        self.parse_identifier().map(Expression::Identifier)
    } else {
        self.parse_group_expr()
    }
}
/// Parses a parenthesised expression, returning the inner expression itself (no
/// dedicated node is created for the parentheses). Anything not starting with `(`
/// is handed to `parse_entity_expr`.
fn parse_group_expr(&mut self) -> Result<Expression, Error> {
    if !self.cur_token_is(&TokenKind::LParen) {
        return self.parse_entity_expr();
    }
    // The guard above established that the current token is `(`.
    self.next_token();
    let inner = self.parse_expression()?;
    self.eat(&TokenKind::RParen)?;
    Ok(inner)
}
/// Parses a selection (`If`), an array literal (`[`), or, for any other token, a
/// plain literal.
fn parse_entity_expr(&mut self) -> Result<Expression, Error> {
    match self.cur_token.kind {
        TokenKind::If => self.parse_selection_expr(),
        TokenKind::LBracket => self.parse_array_expr(),
        _ => self.parse_literal().map(Expression::Literal),
    }
}
/// Parses an array literal: `[`, zero or more comma-separated expressions, `]`.
///
/// The span runs from the opening `[` through the closing `]`.
///
/// # Errors
/// Propagates syntax errors from the element expressions and the bracket checks.
fn parse_array_expr(&mut self) -> Result<Expression, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::LBracket)?;
    let elements = self.parse_expr_list_maybe_empty(&TokenKind::RBracket)?;
    // Capture the end of `]` before consuming it; afterwards `cur_token` is the
    // following token and its end would overshoot the literal.
    let end = self.cur_token.span.end;
    self.eat(&TokenKind::RBracket)?;
    let span = Span::new(start, end);
    Ok(Expression::Array(Array { elements, span }))
}
/// Parses a selection: `If` condition `Then` body, an optional `Otherwise` body,
/// and the `End` token.
///
/// The span runs from the `If` token through the `End` token.
fn parse_selection_expr(&mut self) -> Result<Expression, Error> {
    let start = self.cur_token.span.start;
    self.eat(&TokenKind::If)?;
    let condition = Box::new(self.parse_expression()?);
    self.eat(&TokenKind::Then)?;

    let conditional = self.parse_selection_if_body()?;
    let else_conditional = self.parse_selection_else_body()?;

    // Remember where `End` finishes before it is consumed.
    let end = self.cur_token.span.end;
    self.eat(&TokenKind::End)?;

    let selection = Selection {
        condition,
        conditional,
        else_conditional,
        span: Span::new(start, end),
    };
    Ok(Expression::Selection(selection))
}
/// Parses the statements of the `Then` branch. When the branch is immediately
/// closed by `End` or `Otherwise`, an empty statement list is returned instead.
fn parse_selection_if_body(&mut self) -> Result<StatementList, Error> {
    let branch_is_empty = matches!(
        self.cur_token.kind,
        TokenKind::End | TokenKind::Otherwise
    );
    if branch_is_empty {
        Ok(self.create_empty_statement_list())
    } else {
        self.parse_statement_list()
    }
}
/// Parses the optional `Otherwise` branch of a selection.
///
/// Returns `None` when the current token is not `Otherwise`. Otherwise the keyword
/// is consumed and the branch's statements are returned; a branch immediately
/// closed by `End` yields an empty list.
fn parse_selection_else_body(&mut self) -> Result<Option<StatementList>, Error> {
    if !self.cur_token_is(&TokenKind::Otherwise) {
        return Ok(None);
    }
    // The guard above established that the current token is `Otherwise`.
    self.next_token();
    let body = if self.cur_token_is(&TokenKind::End) {
        self.create_empty_statement_list()
    } else {
        self.parse_statement_list()?
    };
    Ok(Some(body))
}
/// Parses a literal from the current token: integer, float, boolean or string.
///
/// Integer and float tokens are delegated to their dedicated parsers; the other
/// literal tokens are consumed here.
///
/// # Errors
/// Returns a syntax error (without consuming anything) when the current token is
/// not a literal, and propagates errors from the numeric parsers.
fn parse_literal(&mut self) -> Result<Literal, Error> {
    let span = self.cur_token.span.clone();
    // Work on a copy of the kind so the parser can be advanced while its payload
    // is still in use.
    let kind = self.cur_token.kind.clone();
    let literal = match kind {
        TokenKind::Int(digits) => return self.parse_integer_literal(&digits),
        TokenKind::Float(digits) => return self.parse_float_literal(&digits),
        TokenKind::True => Literal::Boolean { value: true, span },
        TokenKind::False => Literal::Boolean { value: false, span },
        TokenKind::String(value) => Literal::String { value, span },
        _ => return parse_err!("expected Literal, got {}", self.cur_token.kind),
    };
    self.next_token();
    Ok(literal)
}
fn parse_integer_literal(&mut self, integer_to_parse: &String) -> Result<Literal, Error> {
let span = self.cur_token.span.clone();
self.next_token();
match integer_to_parse.parse::<i64>() {
Ok(n) => Ok(Literal::Integer { value: n, span }),
Err(err) => match err.kind() {
IntErrorKind::PosOverflow => Err(Error::new(
&format!(
"literal to large for type Integer, whose maximum value is `{}`",
i64::MAX
),
ErrorKind::OverflowError,
)),
_ => parse_err!("failed to parse literal into Integer"),
},
}
}
fn parse_float_literal(&mut self, float_to_parse: &String) -> Result<Literal, Error> {
let span = self.cur_token.span.clone();
self.next_token();
match float_to_parse.parse::<f64>() {
Ok(n) => Ok(Literal::Float { value: n, span }),
Err(_) => {
return parse_err!("failed to parse literal into Float");
}
}
}
/// Parses an identifier from the current token and consumes it.
///
/// # Errors
/// Returns a syntax error, without consuming anything, when the current token is
/// not an identifier.
fn parse_identifier(&mut self) -> Result<Identifier, Error> {
    if let TokenKind::Identifier { name } = &self.cur_token.kind {
        let identifier = Identifier {
            name: name.to_string(),
            span: self.cur_token.span.clone(),
        };
        self.next_token();
        Ok(identifier)
    } else {
        parse_err!("expected Identifier, got {}", self.cur_token.kind)
    }
}
/// Parses a comma-separated expression list that is allowed to be empty.
///
/// The list counts as empty when the current token is already `ending_token`; that
/// token is not consumed here.
fn parse_expr_list_maybe_empty(
    &mut self,
    ending_token: &TokenKind,
) -> Result<Vec<Expression>, Error> {
    if self.cur_token_is(ending_token) {
        Ok(Vec::new())
    } else {
        self.parse_expr_list()
    }
}
/// Parses one or more comma-separated expressions.
///
/// # Errors
/// Propagates the error of the first expression that fails to parse, including the
/// one expected after a trailing comma.
fn parse_expr_list(&mut self) -> Result<Vec<Expression>, Error> {
    let mut expressions = vec![self.parse_expression()?];
    while self.cur_token_is(&TokenKind::Comma) {
        // The loop condition guarantees the current token is the comma.
        self.next_token();
        expressions.push(self.parse_expression()?);
    }
    Ok(expressions)
}
/// Builds a statement list with no statements and a zero-width span located at the
/// start of the current token.
fn create_empty_statement_list(&self) -> StatementList {
    let at = self.cur_token.span.start;
    StatementList {
        statements: Vec::new(),
        span: Span::new(at, at),
    }
}
}
/// Convenience entry point: lexes and parses `input` into a [`Program`].
///
/// # Errors
/// Returns whatever error `Parser::parse_program` reports for `input`.
pub fn parse(input: &str) -> Result<Program, Error> {
    Parser::new(Lexer::new(input)).parse_program()
}