use crate::front::ast::{self, AstBox};
use crate::front::lexer::Lexer;
use crate::front::span::{Error, Span};
use crate::front::token::{Keyword, Token, TokenKind};
use crate::return_error;
use std::io::Read;
pub struct Parser<T: Read> {
lexer: Lexer<T>,
cur_token: Token,
}
pub type Result = std::result::Result<AstBox, Error>;
macro_rules! read {
($self:ident, $p:path, $prompt:expr) => {{
let Token { span, kind } = &$self.cur_token;
if let $p(v) = kind {
let v = v.clone();
$self.next_token()?;
Ok(v)
} else {
return_error!(span, "expected {}, found {}", $prompt, kind)
}
}};
}
macro_rules! match_token {
{
use $self:ident, $span:ident, $kind:ident;
$($p:pat => $e:expr,)*
? => $default:expr,
$(break if $br_pat:pat $(=> $br_block:block)?,)?
} => {{
let ($span, $kind) = ($self.cur_token.span, &$self.cur_token.kind);
let result = match $self.cur_token.kind {
$($p => $e,)*
_ => $default,
};
match &result {
Err(e) if !e.is_fatal() => {
let mut span = $span;
while !matches!($self.cur_token.kind, $($p)|+) {
$(if matches!($self.cur_token.kind, $br_pat) {
$($br_block)?
break;
})?
match $self.next_token() {
Err(e) if e.is_fatal() => return Err(e),
_ => {}
}
span.update_span($self.cur_token.span);
}
Ok(ast::Error::new_boxed(span))
}
_ => result,
}
}};
}
impl<T: Read> Parser<T> {
pub fn new(lexer: Lexer<T>) -> std::result::Result<Self, Error> {
let mut parser = Self {
lexer,
cur_token: Token::default(),
};
parser.next_token()?;
Ok(parser)
}
pub fn parse_next(&mut self) -> Result {
match_token! {
use self, span, kind;
TokenKind::End => Ok(ast::End::new_boxed(span)),
TokenKind::Keyword(Keyword::Global) => self.parse_global_def(),
TokenKind::Keyword(Keyword::Fun) => self.parse_fun_def(),
TokenKind::Keyword(Keyword::Decl) => self.parse_fun_decl(),
? => return_error!(span, "expected global definition/declaration, found {}", kind),
}
}
fn next_token(&mut self) -> std::result::Result<(), Error> {
self.cur_token = self.lexer.next_token()?;
Ok(())
}
fn span(&self) -> Span {
self.cur_token.span
}
fn parse_global_def(&mut self) -> Result {
let span = self.span();
self.next_token()?;
let name = read!(self, TokenKind::Symbol, "symbol name")?;
self.expect(TokenKind::Other('='))?;
let span_alloc = self.expect(TokenKind::Keyword(Keyword::Alloc))?;
let ty = self.parse_type()?;
self.expect(TokenKind::Other(','))?;
self.parse_init().map(|init| {
let span_last = init.span;
let value = ast::GlobalDecl::new_boxed(span_alloc.into_updated_span(span_last), ty, init);
ast::GlobalDef::new_boxed(span.into_updated_span(span_last), name, value)
})
}
fn parse_fun_def(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let name = read!(self, TokenKind::Symbol, "function name")?;
let (params, _) = self.parse_list(|s| {
let name = read!(s, TokenKind::Symbol, "parameter name")?;
s.expect(TokenKind::Other(':'))?;
Ok((name, s.parse_type()?))
})?;
let mut ret = None;
if self.is_token(TokenKind::Other(':')) {
self.next_token()?;
ret = Some(self.parse_type()?);
}
self.expect(TokenKind::Other('{'))?;
let mut bbs = Vec::new();
while !self.is_token(TokenKind::Other('}')) {
bbs.push(self.parse_block()?);
}
span.update_span(self.span());
self.next_token()?;
if bbs.is_empty() {
return_error!(
span,
"expected at least one basic block in function definition"
)
} else {
Ok(ast::FunDef::new_boxed(span, name, params, ret, bbs))
}
}
fn parse_fun_decl(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let name = read!(self, TokenKind::Symbol, "function name")?;
let (params, sp) = self.parse_list(|s| s.parse_type())?;
span.update_span(sp);
let mut ret = None;
if self.is_token(TokenKind::Other(':')) {
self.next_token()?;
let ty = self.parse_type()?;
span.update_span(ty.span);
ret = Some(ty);
}
Ok(ast::FunDecl::new_boxed(span, name, params, ret))
}
fn parse_type(&mut self) -> Result {
let Token { span, kind } = &self.cur_token;
match kind {
TokenKind::Keyword(Keyword::I32) => self.parse_int_type(),
TokenKind::Other('[') => self.parse_array_type(),
TokenKind::Other('*') => self.parse_pointer_type(),
TokenKind::Other('(') => self.parse_fun_type(),
_ => return_error!(span, "expected type, found {}", kind),
}
}
fn parse_int_type(&mut self) -> Result {
let span = self.span();
self.next_token()?;
Ok(ast::IntType::new_boxed(span))
}
fn parse_array_type(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let base = self.parse_type()?;
self.expect(TokenKind::Other(','))?;
let len = read!(self, TokenKind::Int, "length")? as usize;
span.update_span(self.expect(TokenKind::Other(']'))?);
Ok(ast::ArrayType::new_boxed(span, base, len))
}
fn parse_pointer_type(&mut self) -> Result {
let span = self.span();
self.next_token()?;
self
.parse_type()
.map(|base| ast::PointerType::new_boxed(span.into_updated_span(base.span), base))
}
fn parse_fun_type(&mut self) -> Result {
let mut span = self.span();
let (params, sp) = self.parse_list(|s| s.parse_type())?;
span.update_span(sp);
let mut ret = None;
if self.is_token(TokenKind::Other(':')) {
self.next_token()?;
let ty = self.parse_type()?;
span.update_span(ty.span);
ret = Some(ty);
}
Ok(ast::FunType::new_boxed(span, params, ret))
}
fn parse_block(&mut self) -> Result {
let span = self.span();
let name = read!(self, TokenKind::Symbol, "basic block name")?;
let (params, _) = self.parse_opt_list(|s| {
let name = read!(s, TokenKind::Symbol, "parameter name")?;
s.expect(TokenKind::Other(':'))?;
Ok((name, s.parse_type()?))
})?;
self.expect(TokenKind::Other(':'))?;
let mut stmts = Vec::new();
let mut exit_flag = false;
while !exit_flag {
stmts.push(match_token! {
use self, span, kind;
TokenKind::Symbol(_) => self.parse_symbol_def(),
TokenKind::Keyword(Keyword::Store) => self.parse_store(),
TokenKind::Keyword(Keyword::Call) => self.parse_fun_call(),
TokenKind::Keyword(Keyword::Br) => { exit_flag = true; self.parse_branch() },
TokenKind::Keyword(Keyword::Jump) => { exit_flag = true; self.parse_jump() },
TokenKind::Keyword(Keyword::Ret) => { exit_flag = true; self.parse_return() },
? => return_error!(span, "expected statement, found {}", kind),
break if TokenKind::Other('}') | TokenKind::End => { exit_flag = true; },
}?);
}
Ok(ast::Block::new_boxed(
span.into_updated_span(stmts.last().unwrap().span),
name,
params,
stmts,
))
}
fn parse_symbol_def(&mut self) -> Result {
let span = self.span();
let name = read!(self, TokenKind::Symbol, "symbol name")?;
self.expect(TokenKind::Other('='))?;
let Token { span: sp, kind } = &self.cur_token;
match kind {
TokenKind::Keyword(Keyword::Alloc) => self.parse_mem_decl(),
TokenKind::Keyword(Keyword::Load) => self.parse_load(),
TokenKind::Keyword(Keyword::GetPtr) => self.parse_get_pointer(),
TokenKind::Keyword(Keyword::GetElemPtr) => self.parse_get_element_pointer(),
TokenKind::BinaryOp(_) => self.parse_binary_expr(),
TokenKind::Keyword(Keyword::Call) => self.parse_fun_call(),
_ => return_error!(sp, "expected expression, found {}", kind),
}
.map(|value| ast::SymbolDef::new_boxed(span.into_updated_span(value.span), name, value))
}
fn parse_mem_decl(&mut self) -> Result {
let span = self.span();
self.next_token()?;
self
.parse_type()
.map(|ty| ast::MemDecl::new_boxed(span.into_updated_span(ty.span), ty))
}
fn parse_load(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
span.update_span(self.span());
read!(self, TokenKind::Symbol, "symbol").map(|symbol| ast::Load::new_boxed(span, symbol))
}
fn parse_store(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let value = if let Token {
span,
kind: TokenKind::Symbol(symbol),
} = &self.cur_token
{
let sym = ast::SymbolRef::new_boxed(*span, symbol.clone());
self.next_token()?;
sym
} else {
self.parse_init()?
};
self.expect(TokenKind::Other(','))?;
span.update_span(self.span());
read!(self, TokenKind::Symbol, "symbol")
.map(|symbol| ast::Store::new_boxed(span, value, symbol))
}
fn parse_get_pointer(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let symbol = read!(self, TokenKind::Symbol, "symbol")?;
self.expect(TokenKind::Other(','))?;
let value = self.parse_value()?;
span.update_span(value.span);
Ok(ast::GetPointer::new_boxed(span, symbol, value))
}
fn parse_get_element_pointer(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let symbol = read!(self, TokenKind::Symbol, "symbol")?;
self.expect(TokenKind::Other(','))?;
let value = self.parse_value()?;
span.update_span(value.span);
Ok(ast::GetElementPointer::new_boxed(span, symbol, value))
}
fn parse_binary_expr(&mut self) -> Result {
let span = self.span();
let op = read!(self, TokenKind::BinaryOp, "binary operator")?;
let lhs = self.parse_value()?;
self.expect(TokenKind::Other(','))?;
self
.parse_value()
.map(|rhs| ast::BinaryExpr::new_boxed(span.into_updated_span(rhs.span), op, lhs, rhs))
}
fn parse_branch(&mut self) -> Result {
let span = self.span();
self.next_token()?;
let cond = self.parse_value()?;
self.expect(TokenKind::Other(','))?;
let tbb = read!(self, TokenKind::Symbol, "basic block name")?;
let (targs, _) = self.parse_opt_list(|s| s.parse_value())?;
self.expect(TokenKind::Other(','))?;
let fbb = read!(self, TokenKind::Symbol, "basic block name")?;
let (fargs, sp) = self.parse_opt_list(|s| s.parse_value())?;
Ok(ast::Branch::new_boxed(
span.into_updated_span(sp),
cond,
tbb,
targs,
fbb,
fargs,
))
}
fn parse_jump(&mut self) -> Result {
let span = self.span();
self.next_token()?;
let target = read!(self, TokenKind::Symbol, "basic block name")?;
let (args, sp) = self.parse_opt_list(|s| s.parse_value())?;
Ok(ast::Jump::new_boxed(
span.into_updated_span(sp),
target,
args,
))
}
fn parse_fun_call(&mut self) -> Result {
let span = self.span();
self.next_token()?;
let fun = read!(self, TokenKind::Symbol, "function name")?;
let (args, sp) = self.parse_list(|s| s.parse_value())?;
Ok(ast::FunCall::new_boxed(
span.into_updated_span(sp),
fun,
args,
))
}
fn parse_return(&mut self) -> Result {
let mut span = self.span();
self.next_token()?;
let mut value = None;
if span.is_in_same_line_as(&self.span()) {
let val = self.parse_value()?;
span.update_span(val.span);
value = Some(val);
}
Ok(ast::Return::new_boxed(span, value))
}
fn parse_value(&mut self) -> Result {
let Token { span, kind } = &self.cur_token;
let ret = match kind {
TokenKind::Symbol(s) => ast::SymbolRef::new_boxed(*span, s.clone()),
TokenKind::Int(i) => ast::IntVal::new_boxed(*span, *i as i32),
TokenKind::Keyword(Keyword::Undef) => ast::UndefVal::new_boxed(*span),
_ => return_error!(span, "expected value, found {}", kind),
};
self.next_token()?;
Ok(ret)
}
fn parse_init(&mut self) -> Result {
let Token { span, kind } = &self.cur_token;
match kind {
TokenKind::Int(i) => {
let ast = ast::IntVal::new_boxed(*span, *i as i32);
self.next_token()?;
Ok(ast)
}
TokenKind::Keyword(Keyword::Undef) => {
let ast = ast::UndefVal::new_boxed(*span);
self.next_token()?;
Ok(ast)
}
TokenKind::Keyword(Keyword::ZeroInit) => {
let ast = ast::ZeroInit::new_boxed(*span);
self.next_token()?;
Ok(ast)
}
TokenKind::Other('{') => self.parse_aggregate(),
_ => return_error!(span, "expected initializer, found {}", kind),
}
}
fn parse_aggregate(&mut self) -> Result {
let span = self.span();
self.expect(TokenKind::Other('{'))?;
let mut elems = vec![self.parse_init()?];
while self.is_token(TokenKind::Other(',')) {
self.next_token()?;
elems.push(self.parse_init()?);
}
Ok(ast::Aggregate::new_boxed(
span.into_updated_span(self.expect(TokenKind::Other('}'))?),
elems,
))
}
fn parse_list<F, U>(&mut self, parser: F) -> std::result::Result<(Vec<U>, Span), Error>
where
F: Fn(&mut Self) -> std::result::Result<U, Error>,
{
self.expect(TokenKind::Other('('))?;
let mut items = Vec::new();
if !self.is_token(TokenKind::Other(')')) {
loop {
items.push(parser(self)?);
if !self.is_token(TokenKind::Other(',')) {
break;
}
self.next_token()?;
}
}
Ok((items, self.expect(TokenKind::Other(')'))?))
}
fn parse_opt_list<F, U>(&mut self, parser: F) -> std::result::Result<(Vec<U>, Span), Error>
where
F: Fn(&mut Self) -> std::result::Result<U, Error>,
{
if self.is_token(TokenKind::Other('(')) {
self.parse_list(parser)
} else {
Ok((Vec::new(), self.span()))
}
}
fn is_token(&self, tk: TokenKind) -> bool {
self.cur_token.kind == tk
}
fn expect(&mut self, tk: TokenKind) -> std::result::Result<Span, Error> {
let Token { span, kind } = &self.cur_token;
if kind == &tk {
let span = *span;
self.next_token()?;
Ok(span)
} else {
return_error!(span, "expected {}, found {}", tk, kind)
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::ir::values::BinaryOp;
use std::io::Cursor;
macro_rules! new_ast {
($name:ident { $($field:ident: $value:expr),+ $(,)? }) => {
ast::$name::new_boxed(Span::default(), $($value),+)
};
($name:ident) => {
ast::$name::new_boxed(Span::default())
};
}
#[test]
fn parse_string() {
let mut parser = Parser::new(Lexer::new(Cursor::new(
r#"
global @x = alloc [i32, 10], zeroinit
fun @test(@i: i32): i32 {
%entry:
%0 = getptr @x, 0
store {1, 2, 3, 4, 5, 0, 0, 0, 0, 10}, %0
%1 = getelemptr @x, @i
%2 = load %1
%3 = mul %2, 7
ret %3
}
"#,
)))
.unwrap();
let ast = parser.parse_next().unwrap();
let expected = new_ast!(GlobalDef {
name: "@x".into(),
value: new_ast!(GlobalDecl {
ty: new_ast!(ArrayType {
base: new_ast!(IntType),
len: 10,
}),
init: new_ast!(ZeroInit),
}),
});
assert_eq!(ast, expected);
let ast = parser.parse_next().unwrap();
let expected = new_ast!(FunDef {
name: "@test".into(),
params: vec![("@i".into(), new_ast!(IntType))],
ret: Some(new_ast!(IntType)),
bbs: vec![new_ast!(Block {
name: "%entry".into(),
params: vec![],
stmts: vec![
new_ast!(SymbolDef {
name: "%0".into(),
value: new_ast!(GetPointer {
symbol: "@x".into(),
value: new_ast!(IntVal { value: 0 }),
}),
}),
new_ast!(Store {
value: new_ast!(Aggregate {
elems: [1, 2, 3, 4, 5, 0, 0, 0, 0, 10]
.iter()
.map(|i| new_ast!(IntVal { value: *i }))
.collect()
}),
symbol: "%0".into(),
}),
new_ast!(SymbolDef {
name: "%1".into(),
value: new_ast!(GetElementPointer {
symbol: "@x".into(),
value: new_ast!(SymbolRef {
symbol: "@i".into(),
}),
}),
}),
new_ast!(SymbolDef {
name: "%2".into(),
value: new_ast!(Load {
symbol: "%1".into(),
}),
}),
new_ast!(SymbolDef {
name: "%3".into(),
value: new_ast!(BinaryExpr {
op: BinaryOp::Mul,
lhs: new_ast!(SymbolRef {
symbol: "%2".into(),
}),
rhs: new_ast!(IntVal { value: 7 }),
}),
}),
new_ast!(Return {
value: Some(new_ast!(SymbolRef {
symbol: "%3".into(),
})),
}),
],
})],
});
assert_eq!(ast, expected);
let ast = parser.parse_next().unwrap();
let expected = new_ast!(End);
assert_eq!(ast, expected);
let ast = parser.parse_next().unwrap();
let expected = new_ast!(End);
assert_eq!(ast, expected);
}
#[test]
fn parse_error() {
let mut parser = Parser::new(Lexer::new(Cursor::new(
r#"
global @x = alloc [i32, 10, zeroinit
fun @test(@i: i32): i32 {
%entry:
%0 = getptr @x, 0
store {1, 2, 3, 4, 5, 0, 0, 0, 0, 10}, %
%1 = getelemptr @x, @i
%2 = load %1
%3 = mul , 7
ret %3
}
"#,
)))
.unwrap();
assert_eq!(parser.parse_next().unwrap(), new_ast!(Error));
let ast = parser.parse_next().unwrap();
let expected = new_ast!(FunDef {
name: "@test".into(),
params: vec![("@i".into(), new_ast!(IntType))],
ret: Some(new_ast!(IntType)),
bbs: vec![new_ast!(Block {
name: "%entry".into(),
params: vec![],
stmts: vec![
new_ast!(SymbolDef {
name: "%0".into(),
value: new_ast!(GetPointer {
symbol: "@x".into(),
value: new_ast!(IntVal { value: 0 }),
}),
}),
new_ast!(Error),
new_ast!(SymbolDef {
name: "%1".into(),
value: new_ast!(GetElementPointer {
symbol: "@x".into(),
value: new_ast!(SymbolRef {
symbol: "@i".into(),
}),
}),
}),
new_ast!(SymbolDef {
name: "%2".into(),
value: new_ast!(Load {
symbol: "%1".into(),
}),
}),
new_ast!(Error),
new_ast!(Return {
value: Some(new_ast!(SymbolRef {
symbol: "%3".into(),
})),
}),
],
})],
});
assert_eq!(ast, expected);
assert_eq!(parser.parse_next().unwrap(), new_ast!(End));
assert_eq!(parser.parse_next().unwrap(), new_ast!(End));
}
}