use crate::types::{Expression, Keyword, Literal, ParserError, Token, TokenKind};
use unicode_segmentation::UnicodeSegmentation;
use parselets::{
BlockParselet, CallParselet, ConditionalParselet, FieldPatternParselet, InfixOperatorParselet,
InfixParselet, ListParselet, LiteralParselet, MemberParselet, MethodParselet, PairParselet,
PrefixOperatorParselet, PrefixParselet, ReturnParselet, TuplePatternParselet,
VarParselet, VariablePatternParselet,
};
use std::collections::HashMap;
use std::rc::Rc;
pub mod parselets;
pub type ParserResult = Result<Expression, ParserError>;
pub static PREC_ASSIGNMENT: usize = 10;
pub static PREC_PAIR: usize = 15;
pub static PREC_RECORD: usize = 20;
pub static PREC_LOGICAL: usize = 30;
pub static PREC_EQUALITY: usize = 40;
pub static PREC_COMPARISON: usize = 50;
pub static PREC_TERM: usize = 60;
pub static PREC_PRODUCT: usize = 70;
pub static PREC_EXPONENT: usize = 80;
pub static PREC_UNARY: usize = 90;
pub static PREC_CALL: usize = 100;
pub struct Parser {
position: usize,
prefix_parselets: HashMap<TokenKind, &'static dyn PrefixParselet>,
infix_parselets: HashMap<TokenKind, Rc<dyn InfixParselet>>,
tokens: Vec<Token>,
source: Vec<String>,
}
fn infix_operator(precedence: usize) -> Rc<dyn InfixParselet> {
Rc::new(InfixOperatorParselet { precedence }) as Rc<dyn InfixParselet>
}
impl Parser {
pub fn new() -> Self {
let mut prefix_parselets = HashMap::new();
let mut infix_parselets = HashMap::new();
prefix_parselets.insert(
TokenKind::Identifier,
&VariablePatternParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Literal(Literal::Int),
&LiteralParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Literal(Literal::Float),
&LiteralParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Literal(Literal::Boolean),
&LiteralParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Literal(Literal::String),
&LiteralParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Keyword(Keyword::If),
&ConditionalParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Keyword(Keyword::Def),
&MethodParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Keyword(Keyword::Var),
&VarParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Keyword(Keyword::Return),
&ReturnParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Keyword(Keyword::Do),
&BlockParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Bang,
&PrefixOperatorParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Plus,
&PrefixOperatorParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::Minus,
&PrefixOperatorParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(
TokenKind::LeftParen,
&TuplePatternParselet as &dyn PrefixParselet,
);
prefix_parselets.insert(TokenKind::LeftBracket, &ListParselet as &dyn PrefixParselet);
infix_parselets.insert(TokenKind::Plus, infix_operator(PREC_TERM));
infix_parselets.insert(TokenKind::Minus, infix_operator(PREC_TERM));
infix_parselets.insert(TokenKind::Star, infix_operator(PREC_PRODUCT));
infix_parselets.insert(TokenKind::Slash, infix_operator(PREC_PRODUCT));
infix_parselets.insert(TokenKind::EqualEqual, infix_operator(PREC_EQUALITY));
infix_parselets.insert(TokenKind::BangEqual, infix_operator(PREC_EQUALITY));
infix_parselets.insert(TokenKind::Greater, infix_operator(PREC_COMPARISON));
infix_parselets.insert(TokenKind::GreaterEqual, infix_operator(PREC_COMPARISON));
infix_parselets.insert(TokenKind::Smaller, infix_operator(PREC_COMPARISON));
infix_parselets.insert(TokenKind::SmallerEqual, infix_operator(PREC_COMPARISON));
infix_parselets.insert(TokenKind::Percent, infix_operator(PREC_PRODUCT));
infix_parselets.insert(TokenKind::Caret, infix_operator(PREC_EXPONENT));
infix_parselets.insert(
TokenKind::Comma,
Rc::new(PairParselet) as Rc<dyn InfixParselet>,
);
infix_parselets.insert(
TokenKind::LeftParen,
Rc::new(CallParselet) as Rc<dyn InfixParselet>,
);
infix_parselets.insert(
TokenKind::Colon,
Rc::new(FieldPatternParselet) as Rc<dyn InfixParselet>,
);
infix_parselets.insert(
TokenKind::Dot,
Rc::new(MemberParselet) as Rc<dyn InfixParselet>,
);
Self {
position: 0,
prefix_parselets,
infix_parselets,
tokens: vec![],
source: vec![],
}
}
pub fn add_tokens(&mut self, source: String, mut tokens: Vec<Token>) {
self.source
.extend(source.graphemes(true).map(String::from));
self.tokens.append(&mut tokens);
}
pub fn parse(&mut self) -> Result<Vec<Expression>, ParserError> {
let mut expressions = vec![];
while !self.eof() {
expressions.push(self.parse_expression(0)?);
}
Ok(expressions)
}
pub fn get_lexeme(&self, start: usize, end: usize) -> Result<String, ParserError> {
if end <= self.source.len() {
Ok(self.source[start..end].concat())
} else {
Err(ParserError::UnexpectedEOF)
}
}
pub fn parse_expression(&mut self, precedence: usize) -> Result<Expression, ParserError> {
let token = self.consume();
let start_pos = token.start_pos;
let mut end_pos = token.end_pos;
if let Some(prefix) = self.prefix_parselets.get(&token.kind) {
let mut left = prefix.parse(self, token.clone())?;
if self.eof() {
end_pos = token.end_pos;
return Ok(Expression {
kind: left.kind,
start_pos,
end_pos,
});
}
while !self.eof() && precedence < self.get_precedence()? {
let token = self.peek()?;
end_pos = token.end_pos;
if let Some(infix) = self.infix_parselets.get(&token.kind).cloned() {
left = infix.parse(self, Box::new(left.clone()), token)?;
}
}
Ok(Expression {
kind: left.kind,
start_pos,
end_pos,
})
} else {
return Err(ParserError::MissingPrefixParselet(token.clone().kind));
}
}
fn get_precedence(&self) -> Result<usize, ParserError> {
if let Some(infix) = self.infix_parselets.get(&self.peek()?.kind) {
Ok(infix.get_precedence())
} else {
Ok(0)
}
}
fn consume(&mut self) -> Token {
let token = self.tokens[self.position].clone();
self.advance();
token
}
fn consume_expect(&mut self, kind: TokenKind) -> Result<Token, ParserError> {
if self.eof() {
return Err(ParserError::UnexpectedEOF);
}
let token = self.tokens[self.position].clone();
if token.kind == kind {
self.advance();
Ok(token)
} else {
Err(ParserError::UnexpectedToken {
expected: kind,
found: token,
})
}
}
fn advance(&mut self) {
if !self.eof() {
self.position += 1;
}
}
fn peek(&self) -> Result<Token, ParserError> {
if !self.eof() {
Ok(self.tokens[self.position].clone())
} else {
Err(ParserError::UnexpectedEOF)
}
}
fn eof(&self) -> bool {
self.position == self.tokens.len()
}
}
#[cfg(test)]
mod tests {
use crate::lexer::Lexer;
use crate::parser::Parser;
use crate::types::*;
#[test]
fn parse_infix_plus() {
let mut parser = Parser::new();
let mut lexer = Lexer::new();
lexer.add_text("1 + 2".to_string());
parser.add_tokens("1 + 2".to_string(), lexer.parse());
assert_eq!(
parser.parse(),
Ok(vec![Expression {
kind: ExpressionKind::Infix(Infix {
left: Box::new(Expression {
kind: ExpressionKind::Literal(Literal::Int,),
start_pos: 0,
end_pos: 1,
}),
operator: Token {
kind: TokenKind::Plus,
line: 1,
start_pos: 2,
end_pos: 3,
},
right: Box::new(Expression {
kind: ExpressionKind::Literal(Literal::Int,),
start_pos: 4,
end_pos: 5,
}),
}),
start_pos: 2,
end_pos: 3,
}])
);
}
}