1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
use crate::ast::AST;
use crate::lexer::Lexer;
use crate::token::Token;
// Top level parser.
// parses linear array of tokens into AST.
pub struct Parser<'a> {
lexer: &'a mut Lexer,
}
impl<'a> Parser<'a> {
// constructor for parser.
pub fn new(lexer: &'a mut Lexer) -> Self {
Self { lexer }
}
// public parse method.
pub fn parse(&mut self) -> Result<AST, String> {
// we start with binding power of 0.
self.expr(0)
}
/// Parses an expression using Operator-Precedence parse (Pratt Parsing)
/// ref : https://en.wikipedia.org/wiki/Operator-precedence_parser
/// # Arguments
/// * min_binding_power - minimum binding power till recursivel parse the expression.
/// # Returns
/// * AST - ast of the expression.
fn expr(&mut self, min_binding_power: u8) -> Result<AST, String> {
// Parsing left hand side of the expression.
let mut left_hand_side = match self.lexer.next_token() {
// if the token is a number we simply create a node out of it.
Token::Number(f) => AST::Node(Token::Number(f)),
// if we reached the end we panic.
Token::Eof => return Err("Unexpected token : EOF".to_string()),
// if its a operator, then it means the operator is a unary.
operator => {
// we get the right binding power of the unary operator.
let ((), right_binding_power) = Parser::prefix_binding_power(operator);
// then recursively parse it.
match self.expr(right_binding_power) {
// save if its safe.
Ok(right_hand_side) => AST::Con(operator, vec![right_hand_side]),
// return if err.
Err(err) => return Err(err),
}
}
};
loop {
// Operator: Infix operator.
let operator = match self.lexer.peek() {
// shouldn't be a number, obviously.
Token::Number(n) => return Err(format!("Expected operator recieved number : {n}")),
// also shouldn't end.
Token::Eof => break,
// take the operator.
op => op,
};
// get the left binding power and right binding power of this infix operator.
let (left_bp, right_bp) = Parser::infix_binding_power(operator);
// ends recursion when the minimum binding power for this
// expr function call is less then left binding power of the current operator.
if left_bp < min_binding_power {
break;
}
// consume operator token.
self.lexer.next_token();
// recurisvely call expr to parse right hand side of the expression.
match self.expr(right_bp) {
Ok(right_hand_side) => {
// create ast.
left_hand_side = AST::Con(operator, vec![left_hand_side, right_hand_side]);
}
Err(err) => return Err(err),
}
}
// return the created AST.
Ok(left_hand_side)
}
/// Gets the infix binding power of a operator.
/// # Arguments
/// * token - the operator token.
/// # Returns
/// * (left, right) - left and right infix binding power of the operator.
fn infix_binding_power(token: Token) -> (u8, u8) {
match token {
Token::Plus => (1, 2),
Token::Minus => (1, 2),
Token::Star => (3, 4),
Token::Slash => (3, 4),
// basically unreachable.
t => panic!("Cannot get infix binding power of {t}"),
}
}
/// Gets the prefix binding power of a unary operators.
/// # Arguments
/// * token - the operator token.
/// # Returns
/// * ((), right) - right prefix binding power of the operator.
fn prefix_binding_power(token: Token) -> ((), u8) {
match token {
Token::Minus | Token::Plus => ((), 5),
// basically unreachable.
t => panic!("Cannot get infix binding power of {t}"),
}
}
}