use crate::error;
use crate::expression::Expression;
use crate::expression::MemOffset;
use crate::statement::Statement;
use crate::token::Token;
use crate::token::TokenType;
/// Parser that turns a flat list of [`Token`]s into a list of [`Statement`]s.
///
/// Build it with `Parser::new` and run it with `Parser::parse`.
pub struct Parser {
/// The tokens to parse, exactly as handed to `Parser::new`.
tokens: Vec<Token>,
}
impl Parser {
    /// Creates a parser that owns the given token list.
    pub fn new(tokens: Vec<Token>) -> Parser {
        Parser { tokens }
    }

    /// Groups the tokens into statements and parses each group.
    ///
    /// Tokens are accumulated until a terminator is seen:
    ///
    /// * a `Colon` ends a label statement,
    /// * a `LineBreak` ends an opcode/directive statement,
    /// * whatever is still pending at the end of the token list is parsed as
    ///   an opcode/directive statement.
    ///
    /// A terminator that is not preceded by any pending token is skipped.
    ///
    /// # Errors
    ///
    /// Returns the message of the first statement that fails to parse.
    pub fn parse(&self) -> Result<Vec<Statement>, String> {
        let mut statements = Vec::new();
        let mut pending: Vec<&Token> = Vec::new();

        for token in &self.tokens {
            match token.get_type() {
                TokenType::Colon => {
                    Self::flush_statement(&mut pending, &mut statements, Self::parse_label)?
                }
                TokenType::LineBreak => {
                    Self::flush_statement(&mut pending, &mut statements, Self::parse_ops)?
                }
                _ => pending.push(token),
            }
        }

        // The last statement is not necessarily followed by a line break.
        Self::flush_statement(&mut pending, &mut statements, Self::parse_ops)?;
        Ok(statements)
    }

    /// Parses the pending tokens with `parse_one`, appends the result to
    /// `statements` and empties `pending`. Does nothing when `pending` is empty.
    ///
    /// The statement's line number is the line of its first token.
    fn flush_statement(
        pending: &mut Vec<&Token>,
        statements: &mut Vec<Statement>,
        parse_one: fn(&[&Token], i32) -> Result<Statement, String>,
    ) -> Result<(), String> {
        let line_number = match pending.first() {
            Some(first) => first.get_line_number(),
            None => return Ok(()),
        };
        statements.push(parse_one(pending.as_slice(), line_number)?);
        pending.clear();
        Ok(())
    }

    /// Parses the tokens preceding a `:` as a comma-separated identifier list.
    ///
    /// Tokens at even positions must be identifiers and tokens at odd
    /// positions must be commas; the list must not end with a comma.
    ///
    /// # Errors
    ///
    /// Returns a "Syntax error" message when the alternation is broken or the
    /// list ends with a dangling comma.
    fn parse_label(statement_tokens: &[&Token], line_number: i32) -> Result<Statement, String> {
        let mut identifiers = Vec::new();

        for (position, token) in statement_tokens.iter().enumerate() {
            let wants_identifier = position % 2 == 0;
            match (wants_identifier, token.get_type()) {
                (true, TokenType::Identifier) => identifiers.push(token.get_lexeme()),
                (false, TokenType::Comma) => {}
                (true, _) => {
                    return Err(error::error(
                        token.get_line_number(),
                        "Syntax error",
                        "Expected identifier",
                    ))
                }
                (false, _) => {
                    return Err(error::error(
                        token.get_line_number(),
                        "Syntax error",
                        "Expected comma",
                    ))
                }
            }
        }

        // The loop only checks tokens that are present, so a list such as
        // `foo,` would slip through without this check.
        if let Some(last) = statement_tokens.last() {
            if last.get_type() == TokenType::Comma {
                return Err(error::error(
                    last.get_line_number(),
                    "Syntax error",
                    "Expected identifier",
                ));
            }
        }

        Ok(Statement::new_label(identifiers, line_number))
    }

    /// Parses an opcode or directive followed by comma-separated arguments.
    ///
    /// The first token selects the statement kind (`Opcode` or `Directive`);
    /// the remaining tokens are split on commas and every non-empty group is
    /// parsed as one expression. Empty groups (leading, consecutive or
    /// trailing commas) are skipped rather than reported.
    ///
    /// # Errors
    ///
    /// Returns a "Syntax error" message when the first token is neither an
    /// opcode nor a directive (or is missing), and forwards the first error
    /// produced while parsing an argument.
    fn parse_ops(statement_tokens: &[&Token], line_number: i32) -> Result<Statement, String> {
        let (head, rest) = match statement_tokens.split_first() {
            Some(parts) => parts,
            None => {
                return Err(error::error(
                    line_number,
                    "Syntax error",
                    "Expected opcode or directive",
                ))
            }
        };

        let is_opcode = match head.get_type() {
            TokenType::Opcode => true,
            TokenType::Directive => false,
            _ => {
                return Err(error::error(
                    head.get_line_number(),
                    "Syntax error",
                    format!("Unknown opcode or directive `{}`.", head.get_lexeme()).as_str(),
                ))
            }
        };
        let op = head.get_lexeme();

        let arguments = rest
            .split(|token| token.get_type() == TokenType::Comma)
            .filter(|group| !group.is_empty())
            .map(Self::parse_expression)
            .collect::<Result<Vec<_>, _>>()?;

        if is_opcode {
            Ok(Statement::new_operation(op, arguments, line_number))
        } else {
            Ok(Statement::new_directive(op, arguments, line_number))
        }
    }

    /// Parses the tokens of a single argument into an expression.
    ///
    /// Two shapes are recognised:
    ///
    /// * one token: a number, string, register, or a symbol (identifier or
    ///   directive token);
    /// * four tokens `offset ( register )`, where `offset` is a number or an
    ///   identifier: a memory addressing expression.
    ///
    /// The expression's line number is the line of its first token.
    ///
    /// # Errors
    ///
    /// Returns a "Parser error" message for any other shape. For the
    /// four-token form the parentheses are checked first, then the register,
    /// then the offset, and the first failing check decides the message.
    fn parse_expression(expression_tokens: &[&Token]) -> Result<Expression, String> {
        match expression_tokens {
            [token] => {
                let ln = token.get_line_number();
                match token.get_type() {
                    TokenType::Number(n) => Ok(Expression::new_num(n, ln)),
                    TokenType::String => Ok(Expression::new_str(token.get_lexeme(), ln)),
                    TokenType::Register(r) => Ok(Expression::new_reg(r, ln)),
                    TokenType::Identifier | TokenType::Directive => {
                        Ok(Expression::new_sym(token.get_lexeme(), ln))
                    }
                    _ => Err(error::error(ln, "Parser error", "Unexpected argument")),
                }
            }
            [offset, open, base, close] => {
                let ln = offset.get_line_number();

                if open.get_type() != TokenType::LeftParantheses
                    || close.get_type() != TokenType::RightParantheses
                {
                    return Err(error::error(
                        ln,
                        "Parser error",
                        "Expected memory addressing expression but parantheses pair not found",
                    ));
                }

                let reg = match base.get_type() {
                    TokenType::Register(r) => r,
                    _ => {
                        return Err(error::error(
                            ln,
                            "Parser error",
                            "Expected register identifier between parantheses in memory addressing expression",
                        ))
                    }
                };

                match offset.get_type() {
                    TokenType::Number(n) => {
                        Ok(Expression::new_memaddr(reg, MemOffset::Number(n), ln))
                    }
                    TokenType::Identifier => Ok(Expression::new_memaddr(
                        reg,
                        MemOffset::Symbol(offset.get_lexeme()),
                        ln,
                    )),
                    _ => Err(error::error(
                        ln,
                        "Parser error",
                        "Expected either number or symbol as memory offset",
                    )),
                }
            }
            other => {
                // `parse_ops` never passes an empty group; if that ever
                // changes, report line 0 instead of panicking on an index.
                let ln = other.first().map_or(0, |token| token.get_line_number());
                Err(error::error(
                    ln,
                    "Parser error",
                    "Only literal and memory addressing expressions are supported",
                ))
            }
        }
    }
}
#[cfg(test)]
mod test {
    use crate::{
        expression::ExprType, expression::MemOffset, parser::Parser, scanner::Scanner,
        statement::StmtType,
    };

    /// Scans and parses a small program, then checks the three resulting
    /// statements: an operation with register, memory-address and number
    /// arguments, a label, and a directive with a string argument.
    #[test]
    fn all() {
        let source = "
add t0, 16(t1), 2 # operation statement
# with Register, MemAddr, and Number expressions
test: # label statement
.string \"success\" # operation statement with String expression
"
        .to_string();

        let scanner = Scanner::new(source);
        let tokens = scanner.scan_tokens().unwrap();
        let parser = Parser::new(tokens);
        let statements = parser.parse().unwrap();
        assert_eq!(statements.len(), 3);

        // `add t0, 16(t1), 2`
        match &statements[0].get_type() {
            StmtType::Operation(op, args) => {
                assert_eq!(op, "add");
                assert_eq!(args.len(), 3);

                match &args[0].get_type() {
                    ExprType::RegisterLiteral(n) => assert_eq!(*n, 5),
                    _ => panic!("Expected register literal!"),
                }
                match &args[1].get_type() {
                    ExprType::MemAddrLiteral(r, MemOffset::Number(n)) => {
                        assert_eq!(*r, 6);
                        assert_eq!(*n, 16);
                    }
                    _ => panic!("Expected memory addressing literal!"),
                }
                match &args[2].get_type() {
                    ExprType::NumberLiteral(n) => assert_eq!(*n, 2),
                    _ => panic!("Expected number literal"),
                }
            }
            _ => panic!("Expected operation statement"),
        }

        // `test:`
        match &statements[1].get_type() {
            StmtType::Label(identifiers) => {
                assert_eq!(identifiers.len(), 1);
                assert_eq!(identifiers[0], "test");
            }
            _ => panic!("Expected label statement!"),
        }

        // `.string "success"`
        match &statements[2].get_type() {
            StmtType::Directive(op, args) => {
                assert_eq!(op, ".string");
                assert_eq!(args.len(), 1);

                match &args[0].get_type() {
                    ExprType::StringLiteral(s) => assert_eq!(s, "success"),
                    _ => panic!("Expected string literal!"),
                }
            }
            // This arm checks a directive, so the failure message says so.
            _ => panic!("Expected directive statement"),
        }
    }
}