rubbler 0.1.2

Rubbler is a RISC-V assembler written in Rust 🦀. This library was written primarily to embed a simple RISC-V assembler inside RISC-V CPU test bench code written with Verilator.
Documentation
use crate::error;
use crate::expression::Expression;
use crate::expression::MemOffset;
use crate::statement::Statement;
use crate::token::Token;
use crate::token::TokenType;

/// Turns a flat sequence of tokens into a list of statements.
///
/// The parser owns the tokens it is constructed with; `parse` only borrows
/// them, so it can be called more than once on the same instance.
pub struct Parser {
    // Tokens to be grouped into statements, in the order they were supplied.
    tokens: Vec<Token>,
}

impl Parser {
    pub fn new(tokens: Vec<Token>) -> Parser {
        Parser { tokens }
    }

    /// Groups the stored tokens into statements.
    ///
    /// Tokens are accumulated until a terminator is met:
    /// * a `Colon` turns the accumulated tokens into a label statement,
    /// * a `LineBreak` turns them into an operation or directive statement.
    ///
    /// A terminator with nothing accumulated before it is ignored. Tokens
    /// left over after the last terminator are parsed as an operation or
    /// directive statement.
    ///
    /// # Errors
    /// Returns the error message produced by the first statement that fails
    /// to parse.
    pub fn parse(&self) -> Result<Vec<Statement>, String> {
        let mut parsed = Vec::new();
        let mut pending: Vec<&Token> = Vec::new();
        for current in &self.tokens {
            let kind = current.get_type();
            let ends_label = matches!(kind, TokenType::Colon);
            let ends_line = matches!(kind, TokenType::LineBreak);
            if !ends_label && !ends_line {
                // Ordinary token: it belongs to the statement being collected.
                pending.push(current);
                continue;
            }
            if pending.is_empty() {
                // Terminator with nothing in front of it (e.g. a blank line).
                continue;
            }
            // A statement is reported on the line of its first token.
            let line_number = pending[0].get_line_number();
            let statement = if ends_label {
                Self::parse_label(&pending, line_number)?
            } else {
                Self::parse_ops(&pending, line_number)?
            };
            parsed.push(statement);
            pending.clear();
        }
        // The input may end without a final line break.
        if let Some(first) = pending.first() {
            let line_number = first.get_line_number();
            parsed.push(Self::parse_ops(&pending, line_number)?);
        }
        Ok(parsed)
    }

    /// Parses the tokens preceding a colon into a label statement.
    ///
    /// The tokens must form a comma-separated list of identifiers:
    /// identifiers at even positions, commas at odd positions, and the list
    /// must not end with a comma.
    ///
    /// # Arguments
    /// * `statement_tokens` - tokens that appeared before the colon
    /// * `line_number` - line number recorded on the resulting statement
    ///
    /// # Errors
    /// Returns a "Syntax error" message when a position holds the wrong kind
    /// of token, or when the list ends with a dangling comma.
    fn parse_label(statement_tokens: &Vec<&Token>, line_number: i32) -> Result<Statement, String> {
        let mut identifiers = Vec::new();
        for (position, token) in statement_tokens.iter().enumerate() {
            let expects_identifier = position % 2 == 0;
            match (expects_identifier, token.get_type()) {
                (true, TokenType::Identifier) => identifiers.push(token.get_lexeme()),
                (false, TokenType::Comma) => (),
                (true, _) => {
                    return Err(error::error(
                        token.get_line_number(),
                        "Syntax error",
                        "Expected identifier",
                    ))
                }
                (false, _) => {
                    return Err(error::error(
                        token.get_line_number(),
                        "Syntax error",
                        "Expected comma",
                    ))
                }
            }
        }
        // An even, non-zero token count means the last token is a comma with
        // no identifier after it (e.g. `a, :`); the per-position checks above
        // cannot see that, so reject it here.
        if let Some(last) = statement_tokens.last() {
            if statement_tokens.len() % 2 == 0 {
                return Err(error::error(
                    last.get_line_number(),
                    "Syntax error",
                    "Expected identifier",
                ));
            }
        }
        Ok(Statement::new_label(identifiers, line_number))
    }

    /// Parses one line of tokens into an operation or directive statement.
    ///
    /// The first token names the operation and must be an `Opcode` or a
    /// `Directive`. The remaining tokens are split on commas, and every
    /// non-empty group is parsed as one argument expression; empty groups
    /// (such as between two adjacent commas) are skipped.
    ///
    /// # Arguments
    /// * `statement_tokens` - tokens of the statement; must not be empty
    /// * `line_number` - line number recorded on the resulting statement
    ///
    /// # Errors
    /// Returns a "Syntax error" message when the first token is neither an
    /// opcode nor a directive, or the error of the first argument that fails
    /// to parse.
    ///
    /// # Panics
    /// Panics if `statement_tokens` is empty.
    fn parse_ops(statement_tokens: &Vec<&Token>, line_number: i32) -> Result<Statement, String> {
        let head = statement_tokens.first().unwrap();
        // Work out the operation name and which kind of statement it starts.
        let (op, is_opcode) = match head.get_type() {
            TokenType::Opcode => (head.get_lexeme(), true),
            TokenType::Directive => (head.get_lexeme(), false),
            _ => {
                let message = format!("Unknown opcode or directive `{}`.", head.get_lexeme());
                return Err(error::error(
                    head.get_line_number(),
                    "Syntax error",
                    message.as_str(),
                ));
            }
        };
        // Everything after the name is a comma-separated argument list.
        let mut arguments = Vec::new();
        let argument_groups =
            statement_tokens[1..].split(|token| matches!(token.get_type(), TokenType::Comma));
        for group in argument_groups {
            if group.is_empty() {
                continue;
            }
            let mut expression_tokens: Vec<&Token> = group.to_vec();
            arguments.push(Self::parse_expression(&mut expression_tokens)?);
        }
        let statement = if is_opcode {
            Statement::new_operation(op, arguments, line_number)
        } else {
            Statement::new_directive(op, arguments, line_number)
        };
        Ok(statement)
    }

    /// Parse a vector of Token(s) to an Expression
    ///
    /// # Arguments
    /// * `expression_tokens` - tokens associated with the expression to parse
    fn parse_expression(expression_tokens: &mut Vec<&Token>) -> Result<Expression, String> {
        let mut fail = false;
        let mut err_msg = "Only literal and memory addressing expressions are supported";
        let ln = expression_tokens[0].get_line_number();
        // Parse single token expressions
        // Only numbers, strings, and identifiers are single token expressions
        if expression_tokens.len() == 1 {
            let token = expression_tokens[0];
            let lxm = token.get_lexeme();
            let ln = token.get_line_number();
            match token.get_type() {
                TokenType::Number(n) => return Ok(Expression::new_num(n, ln)),
                TokenType::String => return Ok(Expression::new_str(lxm, ln)),
                TokenType::Register(r) => return Ok(Expression::new_reg(r, ln)),
                TokenType::Identifier | TokenType::Directive => {
                    return Ok(Expression::new_sym(lxm, ln))
                }
                _ => {
                    fail = true;
                    err_msg = "Unexpected argument";
                }
            };
        };
        // Parse memory addressing expression
        // For now, assume if number of expression tokens is equal to 4, it must
        // be a memory addressing expression. This assumption however is not true,
        // since it could be a relocation function
        if !fail && expression_tokens.len() == 4 {
            // The expression must be of the following form: <Number | Identifier>(<Register>)
            // First, check if pair there is a pair of parantheses
            if expression_tokens[1].get_type() != TokenType::LeftParantheses
                || expression_tokens[3].get_type() != TokenType::RightParantheses
            {
                fail = true;
                err_msg = "Expected memory addressing expression but parantheses pair not found";
            }
            // Next, check if the token between the parantheses is an identifier
            let mut reg = 0;
            if let TokenType::Register(r) = expression_tokens[2].get_type() {
                reg = r;
            } else if !fail {
                fail = true;
                err_msg = "Expected register identifier between parantheses in memory addressing expression";
            }
            // Lastly, check if the first token is either a number or an identifier
            let token = expression_tokens[0];
            let lxm = token.get_lexeme();
            let ln = token.get_line_number();
            match token.get_type() {
                TokenType::Number(n) if !fail => {
                    return Ok(Expression::new_memaddr(reg, MemOffset::Number(n), ln))
                }
                TokenType::Identifier if !fail => {
                    return Ok(Expression::new_memaddr(reg, MemOffset::Symbol(lxm), ln))
                }
                _ if !fail => {
                    err_msg = "Expected either number or symbol as memory offset";
                }
                _ => {}
            }
        }
        Err(error::error(ln, "Parser error", err_msg))
    }
}

#[cfg(test)]
mod test {
    use core::panic;

    use crate::{
        expression::ExprType, expression::MemOffset, parser::Parser, scanner::Scanner,
        statement::StmtType,
    };

    /// Scans and parses a small program and checks that it yields, in order,
    /// an operation, a label and a directive statement with the expected
    /// arguments.
    #[test]
    fn all() {
        let source = "
        add t0, 16(t1), 2       # operation statement 
                                # with Register, MemAddr, and Number expressions
        test:                   # label statement
            .string \"success\"  # operation statement with String expression
        "
        .to_string();
        let scanner = Scanner::new(source);
        let tokens = scanner.scan_tokens().unwrap();
        let parser = Parser::new(tokens);
        let statements = parser.parse().unwrap();
        assert_eq!(statements.len(), 3);
        // add t0, 16(t1), 2
        if let StmtType::Operation(op, args) = &statements[0].get_type() {
            assert_eq!(op, "add");
            assert_eq!(args.len(), 3);
            if let ExprType::RegisterLiteral(n) = &args[0].get_type() {
                assert_eq!(*n, 5)
            } else {
                panic!("Expected register literal!")
            }
            if let ExprType::MemAddrLiteral(r, MemOffset::Number(n)) = &args[1].get_type() {
                assert_eq!(*r, 6);
                assert_eq!(*n, 16);
            } else {
                panic!("Expected memory addressing literal!")
            }
            if let ExprType::NumberLiteral(n) = &args[2].get_type() {
                assert_eq!(*n, 2)
            } else {
                panic!("Expected number literal")
            }
        } else {
            panic!("Expected operation statement")
        }
        // test:
        if let StmtType::Label(identifiers) = &statements[1].get_type() {
            assert_eq!(identifiers.len(), 1);
            assert_eq!(identifiers[0], "test");
        } else {
            panic!("Expected label statement!")
        }
        // .string \"success\"
        if let StmtType::Directive(op, args) = &statements[2].get_type() {
            assert_eq!(op, ".string");
            assert_eq!(args.len(), 1);
            if let ExprType::StringLiteral(s) = &args[0].get_type() {
                assert_eq!(s, "success")
            } else {
                panic!("Expected string literal!")
            }
        } else {
            // This branch checks for a directive, so say so on failure.
            panic!("Expected directive statement")
        }
    }
}