rubbler 0.1.2

Rubbler is a RISC-V assembler written in Rust 🦀. This library was written mainly to embed a simple RISC-V assembler inside RISC-V CPU test bench code written with Verilator.
Documentation
use std::collections::HashMap;

use crate::{
    directives,
    error::error,
    statement::{Statement, StmtType},
};

/// Assembler state accumulated during analysis: the active section, one
/// location counter and one symbol table per section, and named constants.
pub struct Context {
    /// Section that address and symbol operations currently apply to.
    curr_section: Section,

    /// Location counter of the text section.
    text_addr: usize,
    /// Symbols defined in the text section, mapped to their section-relative offset.
    text_syms: HashMap<String, usize>,

    /// Location counter of the read-only data section.
    rodata_addr: usize,
    /// Symbols defined in the read-only data section, mapped to their section-relative offset.
    rodata_syms: HashMap<String, usize>,

    /// Location counter of the data section.
    data_addr: usize,
    /// Symbols defined in the data section, mapped to their section-relative offset.
    data_syms: HashMap<String, usize>,

    /// Location counter of the bss section.
    bss_addr: usize,
    /// Symbols defined in the bss section, mapped to their section-relative offset.
    bss_syms: HashMap<String, usize>,

    /// Named constants, keyed by name.
    constants: HashMap<String, i32>,
}

/// The sections a [`Context`] can place code, data, and symbols into.
///
/// The enum carries no data, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Section {
    /// Text section; the section a new `Context` starts in.
    Text,
    /// Data section.
    Data,
    /// Read-only data section.
    ROData,
    /// BSS section.
    BSS,
}

impl Context {
    /// Creates an empty context: the text section is active, every location
    /// counter is 0, and no symbols or constants are defined.
    pub fn new() -> Context {
        Context {
            curr_section: Section::Text,
            text_addr: 0,
            data_addr: 0,
            rodata_addr: 0,
            bss_addr: 0,
            text_syms: HashMap::new(),
            data_syms: HashMap::new(),
            rodata_syms: HashMap::new(),
            bss_syms: HashMap::new(),
            constants: HashMap::new(),
        }
    }

    /// Advances the active section's location counter by `amount`.
    pub fn inc_addr(&mut self, amount: usize) {
        match self.curr_section {
            Section::Text => self.text_addr += amount,
            Section::Data => self.data_addr += amount,
            Section::ROData => self.rodata_addr += amount,
            Section::BSS => self.bss_addr += amount,
        }
    }

    /// Pads the active section's location counter up to the next multiple of
    /// `alignment`. A counter that is already aligned is left unchanged.
    ///
    /// An `alignment` of 0 is treated as "no alignment requested" and leaves
    /// the counter untouched, rather than panicking on a remainder-by-zero.
    pub fn align_addr(&mut self, alignment: usize) {
        if alignment == 0 {
            return;
        }
        let curr_addr = self.get_addr();
        // The outer `% alignment` maps "already aligned" (a full `alignment`
        // of padding) back to zero padding.
        let offset = (alignment - curr_addr % alignment) % alignment;
        self.inc_addr(offset);
    }

    /// Makes the text section the active section.
    pub fn to_text(&mut self) {
        self.curr_section = Section::Text;
    }

    /// Makes the data section the active section.
    pub fn to_data(&mut self) {
        self.curr_section = Section::Data;
    }

    /// Makes the read-only data section the active section.
    pub fn to_rodata(&mut self) {
        self.curr_section = Section::ROData;
    }

    /// Makes the bss section the active section.
    pub fn to_bss(&mut self) {
        self.curr_section = Section::BSS;
    }

    /// Defines `sym` at the active section's current location counter.
    ///
    /// The stored value is relative to the start of that section. Defining a
    /// name that already exists in the same section replaces its old offset.
    pub fn emit_sym(&mut self, sym: String) {
        match self.curr_section {
            Section::Text => self.text_syms.insert(sym, self.text_addr),
            Section::Data => self.data_syms.insert(sym, self.data_addr),
            Section::ROData => self.rodata_syms.insert(sym, self.rodata_addr),
            Section::BSS => self.bss_syms.insert(sym, self.bss_addr),
        };
    }

    /// Records the constant `name` with `value`, replacing any previous value.
    pub fn emit_const(&mut self, name: String, value: i32) {
        self.constants.insert(name, value);
    }

    /// Resolves `sym` to an address, or `None` if no section defines it.
    ///
    /// Sections are treated as laid out back to back in the order text,
    /// read-only data, data, bss, so a symbol's address is its
    /// section-relative offset plus the sizes of all preceding sections.
    /// Those sizes are the *current* location counters, so the result is only
    /// final once nothing more will be added to the preceding sections.
    ///
    /// When several sections define the same name, the first one in the order
    /// above wins.
    pub fn resolve_sym(&self, sym: &str) -> Option<usize> {
        self.text_syms
            .get(sym)
            .copied()
            .or_else(|| {
                self.rodata_syms
                    .get(sym)
                    .map(|rel_addr| self.text_size() + *rel_addr)
            })
            .or_else(|| {
                self.data_syms
                    .get(sym)
                    .map(|rel_addr| self.text_size() + self.rodata_size() + *rel_addr)
            })
            .or_else(|| {
                self.bss_syms.get(sym).map(|rel_addr| {
                    self.text_size() + self.rodata_size() + self.data_size() + *rel_addr
                })
            })
    }

    /// Returns the value of the constant `name`, or `None` if it is undefined.
    pub fn resolve_const(&self, name: &str) -> Option<i32> {
        self.constants.get(name).copied()
    }

    /// Returns the current size of the text section (its location counter).
    pub fn text_size(&self) -> usize {
        self.text_addr
    }

    /// Returns the current size of the read-only data section (its location counter).
    pub fn rodata_size(&self) -> usize {
        self.rodata_addr
    }

    /// Returns the current size of the data section (its location counter).
    pub fn data_size(&self) -> usize {
        self.data_addr
    }

    /// Returns the active section's location counter, relative to the start
    /// of that section.
    pub fn get_addr(&self) -> usize {
        match self.curr_section {
            Section::Text => self.text_addr,
            Section::Data => self.data_addr,
            Section::ROData => self.rodata_addr,
            Section::BSS => self.bss_addr,
        }
    }
}

/// Walks a list of statements and builds up the [`Context`] (location
/// counters, symbols, constants) that they describe.
pub struct Analyzer {
    /// Statements to analyze; handed back unchanged by `analyze`.
    stmts: Vec<Statement>,
    /// State accumulated while walking `stmts`.
    ctx: Context,
}

impl Analyzer {
    pub fn new(stmts: Vec<Statement>) -> Analyzer {
        Analyzer {
            ctx: Context::new(),
            stmts,
        }
    }
    pub fn analyze(mut self) -> Result<(Vec<Statement>, Context), String> {
        for stmt in self.stmts.iter() {
            let ln = stmt.get_line_number();
            match stmt.get_type() {
                StmtType::Operation(..) => self.ctx.inc_addr(4),
                StmtType::Directive(dir, args) => {
                    directives::execute_directive(dir, args, &mut self.ctx)
                        .map_err(|e| error(ln, "Analyzer error", &e))?;
                }
                StmtType::Label(syms) => {
                    for sym in syms {
                        self.ctx.emit_sym(sym.to_string())
                    }
                }
            }
        }
        Ok((self.stmts, self.ctx))
    }
}

#[cfg(test)]
mod test {
    use super::Analyzer;
    use crate::{parser::Parser, scanner::Scanner, statement::Statement};

    /// Scans and parses `source`, panicking if either stage fails.
    fn source_to_stmts(source: &str) -> Vec<Statement> {
        let scanner = Scanner::new(source.to_string());
        let tokens = scanner.scan_tokens().unwrap();
        Parser::new(tokens).parse().unwrap()
    }

    /// A single operation advances the text address by 4.
    #[test]
    fn operation_stmt() {
        let (_, ctx) = Analyzer::new(source_to_stmts("add")).analyze().unwrap();
        assert_eq!(ctx.get_addr(), 4);
    }

    /// `.comm` honours the requested alignment between successive symbols.
    #[test]
    fn comm_align() {
        let asm = "
        .comm fuad, 5, 4
        .comm ismail, 5, 4
        ";
        let (_, ctx) = Analyzer::new(source_to_stmts(asm)).analyze().unwrap();
        assert_eq!(ctx.get_addr(), 13);
        assert_eq!(ctx.resolve_sym("fuad"), Some(0));
        assert_eq!(ctx.resolve_sym("ismail"), Some(8));
    }

    /// Switching sections keeps per-section addresses independent, and symbol
    /// resolution accounts for the size of the preceding sections.
    #[test]
    fn section() {
        let asm = "
        .comm fuad, 5, 1
        .section
        add
        .section .bss
        ";
        let (_, ctx) = Analyzer::new(source_to_stmts(asm)).analyze().unwrap();
        assert_eq!(ctx.resolve_sym("fuad"), Some(4));
        assert_eq!(ctx.get_addr(), 5);
    }

    /// `.equ` records a named constant.
    #[test]
    fn equ() {
        let asm = ".equ fuad, 5";
        let (_, ctx) = Analyzer::new(source_to_stmts(asm)).analyze().unwrap();
        assert_eq!(ctx.resolve_const("fuad"), Some(5));
    }

    /// Labels take the address at which they appear, across sections.
    #[test]
    fn label() {
        let asm = "
        fuad:
        add
        add
        ismail, sri:
        .section .data
        umay:
        ";
        let (_, ctx) = Analyzer::new(source_to_stmts(asm)).analyze().unwrap();
        for (name, expected) in [("fuad", 0), ("ismail", 8), ("sri", 8), ("umay", 8)] {
            assert_eq!(ctx.resolve_sym(name), Some(expected));
        }
    }

    /// `.byte` grows whichever section is active by one per value.
    #[test]
    fn bytes() {
        let asm = "
        .section .data
        .byte 1, 2, 3
        .section
        .byte 1
        ";
        let (_, ctx) = Analyzer::new(source_to_stmts(asm)).analyze().unwrap();
        assert_eq!(ctx.data_size(), 3);
        assert_eq!(ctx.text_size(), 1);
    }
}