lib_rv32_asm/
assembler.rs

1use std::collections::HashMap;
2#[cfg(not(target_arch = "wasm32"))]
3use std::io::prelude::*;
4
5use log::info;
6
7use lib_rv32_common::constants::*;
8
9use crate::{
10    encode_b_imm, encode_func3, encode_func7, encode_i_imm, encode_j_imm, encode_opcode, encode_rd,
11    encode_rs1, encode_rs2, encode_s_imm, encode_u_imm, error::AssemblerError, match_func3,
12    match_func7, parse::*, tokenize,
13};
14
/// Encoding layout of an instruction, selected from its opcode in
/// `assemble_ir`. The format decides which register, function and immediate
/// fields are written into the instruction word.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InstructionFormat {
    /// Used for `OPCODE_ARITHMETIC_IMM`, `OPCODE_JALR` and `OPCODE_LOAD`;
    /// the immediate is encoded with `encode_i_imm!`.
    Itype,
    /// Used for `OPCODE_ARITHMETIC`; the only format that carries a func7
    /// field and no immediate.
    Rtype,
    /// Used for `OPCODE_JAL`; the immediate is encoded with `encode_j_imm!`.
    Jtype,
    /// Used for `OPCODE_LUI` and `OPCODE_AUIPC`; the immediate is encoded
    /// with `encode_u_imm!`.
    Utype,
    /// Used for `OPCODE_STORE`; the immediate is encoded with `encode_s_imm!`.
    Stype,
    /// Used for `OPCODE_BRANCH`; the immediate is encoded with `encode_b_imm!`.
    Btype,
}
23
24/// Assemble a single instruction.
25///
26/// Parameters:
27///     `ir_string: &str`: The instruction
28///     `labels: &mut std::collections::HashMap<String, u32>`: Map of labels
29///     `pc: u32` Current location of the program
30///
31/// Returns:
32///     `Result<Option<u32>>`: The assembled binary instruction, an error, or nothing.
33pub fn assemble_ir(
34    ir_string: &str,
35    labels: &mut HashMap<String, u32>,
36    pc: u32,
37) -> Result<Option<u32>, AssemblerError> {
38    let mut msg = String::new();
39    let mut ir: u32 = 0;
40
41    let mut tokens: Vec<String> = tokenize!(ir_string);
42
43    if tokens.is_empty() {
44        return Ok(None);
45    } else if tokens.len() > 5 {
46        return Err(AssemblerError::TooManyTokensError);
47    }
48
49    // Add and remove leading label.
50    if tokens[0].ends_with(':') {
51        labels.insert(tokens[0].strip_suffix(':').unwrap().to_owned(), pc);
52        tokens.remove(0);
53    }
54
55    if tokens.is_empty() {
56        return Ok(None);
57    }
58
59    msg += &format!("{:18} -> [{:02x}] ", ir_string, pc);
60
61    let op = &tokens[0][..];
62    let opcode = match_opcode(op);
63    if let Err(why) = opcode {
64        return Err(why);
65    }
66    let opcode = opcode.unwrap();
67    ir |= encode_opcode!(opcode);
68
69    // Use the opcode to identify the instruction format.
70    let format = match opcode {
71        OPCODE_ARITHMETIC_IMM | OPCODE_JALR | OPCODE_LOAD => InstructionFormat::Itype,
72        OPCODE_ARITHMETIC => InstructionFormat::Rtype,
73        OPCODE_JAL => InstructionFormat::Jtype,
74        OPCODE_LUI | OPCODE_AUIPC => InstructionFormat::Utype,
75        OPCODE_BRANCH => InstructionFormat::Btype,
76        OPCODE_STORE => InstructionFormat::Stype,
77        _ => unreachable!(),
78    };
79
80    // Use the destination register field.
81    if let InstructionFormat::Rtype | InstructionFormat::Itype | InstructionFormat::Utype = format {
82        let rd = match_register(&tokens[1]);
83        if let Err(why) = rd {
84            return Err(why);
85        }
86        ir |= encode_rd!(rd.unwrap());
87    }
88
89    // Use the first register operand and func3 fields.
90    if let InstructionFormat::Itype
91    | InstructionFormat::Rtype
92    | InstructionFormat::Btype
93    | InstructionFormat::Stype = format
94    {
95        let rs1 = match_register(
96            &tokens[match opcode {
97                OPCODE_LOAD => 3,
98                OPCODE_BRANCH => 1,
99                _ => 2,
100            }],
101        );
102        if let Err(why) = rs1 {
103            return Err(why);
104        }
105        ir |= encode_rs1!(rs1.unwrap());
106
107        ir |= encode_func3!(match_func3!(op));
108    }
109
110    // Use the second register operand field.
111    if let InstructionFormat::Rtype | InstructionFormat::Stype | InstructionFormat::Btype = format {
112        let rs2 = match_register(
113            &tokens[match opcode {
114                OPCODE_STORE => 1,
115                OPCODE_BRANCH => 2,
116                _ => 3,
117            }],
118        );
119        if let Err(why) = rs2 {
120            return Err(why);
121        }
122        ir |= encode_rs2!(rs2.unwrap());
123    }
124
125    // Use the func7 field.
126    if let InstructionFormat::Rtype = format {
127        ir |= encode_func7!(match_func7!(op));
128    }
129
130    match format {
131        InstructionFormat::Itype => {
132            let imm = parse_imm(
133                &tokens[match opcode {
134                    OPCODE_LOAD => 2,
135                    _ => 3,
136                }],
137                labels,
138                pc,
139            );
140            if let Err(why) = imm {
141                return Err(why);
142            }
143            let imm = imm.unwrap();
144            ir |= encode_i_imm!(imm);
145        }
146        InstructionFormat::Utype => {
147            let imm = parse_imm(&tokens[2], labels, pc);
148            if let Err(why) = imm {
149                return Err(why);
150            }
151            let imm = imm.unwrap();
152            ir |= encode_u_imm!(imm);
153        }
154        InstructionFormat::Jtype => {
155            let imm = parse_imm(&tokens[2], labels, pc);
156            if let Err(why) = imm {
157                return Err(why);
158            }
159            let imm = imm.unwrap();
160            ir |= encode_j_imm!(imm);
161        }
162        InstructionFormat::Btype => {
163            let imm = parse_imm(&tokens[3], labels, pc);
164            if let Err(why) = imm {
165                return Err(why);
166            }
167            let imm = imm.unwrap();
168            ir |= encode_b_imm!(imm);
169        }
170        InstructionFormat::Stype => {
171            let imm = parse_imm(&tokens[2], labels, pc);
172            if let Err(why) = imm {
173                return Err(why);
174            }
175            let imm = imm.unwrap();
176            ir |= encode_s_imm!(imm);
177        }
178        InstructionFormat::Rtype => (),
179    }
180
181    msg += &format!("{:08x}", ir);
182    info!("{}", msg);
183
184    Ok(Some(ir))
185}
186
187/// Assemble a `BufRead` down to a vector of words. The input should contain
188/// the entire program.
189#[cfg(not(target_arch = "wasm32"))]
190pub fn assemble_program_buf<R>(reader: &mut R) -> Result<Vec<u32>, AssemblerError>
191where
192    R: BufRead,
193{
194    let mut prog = Vec::new();
195    let mut buf = String::new();
196    let mut labels = HashMap::new();
197    let mut pc: u32 = 0;
198
199    loop {
200        let bytes_rd = reader.read_line(&mut buf);
201
202        if bytes_rd.is_err() {
203            return Err(AssemblerError::IOError);
204        }
205
206        if bytes_rd.unwrap() == 0 {
207            break;
208        }
209
210        let ir = assemble_ir(buf.trim_end(), &mut labels, pc);
211
212        if let Err(why) = ir {
213            return Err(why);
214        }
215
216        if let Some(i) = ir.unwrap() {
217            prog.push(i);
218            pc += 4;
219        }
220        buf.clear();
221    }
222
223    Ok(prog)
224}
225
226/// Assemble a full program of newline-separated instructions.
227pub fn assemble_program(program: &str) -> Result<Vec<u32>, AssemblerError> {
228    let mut prog = Vec::new();
229    let mut labels = HashMap::new();
230    let mut pc: u32 = 0;
231
232    for line in program.split("\n") {
233        let ir = assemble_ir(line, &mut labels, pc);
234
235        if let Err(why) = ir {
236            return Err(why);
237        }
238
239        if let Some(i) = ir.unwrap() {
240            prog.push(i);
241            pc += 4;
242        }
243    }
244
245    Ok(prog)
246}