Skip to main content

ras/encoder/
riscv.rs

1//! RISC-V binary instruction encoder (RV32I / RV64I + M-extension)
2//!
3//! All RISC-V instructions are 32 bits wide and stored little-endian.
4//! Supports the standard register ABI names (zero/ra/sp/…/a0-a7/s0-s11/t0-t6).
5
6use crate::encoder::traits::{InstructionEncoder, ParsedInstruction};
7use crate::error::RasError;
8
/// Encoder that turns parsed RISC-V assembly instructions into 32-bit
/// little-endian machine words.
#[derive(Debug, Clone)]
pub struct RiscVEncoder {
    /// Total number of bytes emitted so far; grows after every successfully
    /// encoded instruction.
    position: usize,
    /// true = RV64 (64-bit), false = RV32
    rv64: bool,
}
14
15impl Default for RiscVEncoder {
16    fn default() -> Self {
17        Self::new(true)
18    }
19}
20
21impl RiscVEncoder {
22    pub fn new(rv64: bool) -> Self {
23        Self { position: 0, rv64 }
24    }
25
26    // -----------------------------------------------------------------------
27    // Register parser
28    // -----------------------------------------------------------------------
29
30    fn parse_register(&self, s: &str) -> Result<u8, RasError> {
31        let s = s.trim_start_matches('%').trim();
32        match s {
33            // Numeric names
34            "x0" => Ok(0),
35            "x1" => Ok(1),
36            "x2" => Ok(2),
37            "x3" => Ok(3),
38            "x4" => Ok(4),
39            "x5" => Ok(5),
40            "x6" => Ok(6),
41            "x7" => Ok(7),
42            "x8" => Ok(8),
43            "x9" => Ok(9),
44            "x10" => Ok(10),
45            "x11" => Ok(11),
46            "x12" => Ok(12),
47            "x13" => Ok(13),
48            "x14" => Ok(14),
49            "x15" => Ok(15),
50            "x16" => Ok(16),
51            "x17" => Ok(17),
52            "x18" => Ok(18),
53            "x19" => Ok(19),
54            "x20" => Ok(20),
55            "x21" => Ok(21),
56            "x22" => Ok(22),
57            "x23" => Ok(23),
58            "x24" => Ok(24),
59            "x25" => Ok(25),
60            "x26" => Ok(26),
61            "x27" => Ok(27),
62            "x28" => Ok(28),
63            "x29" => Ok(29),
64            "x30" => Ok(30),
65            "x31" => Ok(31),
66            // ABI names
67            "zero" => Ok(0),
68            "ra" => Ok(1),
69            "sp" => Ok(2),
70            "gp" => Ok(3),
71            "tp" => Ok(4),
72            "t0" => Ok(5),
73            "t1" => Ok(6),
74            "t2" => Ok(7),
75            "s0" | "fp" => Ok(8),
76            "s1" => Ok(9),
77            "a0" => Ok(10),
78            "a1" => Ok(11),
79            "a2" => Ok(12),
80            "a3" => Ok(13),
81            "a4" => Ok(14),
82            "a5" => Ok(15),
83            "a6" => Ok(16),
84            "a7" => Ok(17),
85            "s2" => Ok(18),
86            "s3" => Ok(19),
87            "s4" => Ok(20),
88            "s5" => Ok(21),
89            "s6" => Ok(22),
90            "s7" => Ok(23),
91            "s8" => Ok(24),
92            "s9" => Ok(25),
93            "s10" => Ok(26),
94            "s11" => Ok(27),
95            "t3" => Ok(28),
96            "t4" => Ok(29),
97            "t5" => Ok(30),
98            "t6" => Ok(31),
99            _ => Err(RasError::EncodingError(format!(
100                "Unknown RISC-V register: {}",
101                s
102            ))),
103        }
104    }
105
106    fn parse_imm(&self, s: &str) -> Result<i64, RasError> {
107        let s = s.trim();
108        // Strip optional '#' or leading '+'.
109        let s = s.trim_start_matches('#');
110        if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("0X")) {
111            i64::from_str_radix(hex, 16)
112                .map_err(|_| RasError::EncodingError(format!("Invalid hex immediate: {}", s)))
113        } else {
114            s.parse::<i64>()
115                .map_err(|_| RasError::EncodingError(format!("Invalid immediate: {}", s)))
116        }
117    }
118
119    /// Parse `base_reg` and optional offset from an address operand like
120    /// `(sp)`, `8(sp)`, `-4(s0)`.
121    fn parse_mem_operand(&self, s: &str) -> Result<(u8, i32), RasError> {
122        let s = s.trim();
123        if let Some(paren) = s.find('(') {
124            let offset_str = s[..paren].trim();
125            let reg_str = s[paren + 1..].trim_end_matches(')').trim();
126            let offset = if offset_str.is_empty() {
127                0i32
128            } else {
129                self.parse_imm(offset_str)? as i32
130            };
131            let base = self.parse_register(reg_str)?;
132            Ok((base, offset))
133        } else {
134            // No offset: treat the whole string as a register with offset 0
135            Ok((self.parse_register(s)?, 0))
136        }
137    }
138
139    // -----------------------------------------------------------------------
140    // Instruction format encoders
141    // -----------------------------------------------------------------------
142
143    #[inline]
144    fn r_type(funct7: u8, rs2: u8, rs1: u8, funct3: u8, rd: u8, opcode: u8) -> u32 {
145        ((funct7 as u32) << 25)
146            | ((rs2 as u32) << 20)
147            | ((rs1 as u32) << 15)
148            | ((funct3 as u32) << 12)
149            | ((rd as u32) << 7)
150            | (opcode as u32)
151    }
152
153    #[inline]
154    fn i_type(imm12: i32, rs1: u8, funct3: u8, rd: u8, opcode: u8) -> u32 {
155        ((imm12 as u32 & 0xFFF) << 20)
156            | ((rs1 as u32) << 15)
157            | ((funct3 as u32) << 12)
158            | ((rd as u32) << 7)
159            | (opcode as u32)
160    }
161
162    #[inline]
163    fn s_type(imm12: i32, rs2: u8, rs1: u8, funct3: u8, opcode: u8) -> u32 {
164        let imm = imm12 as u32 & 0xFFF;
165        ((imm >> 5) << 25)
166            | ((rs2 as u32) << 20)
167            | ((rs1 as u32) << 15)
168            | ((funct3 as u32) << 12)
169            | ((imm & 0x1F) << 7)
170            | (opcode as u32)
171    }
172
173    /// B-type: offset is in bytes, must be 2-byte aligned.
174    /// Bits: [31]=imm[12], [30:25]=imm[10:5], [24:20]=rs2, [19:15]=rs1,
175    ///       [14:12]=funct3, [11:8]=imm[4:1], [7]=imm[11], [6:0]=opcode
176    #[inline]
177    fn b_type(offset: i32, rs2: u8, rs1: u8, funct3: u8, opcode: u8) -> u32 {
178        let o = offset as u32;
179        let imm12 = (o >> 12) & 1;
180        let imm11 = (o >> 11) & 1;
181        let imm10_5 = (o >> 5) & 0x3F;
182        let imm4_1 = (o >> 1) & 0xF;
183        (imm12 << 31)
184            | (imm10_5 << 25)
185            | ((rs2 as u32) << 20)
186            | ((rs1 as u32) << 15)
187            | ((funct3 as u32) << 12)
188            | (imm4_1 << 8)
189            | (imm11 << 7)
190            | (opcode as u32)
191    }
192
193    /// U-type: imm20 occupies bits [31:12].
194    #[inline]
195    fn u_type(imm20: i32, rd: u8, opcode: u8) -> u32 {
196        ((imm20 as u32 & 0xF_FFFF) << 12) | ((rd as u32) << 7) | (opcode as u32)
197    }
198
199    /// J-type: offset is in bytes, must be 2-byte aligned.
200    /// Bits: [31]=imm[20], [30:21]=imm[10:1], [20]=imm[11], [19:12]=imm[19:12],
201    ///       [11:7]=rd, [6:0]=opcode
202    #[inline]
203    fn j_type(offset: i32, rd: u8, opcode: u8) -> u32 {
204        let o = offset as u32;
205        let imm20 = (o >> 20) & 1;
206        let imm19_12 = (o >> 12) & 0xFF;
207        let imm11 = (o >> 11) & 1;
208        let imm10_1 = (o >> 1) & 0x3FF;
209        (imm20 << 31)
210            | (imm10_1 << 21)
211            | (imm11 << 20)
212            | (imm19_12 << 12)
213            | ((rd as u32) << 7)
214            | (opcode as u32)
215    }
216
217    #[inline]
218    fn emit(word: u32) -> Vec<u8> {
219        word.to_le_bytes().to_vec()
220    }
221
222    // -----------------------------------------------------------------------
223    // Convenience dispatch for common 3-operand R-type ops
224    // -----------------------------------------------------------------------
225
226    fn encode_r3(
227        &self,
228        ops: &[String],
229        funct7: u8,
230        funct3: u8,
231        opcode: u8,
232    ) -> Result<Vec<u8>, RasError> {
233        if ops.len() != 3 {
234            return Err(RasError::EncodingError(
235                "R-type instruction requires 3 operands: rd, rs1, rs2".to_string(),
236            ));
237        }
238        let rd = self.parse_register(&ops[0])?;
239        let rs1 = self.parse_register(&ops[1])?;
240        let rs2 = self.parse_register(&ops[2])?;
241        Ok(Self::emit(Self::r_type(
242            funct7, rs2, rs1, funct3, rd, opcode,
243        )))
244    }
245
246    /// 3-operand I-type: `mnemonic rd, rs1, imm`
247    fn encode_i3(&self, ops: &[String], funct3: u8, opcode: u8) -> Result<Vec<u8>, RasError> {
248        if ops.len() != 3 {
249            return Err(RasError::EncodingError(
250                "I-type instruction requires 3 operands: rd, rs1, imm".to_string(),
251            ));
252        }
253        let rd = self.parse_register(&ops[0])?;
254        let rs1 = self.parse_register(&ops[1])?;
255        let imm = self.parse_imm(&ops[2])? as i32;
256        if !(-2048..=2047).contains(&imm) {
257            return Err(RasError::EncodingError(format!(
258                "Immediate {} out of 12-bit signed range [-2048, 2047]",
259                imm
260            )));
261        }
262        Ok(Self::emit(Self::i_type(imm, rs1, funct3, rd, opcode)))
263    }
264
265    /// Shift-immediate: uses the upper 7 bits as funct7 and lower 5 (or 6 for RV64) as shamt.
266    fn encode_shift_imm(
267        &self,
268        ops: &[String],
269        funct7: u8,
270        funct3: u8,
271        opcode: u8,
272    ) -> Result<Vec<u8>, RasError> {
273        if ops.len() != 3 {
274            return Err(RasError::EncodingError(
275                "Shift immediate requires 3 operands: rd, rs1, shamt".to_string(),
276            ));
277        }
278        let rd = self.parse_register(&ops[0])?;
279        let rs1 = self.parse_register(&ops[1])?;
280        let shamt = self.parse_imm(&ops[2])? as u32;
281        let max_shamt = if self.rv64 { 63u32 } else { 31u32 };
282        if shamt > max_shamt {
283            return Err(RasError::EncodingError(format!(
284                "Shift amount {} exceeds maximum {}",
285                shamt, max_shamt
286            )));
287        }
288        // For RV64: imm[11:0] = funct7[6:1] | shamt[5:0]
289        let imm12 = ((funct7 as i32) << 5) | (shamt as i32 & 0x3F);
290        Ok(Self::emit(Self::i_type(imm12, rs1, funct3, rd, opcode)))
291    }
292
293    /// B-type branch: `mnemonic rs1, rs2, offset`
294    fn encode_branch(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
295        if ops.len() != 3 {
296            return Err(RasError::EncodingError(
297                "Branch requires 3 operands: rs1, rs2, offset".to_string(),
298            ));
299        }
300        let rs1 = self.parse_register(&ops[0])?;
301        let rs2 = self.parse_register(&ops[1])?;
302        let offset = self.parse_imm(&ops[2])? as i32;
303        Ok(Self::emit(Self::b_type(offset, rs2, rs1, funct3, 0x63)))
304    }
305
306    /// Load: `mnemonic rd, offset(rs1)`
307    fn encode_load(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
308        if ops.len() != 2 {
309            return Err(RasError::EncodingError(
310                "Load requires 2 operands: rd, offset(rs1)".to_string(),
311            ));
312        }
313        let rd = self.parse_register(&ops[0])?;
314        let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
315        Ok(Self::emit(Self::i_type(offset, rs1, funct3, rd, 0x03)))
316    }
317
318    /// Store: `mnemonic rs2, offset(rs1)`
319    fn encode_store(&self, ops: &[String], funct3: u8) -> Result<Vec<u8>, RasError> {
320        if ops.len() != 2 {
321            return Err(RasError::EncodingError(
322                "Store requires 2 operands: rs2, offset(rs1)".to_string(),
323            ));
324        }
325        let rs2 = self.parse_register(&ops[0])?;
326        let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
327        Ok(Self::emit(Self::s_type(offset, rs2, rs1, funct3, 0x23)))
328    }
329}
330
331impl InstructionEncoder for RiscVEncoder {
332    fn encode_instruction(&mut self, inst: &ParsedInstruction) -> Result<Vec<u8>, RasError> {
333        let opcode = inst.opcode.to_lowercase();
334        let ops = &inst.operands;
335
336        let bytes = match opcode.as_str() {
337            // ------- Integer Arithmetic (R-type, opcode 0x33) -------
338            "add" => self.encode_r3(ops, 0x00, 0x0, 0x33)?,
339            "sub" => self.encode_r3(ops, 0x20, 0x0, 0x33)?,
340            "sll" => self.encode_r3(ops, 0x00, 0x1, 0x33)?,
341            "slt" => self.encode_r3(ops, 0x00, 0x2, 0x33)?,
342            "sltu" => self.encode_r3(ops, 0x00, 0x3, 0x33)?,
343            "xor" => self.encode_r3(ops, 0x00, 0x4, 0x33)?,
344            "srl" => self.encode_r3(ops, 0x00, 0x5, 0x33)?,
345            "sra" => self.encode_r3(ops, 0x20, 0x5, 0x33)?,
346            "or" => self.encode_r3(ops, 0x00, 0x6, 0x33)?,
347            "and" => self.encode_r3(ops, 0x00, 0x7, 0x33)?,
348
349            // ------- M-extension multiply/divide (R-type, funct7=1, opcode 0x33) -------
350            "mul" => self.encode_r3(ops, 0x01, 0x0, 0x33)?,
351            "mulh" => self.encode_r3(ops, 0x01, 0x1, 0x33)?,
352            "mulhsu" => self.encode_r3(ops, 0x01, 0x2, 0x33)?,
353            "mulhu" => self.encode_r3(ops, 0x01, 0x3, 0x33)?,
354            "div" => self.encode_r3(ops, 0x01, 0x4, 0x33)?,
355            "divu" => self.encode_r3(ops, 0x01, 0x5, 0x33)?,
356            "rem" => self.encode_r3(ops, 0x01, 0x6, 0x33)?,
357            "remu" => self.encode_r3(ops, 0x01, 0x7, 0x33)?,
358
359            // ------- RV64 word-width R-type (opcode 0x3B) -------
360            "addw" => self.encode_r3(ops, 0x00, 0x0, 0x3B)?,
361            "subw" => self.encode_r3(ops, 0x20, 0x0, 0x3B)?,
362            "sllw" => self.encode_r3(ops, 0x00, 0x1, 0x3B)?,
363            "srlw" => self.encode_r3(ops, 0x00, 0x5, 0x3B)?,
364            "sraw" => self.encode_r3(ops, 0x20, 0x5, 0x3B)?,
365            "mulw" => self.encode_r3(ops, 0x01, 0x0, 0x3B)?,
366            "divw" => self.encode_r3(ops, 0x01, 0x4, 0x3B)?,
367            "divuw" => self.encode_r3(ops, 0x01, 0x5, 0x3B)?,
368            "remw" => self.encode_r3(ops, 0x01, 0x6, 0x3B)?,
369            "remuw" => self.encode_r3(ops, 0x01, 0x7, 0x3B)?,
370
371            // ------- Integer Immediate (I-type, opcode 0x13) -------
372            "addi" => self.encode_i3(ops, 0x0, 0x13)?,
373            "slti" => self.encode_i3(ops, 0x2, 0x13)?,
374            "sltiu" => self.encode_i3(ops, 0x3, 0x13)?,
375            "xori" => self.encode_i3(ops, 0x4, 0x13)?,
376            "ori" => self.encode_i3(ops, 0x6, 0x13)?,
377            "andi" => self.encode_i3(ops, 0x7, 0x13)?,
378            "slli" => self.encode_shift_imm(ops, 0x00, 0x1, 0x13)?,
379            "srli" => self.encode_shift_imm(ops, 0x00, 0x5, 0x13)?,
380            "srai" => self.encode_shift_imm(ops, 0x20, 0x5, 0x13)?,
381
382            // ------- RV64 word-width immediate (opcode 0x1B) -------
383            "addiw" => self.encode_i3(ops, 0x0, 0x1B)?,
384            "slliw" => self.encode_shift_imm(ops, 0x00, 0x1, 0x1B)?,
385            "srliw" => self.encode_shift_imm(ops, 0x00, 0x5, 0x1B)?,
386            "sraiw" => self.encode_shift_imm(ops, 0x20, 0x5, 0x1B)?,
387
388            // ------- Loads -------
389            "lb" => self.encode_load(ops, 0x0)?,
390            "lh" => self.encode_load(ops, 0x1)?,
391            "lw" => self.encode_load(ops, 0x2)?,
392            "ld" => self.encode_load(ops, 0x3)?,
393            "lbu" => self.encode_load(ops, 0x4)?,
394            "lhu" => self.encode_load(ops, 0x5)?,
395            "lwu" => self.encode_load(ops, 0x6)?,
396
397            // ------- Stores -------
398            "sb" => self.encode_store(ops, 0x0)?,
399            "sh" => self.encode_store(ops, 0x1)?,
400            "sw" => self.encode_store(ops, 0x2)?,
401            "sd" => self.encode_store(ops, 0x3)?,
402
403            // ------- Branches -------
404            "beq" => self.encode_branch(ops, 0x0)?,
405            "bne" => self.encode_branch(ops, 0x1)?,
406            "blt" => self.encode_branch(ops, 0x4)?,
407            "bge" => self.encode_branch(ops, 0x5)?,
408            "bltu" => self.encode_branch(ops, 0x6)?,
409            "bgeu" => self.encode_branch(ops, 0x7)?,
410
411            // ------- JAL (J-type) -------
412            "jal" => {
413                if ops.len() != 2 {
414                    return Err(RasError::EncodingError(
415                        "JAL requires 2 operands: rd, offset".to_string(),
416                    ));
417                }
418                let rd = self.parse_register(&ops[0])?;
419                let offset = self.parse_imm(&ops[1])? as i32;
420                Self::emit(Self::j_type(offset, rd, 0x6F))
421            }
422
423            // ------- JALR (I-type) -------
424            "jalr" => {
425                if ops.len() == 3 {
426                    // jalr rd, rs1, imm  (explicit form)
427                    let rd = self.parse_register(&ops[0])?;
428                    let rs1 = self.parse_register(&ops[1])?;
429                    let imm = self.parse_imm(&ops[2])? as i32;
430                    Self::emit(Self::i_type(imm, rs1, 0x0, rd, 0x67))
431                } else if ops.len() == 2 {
432                    // jalr rd, offset(rs1)  (memory form)
433                    let rd = self.parse_register(&ops[0])?;
434                    let (rs1, offset) = self.parse_mem_operand(&ops[1])?;
435                    Self::emit(Self::i_type(offset, rs1, 0x0, rd, 0x67))
436                } else if ops.len() == 1 {
437                    // jalr rs1  (implicit rd=ra, offset=0)
438                    let rs1 = self.parse_register(&ops[0])?;
439                    Self::emit(Self::i_type(0, rs1, 0x0, 1, 0x67))
440                } else {
441                    return Err(RasError::EncodingError(
442                        "JALR requires 1–3 operands".to_string(),
443                    ));
444                }
445            }
446
447            // ------- LUI / AUIPC (U-type) -------
448            "lui" => {
449                if ops.len() != 2 {
450                    return Err(RasError::EncodingError(
451                        "LUI requires 2 operands: rd, imm20".to_string(),
452                    ));
453                }
454                let rd = self.parse_register(&ops[0])?;
455                let imm20 = self.parse_imm(&ops[1])? as i32;
456                Self::emit(Self::u_type(imm20, rd, 0x37))
457            }
458            "auipc" => {
459                if ops.len() != 2 {
460                    return Err(RasError::EncodingError(
461                        "AUIPC requires 2 operands: rd, imm20".to_string(),
462                    ));
463                }
464                let rd = self.parse_register(&ops[0])?;
465                let imm20 = self.parse_imm(&ops[1])? as i32;
466                Self::emit(Self::u_type(imm20, rd, 0x17))
467            }
468
469            // ------- SYSTEM / FENCE -------
470            "ecall" => Self::emit(Self::i_type(0, 0, 0, 0, 0x73)),
471            "ebreak" => Self::emit(Self::i_type(1, 0, 0, 0, 0x73)),
472            "fence" | "fence.i" => Self::emit(Self::i_type(0, 0, 0, 0, 0x0F)),
473
474            // ------- Pseudo-instructions -------
475            // `nop` = addi x0, x0, 0
476            "nop" => Self::emit(Self::i_type(0, 0, 0, 0, 0x13)),
477            // `ret` = jalr x0, 0(ra)
478            "ret" => Self::emit(Self::i_type(0, 1, 0, 0, 0x67)),
479            // `mv rd, rs` = addi rd, rs, 0
480            "mv" => {
481                if ops.len() != 2 {
482                    return Err(RasError::EncodingError(
483                        "MV requires 2 operands: rd, rs".to_string(),
484                    ));
485                }
486                let rd = self.parse_register(&ops[0])?;
487                let rs = self.parse_register(&ops[1])?;
488                Self::emit(Self::i_type(0, rs, 0, rd, 0x13))
489            }
490            // `li rd, imm` = addi rd, x0, imm  (small immediates only)
491            "li" => {
492                if ops.len() != 2 {
493                    return Err(RasError::EncodingError(
494                        "LI requires 2 operands: rd, imm".to_string(),
495                    ));
496                }
497                let rd = self.parse_register(&ops[0])?;
498                let imm = self.parse_imm(&ops[1])? as i32;
499                if !(-2048..=2047).contains(&imm) {
500                    return Err(RasError::EncodingError(format!(
501                        "LI pseudo-instruction only supports 12-bit immediates ({} out of range)",
502                        imm
503                    )));
504                }
505                Self::emit(Self::i_type(imm, 0, 0, rd, 0x13))
506            }
507            // `j offset` = jal x0, offset
508            "j" => {
509                if ops.len() != 1 {
510                    return Err(RasError::EncodingError(
511                        "J requires 1 operand: offset".to_string(),
512                    ));
513                }
514                let offset = self.parse_imm(&ops[0])? as i32;
515                Self::emit(Self::j_type(offset, 0, 0x6F))
516            }
517            // `call offset` = jal ra, offset
518            "call" => {
519                if ops.len() != 1 {
520                    return Err(RasError::EncodingError(
521                        "CALL requires 1 operand: offset".to_string(),
522                    ));
523                }
524                let offset = self.parse_imm(&ops[0])? as i32;
525                Self::emit(Self::j_type(offset, 1, 0x6F))
526            }
527
528            _ => {
529                return Err(RasError::EncodingError(format!(
530                    "Unknown RISC-V instruction: {}",
531                    opcode
532                )));
533            }
534        };
535
536        self.position += bytes.len();
537        Ok(bytes)
538    }
539
540    fn current_position(&self) -> usize {
541        self.position
542    }
543}
544
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encoder::traits::ParsedInstruction;

    /// Builds a `ParsedInstruction` from string slices.
    fn instr(opcode: &str, operands: &[&str]) -> ParsedInstruction {
        ParsedInstruction {
            opcode: opcode.to_string(),
            operands: operands.iter().map(|s| s.to_string()).collect(),
        }
    }

    /// Fresh RV64 encoder.
    fn enc() -> RiscVEncoder {
        RiscVEncoder::new(true)
    }

    /// Encodes a single instruction with a fresh encoder and returns the
    /// resulting 32-bit word (decoded from its little-endian bytes).
    fn encode_word(opcode: &str, operands: &[&str]) -> u32 {
        let bytes = enc()
            .encode_instruction(&instr(opcode, operands))
            .unwrap();
        assert_eq!(bytes.len(), 4);
        u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
    }

    #[test]
    fn test_nop_is_four_bytes() {
        let bytes = enc().encode_instruction(&instr("nop", &[])).unwrap();
        assert_eq!(bytes.len(), 4);
        // nop = addi x0, x0, 0, stored little-endian
        assert_eq!(bytes, vec![0x13, 0x00, 0x00, 0x00]);
    }

    #[test]
    fn test_add_encoding() {
        // add a0, a1, a2  → R-type opcode=0x33 funct3=0 funct7=0
        let word = encode_word("add", &["a0", "a1", "a2"]);
        assert_eq!(word & 0x7F, 0x33); // opcode
        assert_eq!((word >> 12) & 7, 0); // funct3
        assert_eq!((word >> 25) & 0x7F, 0); // funct7
        assert_eq!((word >> 7) & 0x1F, 10); // rd = a0 = 10
        assert_eq!((word >> 15) & 0x1F, 11); // rs1 = a1 = 11
        assert_eq!((word >> 20) & 0x1F, 12); // rs2 = a2 = 12
        assert_eq!(word, 0x00C5_8533);
    }

    #[test]
    fn test_addi_encoding() {
        // addi t0, zero, 42  → I-type
        let word = encode_word("addi", &["t0", "zero", "42"]);
        assert_eq!(word & 0x7F, 0x13); // opcode OP-IMM
        assert_eq!((word >> 7) & 0x1F, 5); // rd = t0 = 5
        // Arithmetic shift on the signed view sign-extends imm[11:0].
        assert_eq!((word as i32) >> 20, 42);
        assert_eq!(word, 0x02A0_0293);

        // A negative immediate must come back sign-extended.
        let word = encode_word("addi", &["t0", "zero", "-1"]);
        assert_eq!((word as i32) >> 20, -1);
        assert_eq!(word, 0xFFF0_0293);
    }

    #[test]
    fn test_ret_pseudo() {
        // ret → jalr x0, 0(ra) → I-type i=0, rs1=1, funct3=0, rd=0, opcode=0x67
        let word = encode_word("ret", &[]);
        assert_eq!(word & 0x7F, 0x67); // JALR opcode
        assert_eq!((word >> 7) & 0x1F, 0); // rd = x0
        assert_eq!((word >> 15) & 0x1F, 1); // rs1 = ra
        assert_eq!(word >> 20, 0); // imm = 0
        assert_eq!(word, 0x0000_8067);
    }

    #[test]
    fn test_load_store_encoding() {
        // sw a0, 0(sp)
        let word = encode_word("sw", &["a0", "0(sp)"]);
        assert_eq!(word & 0x7F, 0x23); // STORE opcode
        assert_eq!((word >> 12) & 7, 0x2); // funct3 = SW
        assert_eq!(word, 0x00A1_2023);

        // lw a1, 4(sp)
        let word = encode_word("lw", &["a1", "4(sp)"]);
        assert_eq!(word & 0x7F, 0x03); // LOAD opcode
        assert_eq!((word >> 12) & 7, 0x2); // funct3 = LW
        assert_eq!(word, 0x0041_2583);
    }

    #[test]
    fn test_position_advances() {
        let mut e = enc();
        assert_eq!(e.current_position(), 0);
        e.encode_instruction(&instr("nop", &[])).unwrap();
        assert_eq!(e.current_position(), 4);
        e.encode_instruction(&instr("nop", &[])).unwrap();
        assert_eq!(e.current_position(), 8);
    }

    #[test]
    fn test_abi_register_aliases() {
        // Both 'zero' and 'x0' should parse to 0
        let mut e = enc();
        let b1 = e
            .encode_instruction(&instr("addi", &["zero", "zero", "0"]))
            .unwrap();
        let b2 = e
            .encode_instruction(&instr("addi", &["x0", "x0", "0"]))
            .unwrap();
        assert_eq!(b1, b2);

        // 'fp' and 's0' both name x8
        assert_eq!(
            encode_word("mv", &["fp", "sp"]),
            encode_word("mv", &["s0", "x2"])
        );
    }

    #[test]
    fn test_mul_encoding() {
        // mul a0, a1, a2 → R-type funct7=1
        let word = encode_word("mul", &["a0", "a1", "a2"]);
        assert_eq!((word >> 25) & 0x7F, 1); // funct7 = M-extension
        assert_eq!(word, 0x02C5_8533);
    }
}