riscv_asm/
decode.rs

1#![deny(unreachable_patterns)]
2
3use crate::instr::{Instr, Reg};
4use Instr::*;
5
6use std::convert::TryInto;
7
/// Internal trait to simplify bit operations.
///
/// Bit indices count from the least significant bit, which has index 0.
/// None of the methods panic: indices past the top of the word are handled
/// as described on each method.
trait Bits {
    /// The signed integer type of the same width as `Self`.
    type Signed;

    /// Extract the bit at index `idx`, moved down to position 0.
    ///
    /// The result is always `0` or `1`. An `idx` beyond the width of the
    /// word reads as `0`.
    fn bit(&self, idx: u8) -> Self;

    /// Extract the bits `lo` through `hi`, inclusive, and then shift them to the 0 position.
    ///
    /// A `hi` beyond the top bit is treated as the top bit. If `lo > hi`
    /// (or `lo` is beyond the width of the word) the result is `0`.
    fn bits(&self, hi: u8, lo: u8) -> Self;

    /// Sign extend using the bit at index `hi` as the most significant bit.
    ///
    /// All bits in locations >= `hi` are replaced with the bit at `hi`.
    /// A `hi` at or beyond the top bit leaves nothing to replace, so the
    /// word is simply reinterpreted as signed.
    fn sign_ext(&self, hi: u8) -> Self::Signed;
}

impl Bits for u32 {
    type Signed = i32;

    fn bit(&self, idx: u8) -> Self {
        // `checked_shr` yields `None` for shifts >= 32 instead of overflowing.
        self.checked_shr(u32::from(idx)).map_or(0, |v| v & 0x1)
    }

    fn bits(&self, hi: u8, lo: u8) -> Self {
        // Clamp so that `31 - hi` cannot underflow.
        let hi = u32::from(hi).min(31);
        let mask = u32::MAX >> (31 - hi);

        (self & mask).checked_shr(u32::from(lo)).unwrap_or(0)
    }

    fn sign_ext(&self, hi: u8) -> Self::Signed {
        // Move the sign bit up to bit 31, then let the arithmetic right shift
        // of the signed value copy it into every position above `hi`.
        // `shift` is always in 0..=31, so neither shift can overflow.
        let shift = 31 - u32::from(hi).min(31);
        ((self << shift) as Self::Signed) >> shift
    }
}
48
49#[allow(unused_variables)]
50pub fn decode_opcode(w: u32) -> Option<Instr> {
51    /*
52      Different instructions may use different named fields in the enoding,
53    and not all fields are always used. Many fields overlap.
54    However, if two instructions use the same field name, that field is
55    located in the same location in the word for both instructions.
56
57    RISC-V Instruction Encodings by type:
58    (note: funct3 is abbreviated as f3)
59
60        R-type
61         0                   1                   2                   3
62         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
63        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
64        |    opcode   |    rd   | f3  |   rs1   |   rs2   |    funct7   |
65        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
66
67        I-type
68         0                   1                   2                   3
69         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
70        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
71        |    opcode   |    rd   | f3  |   rs1   |       imm[11;0]       |
72        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
73
74        S-type
75         0                   1                   2                   3
76         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
77        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
78        |    opcode   | imm[4;0]| f3  |   rs1   |   rs2   |  imm[11;5]  |
79        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
80
81        U-type
82         0                   1                   2                   3
83         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
84        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
85        |    opcode   |    rd   |               imm[31;12]              |
86        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
87    */
88
89    // We extract each field value here, then reference them in the
90    // larger match block below.
91    let opcode = w.bits(6, 0);
92    let rd: Reg = w.bits(11, 7).try_into().unwrap_or_default();
93    let funct3 = w.bits(14, 12);
94    let rs1: Reg = w.bits(19, 15).try_into().unwrap_or_default();
95    let rs2: Reg = w.bits(24, 20).try_into().unwrap_or_default();
96    let funct7 = w.bits(31, 25);
97    let funct12 = w.bits(31, 20);
98
99    // TODO: Not sure if these are always loaded the same way
100    let imm5: u8 = 0;
101    let imm12: i32 = 0;
102    let imm20: i32 = 0;
103
104    // TODO: Sign extend
105    // R-type instructions do not have an immediate encoded
106    let _r_imm: ();
107    let i_imm: u32 = w.bits(31, 20);
108    let s_imm: u32 = (w.bits(31, 25) << 5) | w.bits(11, 7);
109    let b_imm: u32 = (w.bit(31) << 12)      // ┌ Note: lsb is always 0!
110        | (w.bit(7) << 11)                  // │
111        | (w.bits(30, 25) << 5)             // │
112        | (w.bits(11, 8) << 1)              // ┘
113    ;
114    let u_imm: u32 = (w.bits(31, 20) << 20) // ┌ Note: 12 low bits of 0!
115        | (w.bits(19, 12) << 12)            // ┘
116    ;
117    let j_imm: u32 = (w.bit(31) << 20)      // ┌ Note: lsb is always 0!
118        | (w.bits(19, 12) << 12)            // │
119        | (w.bit(20) << 11)                 // │
120        | (w.bits(30, 21) << 1)             // ┘
121    ;
122
123    let b_imm_s: i32 = b_imm.sign_ext(12);
124
125    match (opcode, funct3) {
126        // Special values
127        _ if w == 0x0 => {
128            // The all-zero instruction is special-cased as illegal, so we handle
129            // it here like an instruction. For the rest of our decoding, we'll handle
130            // invalid instructions like an error.
131            Some(Illegal)
132        }
133
134        // Load Instructions
135        (0x03, 0x0) => Some(Lb { rd, rs1, imm12 }),
136        (0x03, 0x1) => Some(Lh { rd, rs1, imm12 }),
137        (0x03, 0x2) => Some(Lw { rd, rs1, imm12 }),
138        (0x03, 0x3) => Some(Ld { rd, rs1, imm12 }),
139        (0x03, 0x4) => Some(Lbu { rd, rs1, imm12 }),
140        (0x03, 0x5) => Some(Lhu { rd, rs1, imm12 }),
141        (0x03, 0x6) => Some(Lwu { rd, rs1, imm12 }),
142
143        // Fences
144        (0x0f, 0x0) => Some(Fence {
145            rd,
146            rs1,
147            successor: 0,
148            predecessor: 0,
149            fm: 0,
150        }),
151        (0x0f, 0x1) => Some(FenceI { rd, rs1, imm12 }),
152
153        (0x13, 0x0) => Some(Addi { rd, rs1, imm12 }),
154        (0x13, 0x1) if funct7 == 0x00 => Some(Slli { rd, rs1, imm5 }),
155        (0x13, 0x2) => Some(Slti { rd, rs1, imm12 }),
156        (0x13, 0x3) => Some(Sltiu { rd, rs1, imm12 }),
157        (0x13, 0x4) => Some(Xori { rd, rs1, imm12 }),
158        (0x13, 0x5) if funct7 == 0x00 => Some(Srli { rd, rs1, imm5 }),
159        (0x13, 0x5) if funct7 == 0x20 => Some(Srai { rd, rs1, imm5 }),
160        (0x13, 0x6) => Some(Ori { rd, rs1, imm12 }),
161        (0x13, 0x7) => Some(Andi { rd, rs1, imm12 }),
162
163        (0x17, _) => Some(Auipc { rd, imm20 }),
164
165        // // Store Instructions
166        (0x23, 0x0) => Some(Sb { rs1, rs2, imm12 }),
167        (0x23, 0x1) => Some(Sh { rs1, rs2, imm12 }),
168        (0x23, 0x2) => Some(Sw { rs1, rs2, imm12 }),
169        (0x23, 0x3) => Some(Sd { rs1, rs2, imm12 }),
170
171        (0x33, 0x0) if funct7 == 0x00 => Some(Add { rd, rs1, rs2 }),
172        (0x33, 0x0) if funct7 == 0x20 => Some(Sub { rd, rs1, rs2 }),
173        (0x33, 0x1) => Some(Sll { rd, rs1, rs2 }),
174        (0x33, 0x2) => Some(Slt { rd, rs1, rs2 }),
175        (0x33, 0x3) => Some(Sltu { rd, rs1, rs2 }),
176        (0x33, 0x4) => Some(Xor { rd, rs1, rs2 }),
177        (0x33, 0x5) if funct7 == 0x00 => Some(Srl { rd, rs1, rs2 }),
178        (0x33, 0x5) if funct7 == 0x20 => Some(Sra { rd, rs1, rs2 }),
179        (0x33, 0x6) => Some(Or { rd, rs1, rs2 }),
180        (0x33, 0x7) => Some(And { rd, rs1, rs2 }),
181
182        (0x37, _) => Some(Lui { rd, imm20 }),
183
184        (0x63, 0x0) => Some(Beq {
185            rs1,
186            rs2,
187            imm: b_imm_s,
188        }),
189        (0x63, 0x1) => Some(Bne {
190            rs1,
191            rs2,
192            imm: b_imm_s,
193        }),
194        (0x63, 0x4) => Some(Blt {
195            rs1,
196            rs2,
197            imm: b_imm_s,
198        }),
199        (0x63, 0x5) => Some(Bge {
200            rs1,
201            rs2,
202            imm: b_imm_s,
203        }),
204        (0x63, 0x6) => Some(Bltu {
205            rs1,
206            rs2,
207            imm: b_imm_s,
208        }),
209        (0x63, 0x7) => Some(Bgeu {
210            rs1,
211            rs2,
212            imm: b_imm_s,
213        }),
214
215        (0x67, 0x0) => Some(Jalr { rd, rs1, imm12 }),
216
217        (0x6f, _) => Some(Jal { rd, imm20 }),
218
219        (0x73, 0x0) if funct7 == 0x0 => Some(Ecall { rd, rs1 }),
220        (0x73, 0x0) if funct7 == 0x1 => Some(Ebreak { rd, rs1 }),
221        (0x73, 0x0) if funct12 == 0x302 => Some(Wfi {}),
222        (0x73, 0x0) if funct12 == 0x105 => Some(Mret {}),
223
224        (0x73, 0x1) => Some(Csrrw {
225            rs1,
226            imm12: imm12 as u32,
227        }),
228        (0x73, 0x2) => Some(Csrrs {
229            rd,
230            rs1,
231            imm12: imm12 as u32,
232        }),
233        (0x73, 0x3) => Some(Csrrc { rs1 }),
234        (0x73, 0x5) => Some(Csrrwi { rd }),
235        (0x73, 0x6) => Some(Csrrsi {
236            imm5,
237            imm12: imm12 as u32,
238        }),
239        (0x73, 0x7) => Some(Csrrci {
240            imm5,
241            imm12: imm12 as u32,
242        }),
243        _ => None,
244    }
245}
246
#[cfg(test)]
mod test {
    use super::*;

    use Reg::*;

    // These asserts give us diffs when they fail.
    // Import both `assert_eq` and `assert_ne`, even though we don't `assert_ne`
    // yet, so that future tests don't accidentally miss this import and use
    // std's macros instead.
    #[allow(unused_imports)]
    use pretty_assertions::{assert_eq, assert_ne};

    /// Checks `bits` (and `bit`, for single-bit ranges) against hand-computed
    /// slices of a fixed word.
    #[test]
    fn check_bits() {
        const W: u32 = 0xdead_beef;

        // To help visualize:
        assert_eq!(W, 0b_11011110101011011011111011101111);

        // Sanity check:
        const AWKWARD: u32 = 0b_0110_1101_1111_0111;
        assert_eq!(W & (AWKWARD << 5), AWKWARD << 5);

        for (hi, lo, expect) in [
            (0_u8, 0_u8, 1_u32), // Individual Bits
            (0, 0, 1),           // ┌ 0xf
            (1, 1, 1),           // │
            (2, 2, 1),           // │
            (3, 3, 1),           // └
            (4, 4, 0),           // ┌ 0xe
            (5, 5, 1),           // │
            (6, 6, 1),           // │
            (7, 7, 1),           // └
            (8, 8, 0),           // ┌ 0xe
            (9, 9, 1),           // │
            (10, 10, 1),         // │
            (11, 11, 1),         // └
            (31, 16, 0xdead),    // High 2 bytes
            (16, 31, 0x0),       // High 2 bytes backwards
            (15, 0, 0xbeef),     // Low 2 bytes
            (31, 24, 0xde),      // High byte
            (23, 16, 0xad),      // 2nd high byte
            (15, 8, 0xbe),       // 2nd low byte
            (7, 0, 0xef),        // Low byte
            (31, 0, W),          // Full range
            (20, 5, AWKWARD),    // "Awkward" range that crosses bytes
        ]
        .iter()
        .copied()
        {
            let actual = W.bits(hi, lo);

            let label_actual = format!("bits({hi}, {lo})", hi = hi, lo = lo,);
            let label_expect = format!("0x{expect:x}", expect = expect,);

            assert_eq!(
                actual,
                expect,
                concat!(
                    "\n",
                    "   bits({hi}, {lo}) != 0x{expect:x}\n",
                    "       {label_actual:<12} == 0x{actual:08x} == 0b{actual:032b}\n",
                    "       {label_expect:<12} == 0x{expect:08x} == 0b{expect:032b}\n"
                ),
                hi = hi,
                lo = lo,
                label_expect = label_expect,
                expect = expect,
                label_actual = label_actual,
                actual = actual
            );

            // A one-bit range must agree with the single-bit accessor.
            if hi == lo {
                assert_eq!(actual, W.bit(hi));
            }
        }
    }

    /// Checks `sign_ext` with both a set and a clear sign bit.
    #[test]
    fn check_bits_sign_ext() {
        assert_eq!(
            0b_1.sign_ext(0),                                      //
            0b_1111_1111_1111_1111_1111_1111_1111_1111_u32 as i32, // Expected
        );
        assert_eq!(
            0b_0100_0000_0000_1111_u32.sign_ext(14),               //
            0b_1111_1111_1111_1111_1100_0000_0000_1111_u32 as i32, // Expected
        );

        assert_eq!(
            0b_0.sign_ext(0), //
            0b_0_u32 as i32,  // Expected
        );
        assert_eq!(
            0b_0100_0000_0000_1111_u32.sign_ext(15), //
            0b_0100_0000_0000_1111_u32 as i32,       // Expected
        );
    }

    // This test takes too long, while the rest are instant
    // (~15 seconds on release and ~1238 seconds (yes really) on debug).
    // Until this is sped up, mark it as ignored. We can run it manually:
    //      time cargo test --release brute -- --ignored
    #[test]
    #[ignore]
    fn brute_force_decode() {
        // Test every word to make sure that we don't panic :)
        for word in 0..=u32::MAX {
            let _ = decode_opcode(word);
        }
    }

    // Generates one `#[test]` per `name: little-endian bytes => expected instruction` entry.
    macro_rules! make_instr_test {
        ( $( $test_name:ident : $le_bytes:expr => $expected:expr ),+ ) => {
            $(
                #[test]
                fn $test_name() {
                    let word = u32::from_le_bytes($le_bytes);
                    assert_eq!(decode_opcode(word), Some($expected));
                }
            )+
        };
    }

    make_instr_test! {
        // The zero-word is an illegal instruction by design.
        check_zero_word:                [0x00, 0x00, 0x00, 0x00] => Illegal,
        check_unimp:                    [0x73, 0x10, 0x00, 0xc0] => Illegal,

        // TODO: Check
        //      add a, b, c
        // making sure to use each of the 31 registers at least twice in different spots.
        // Note: add zero, X, X is a "HINT" opcode

        check_add_s0_sp_zero:           [0x33, 0x04, 0x01, 0x00] => Add { rd: S0, rs1: Sp, rs2: Zero, },
        check_add_a2_a5_a1:             [0x33, 0x86, 0xb7, 0x00] => Add { rd: A2, rs1: A5, rs2: A1, },
        check_add_t0_t0_t2:             [0xb3, 0x82, 0x72, 0x00] => Add { rd: T0, rs1: T0, rs2: T2, },

        check_addi_sp_sp_64:            [0x13, 0x01, 0x01, 0x04] => Addi { rd: Sp, rs1: Sp, imm12: 64, },
        check_addi_t1_t1_neg_1:         [0x13, 0x03, 0xf3, 0xff] => Addi { rd: T1, rs1: T1, imm12: -1, },
        check_addi_a0_sp_32:            [0x13, 0x05, 0x01, 0x02] => Addi { rd: A0, rs1: Sp, imm12: 32, },
        check_addi_a7_a0_neg_273:       [0x93, 0x08, 0xf5, 0xee] => Addi { rd: A7, rs1: A0, imm12: -273, },
        check_addi_t0_t0_neg_2048:      [0x93, 0x82, 0x02, 0x80] => Addi { rd: T0, rs1: T0, imm12: -2048, },

        check_and_a0_a0_a1:             [0x33, 0x75, 0xb5, 0x00] => And { rd: A0, rs1: A0, rs2: A1 },

        check_andi_a2_a2_1:             [0x13, 0x76, 0x16, 0x00] => Andi { rd: A2, rs1: A2, imm12: 1 },

        check_auipc_sp_4:               [0x17, 0x41, 0x00, 0x00] => Auipc { rd: Sp, imm20: 4 },
        check_auipc_gp_1:               [0x97, 0x11, 0x00, 0x00] => Auipc { rd: Gp, imm20: 1 },

        check_beq_a0_zero_12:           [0x63, 0x06, 0x05, 0x00] => Beq { rs1: A0, rs2: Zero, imm: 12 },

        // funct3 of this word is 0b101, i.e. bge.
        check_bge_a1_a0_20:             [0x63, 0xda, 0xa5, 0x00] => Bge { rs1: A1, rs2: A0, imm: 20 },

        check_bgeu_a0_a1_36:            [0x63, 0x72, 0xb5, 0x02] => Bgeu { rs1: A0, rs2: A1, imm: 36 },

        check_bltu_a1_a0_neg_16:        [0xe3, 0xe8, 0xa5, 0xfe] => Bltu { rs1: A1, rs2: A0, imm: -16 },

        check_bne_t3_t1_neg_64:         [0xe3, 0x10, 0x6e, 0xfc] => Bne { rs1: T3, rs2: T1, imm: -64 },

        // ==== TODO: All of the Csrr tests and decoding is incomplete.
        // The CSR address is not decoded yet, so `imm12` is expected to be 0 for now.

        // Csrr a0, mcause  (encoded as csrrs a0, mcause, zero)
        check_csrr_a0_mcause:           [0x73, 0x25, 0x20, 0x34] => Csrrs { rd: A0, rs1: Zero, imm12: 0 },

        // Csrr a0, mhartid  (encoded as csrrs a0, mhartid, zero)
        check_csrr_a0_mhartid:          [0x73, 0x25, 0x40, 0xf1] => Csrrs { rd: A0, rs1: Zero, imm12: 0 },

        // Csrw mtvec, t0
        check_csrw_mtvec_t0:            [0x73, 0x90, 0x52, 0x30] => Csrrw { rs1: T0, imm12: 0 },

        // Csrwi  mie, 0
        check_csrwi_mie_0:              [0x73, 0x50, 0x40, 0x30] => Csrrwi { rd: Zero },

        // Csrwi  mip, 0
        check_csrwi_mip_0:              [0x73, 0x50, 0x40, 0x34] => Csrrwi { rd: Zero },

        // Fence  rw, rw
        // NOTE(review): the decoder does not fill in successor/predecessor yet, and
        // bits 27..24 and 23..20 of this word are both 0b0011; confirm the intended
        // bit order of these fields before relying on the values below.
        check_fence_rw_rw:              [0x0f, 0x00, 0x30, 0x03] => Fence {
            rd: Zero, rs1: Zero,
            successor: 0b_1100, predecessor: 0b_1100,
            fm: 0
        },

        // `imm20` of a jump is the pc-relative byte offset encoded in the word.
        check_j_0:                      [0x6f, 0x00, 0x00, 0x00] => Jal { rd: Zero, imm20: 0 },
        check_j_12:                     [0x6f, 0x00, 0xc0, 0x00] => Jal { rd: Zero, imm20: 12 },
        check_j_neg_8:                  [0x6f, 0xf0, 0x9f, 0xff] => Jal { rd: Zero, imm20: -8 },

        check_jal_76:                   [0xef, 0x00, 0xc0, 0x04] => Jal { rd: Ra, imm20: 76 },
        check_jalr_a0:                  [0xe7, 0x00, 0x05, 0x00] => Jalr { rd: Ra, rs1: A0, imm12: 0 },
        check_jalr_728_ra:              [0xe7, 0x80, 0x80, 0x2d] => Jalr { rd: Ra, rs1: Ra, imm12: 728 },

        check_lui_a0_0:                 [0x37, 0x05, 0x00, 0x00] => Lui { rd: A0, imm20: 0 },
        check_lui_a0_2:                 [0x37, 0x25, 0x00, 0x00] => Lui { rd: A0, imm20: 2 },
        check_lui_a0_912092:            [0x37, 0xc5, 0xad, 0xde] => Lui { rd: A0, imm20: 912092 },
        check_lui_ra_0:                 [0xb7, 0x00, 0x00, 0x00] => Lui { rd: Ra, imm20: 0 },
        check_lui_t0_0:                 [0xb7, 0x02, 0x00, 0x00] => Lui { rd: T0, imm20: 0 },
        check_lui_a1_0:                 [0xb7, 0x05, 0x00, 0x00] => Lui { rd: A1, imm20: 0 },
        check_lui_a1_674490:            [0xb7, 0xa5, 0xab, 0xa4] => Lui { rd: A1, imm20: 674490 },

        check_lw_t1_8_sp:               [0x03, 0x23, 0x81, 0x00] => Lw { rd: T1, rs1: Sp, imm12: 8 },
        check_lw_a6_56_sp:              [0x03, 0x28, 0x81, 0x03] => Lw { rd: A6, rs1: Sp, imm12: 56 },
        check_lw_t6_28_sp:              [0x83, 0x2f, 0xc1, 0x01] => Lw { rd: T6, rs1: Sp, imm12: 28 },

        // Mret
        check_mret:                     [0x73, 0x00, 0x20, 0x30] => Mret {},

        // TODO: `ret` (jalr zero, 0(ra), bytes [0x67, 0x80, 0x00, 0x00]) has no test yet.

        // Stores: `rs1` is the base-address field (bits 19..15) and `rs2` is the
        // stored-value field (bits 24..20), so `sb a2, 0(a1)` has rs1 = a1, rs2 = a2.
        check_sb_a2_a1_0:               [0x23, 0x80, 0xc5, 0x00] => Sb { rs1: A1, rs2: A2, imm12: 0 },
        check_sw_a3_sp_44:              [0x23, 0x26, 0xd1, 0x02] => Sw { rs1: Sp, rs2: A3, imm12: 44 },

        check_slli_a0_a0_2:             [0x13, 0x15, 0x25, 0x00] => Slli { rd: A0, rs1: A0, imm5: 2 },

        check_sub_sp_sp_t0:             [0x33, 0x01, 0x51, 0x40] => Sub { rd: Sp, rs1: Sp, rs2: T0 },

        // Ecall / Ebreak differ only in bit 20 of the word.
        check_ecall:                    [0x73, 0x00, 0x00, 0x00] => Ecall { rd: Zero, rs1: Zero },
        check_ebreak:                   [0x73, 0x00, 0x10, 0x00] => Ebreak { rd: Zero, rs1: Zero },

        // Wfi
        check_wfi:                      [0x73, 0x00, 0x50, 0x10] => Wfi {},

        // Xor  a2, a1, a3
        check_xor_a2_a1_a3:             [0x33, 0xc6, 0xd5, 0x00] => Xor { rd: A2, rs1: A1, rs2: A3 }
    }
}