Skip to main content

asmkit/riscv/
decode.rs

1use opcodes::{
2    ALL_OPCODES, OPCODE32_MASK, OPCODE32_MATCH, OPCODE64_MASK, OPCODE64_MATCH, SHORT_OPCODES,
3};
4
5use super::*;
6pub struct Decoder<'a> {
7    buf: &'a [u8],
8    cursor: usize,
9    address: u64,
10    mode: Mode,
11}
12
/// Decoding mode: which of the two opcode table sets the decoder consults.
///
/// `Debug` is derived so the mode can be printed in diagnostics and used in
/// `assert_eq!`, and so that types containing it can derive `Debug` as well.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Mode {
    /// Decode using the 32-bit opcode tables.
    Decode32,
    /// Decode using the 64-bit opcode tables.
    Decode64,
}
18
19impl Mode {
20    pub fn is_64(self) -> bool {
21        self == Self::Decode64
22    }
23}
24
25impl TryFrom<usize> for Mode {
26    type Error = ();
27
28    fn try_from(value: usize) -> Result<Self, Self::Error> {
29        match value {
30            32 => Ok(Self::Decode32),
31            64 => Ok(Self::Decode64),
32            _ => Err(()),
33        }
34    }
35}
36pub struct Instruction {
37    pub code: Opcode,
38    pub value: InstructionValue,
39    pub len: usize,
40    pub address: u64,
41}
42
43impl Default for Instruction {
44    fn default() -> Self {
45        Self {
46            len: 0,
47            code: Opcode::Invalid,
48            value: InstructionValue::new(0),
49            address: 0,
50        }
51    }
52}
53
/// Return the length (in bytes) of an instruction given the low 16 bits of it.
///
/// The length is encoded in the low bits of the first 16-bit parcel:
///
/// | low bits                  | length                  |
/// |---------------------------|-------------------------|
/// | `xxxxxaa`, `aa != 11`     | 2 bytes                 |
/// | `xxbbb11`, `bbb != 111`   | 4 bytes                 |
/// | `0011111`                 | 6 bytes                 |
/// | `0111111`                 | 8 bytes                 |
/// | `1111111`                 | `10 + 2 * nnn` bytes    |
///
/// where `nnn` is bits 14..=12 of the parcel.
///
/// The current spec reserves a bit pattern for instructions of length >= 192 bits, but for
/// simplicity this function just returns 24 in that case. The largest instructions currently
/// defined are 4 bytes so it will likely be a long time until this difference matters.
fn instruction_length(i: u16) -> usize {
    if i & 0b11 != 0b11 {
        2
    } else if i & 0b1_1100 != 0b1_1100 {
        4
    } else if i & 0b11_1111 == 0b01_1111 {
        6
    } else if i & 0b111_1111 == 0b011_1111 {
        // The 7-bit mask must be compared against the 7-bit pattern `0111111`;
        // comparing against `011111` can never match here and would make
        // 64-bit encodings fall through to the 80-bit-and-up formula.
        8
    } else {
        10 + 2 * ((i >> 12) & 0b111) as usize
    }
}
72
73impl<'a> Decoder<'a> {
74    pub fn new(bitness: usize, buf: &'a [u8], address: u64) -> Self {
75        Self {
76            buf,
77            cursor: 0,
78            address,
79            mode: Mode::try_from(bitness).expect("only 32 and 64 bit bitness supported"),
80        }
81    }
82
83    pub fn can_decode(&self) -> bool {
84        self.cursor < self.buf.len()
85    }
86
87    fn read_u8(&mut self) -> u8 {
88        let val = self.buf[self.cursor];
89        self.cursor += 1;
90        val
91    }
92
93    pub fn decode(&mut self) -> Instruction {
94        let mut inst = Instruction::default();
95        self.decode_out(&mut inst);
96        inst
97    }
98
99    pub fn decode_out(&mut self, inst: &mut Instruction) {
100        if self.cursor >= self.buf.len() {
101            return;
102        }
103
104        let start = self.cursor;
105
106        let b0 = self.read_u8();
107        let b1 = self.read_u8();
108
109        let short = u16::from_le_bytes([b0, b1]);
110        let len = instruction_length(short);
111        let mut val = InstructionValue::new(short as _);
112        let masks = if self.mode.is_64() {
113            &OPCODE64_MASK
114        } else {
115            &OPCODE32_MASK
116        };
117
118        let matches = if self.mode.is_64() {
119            &OPCODE64_MATCH
120        } else {
121            &OPCODE32_MATCH
122        };
123        let mut opc = None;
124
125        if len == 2 {
126            let mut zero_mask = None;
127
128            for &op in SHORT_OPCODES.iter() {
129                let mask = masks[op as usize];
130                let match_ = matches[op as usize];
131                if short as u32 & mask == match_ {
132                    if mask == 0 {
133                        zero_mask = Some(op);
134                        continue;
135                    }
136
137                    opc = Some(op);
138                    break;
139                }
140            }
141
142            if opc.is_none() {
143                opc = zero_mask;
144            }
145        } else if len == 4 {
146            let b2 = self.read_u8();
147            let b3 = self.read_u8();
148            let long = u32::from_le_bytes([b0, b1, b2, b3]);
149            for &op in ALL_OPCODES.iter() {
150                let mask = masks[op as usize];
151                let match_ = matches[op as usize];
152                if long & mask == match_ {
153                    opc = Some(op);
154                    val = InstructionValue::new(long);
155                    break;
156                }
157            }
158        } else {
159            self.cursor = start + len;
160            return;
161        }
162
163        let opc = opc.unwrap_or(Opcode::Invalid);
164        inst.code = opc;
165        inst.value = val;
166        inst.len = len;
167        inst.address = self.address + start as u64;
168    }
169}