asmkit/riscv/
decode.rs

1use opcodes::{ALL_OPCODES, OPCODE32_MASK, OPCODE32_MATCH, OPCODE64_MASK, OPCODE64_MATCH, SHORT_OPCODES};
2
3use super::*;
4pub struct Decoder<'a> {
5    buf: &'a [u8],
6    cursor: usize,
7    address: u64,
8    mode: Mode,
9}
10
/// Which set of opcode tables the decoder matches instructions against.
#[derive(Clone, Copy, Eq, PartialEq)]
enum Mode {
    /// Match against the 32-bit opcode tables.
    Decode32,
    /// Match against the 64-bit opcode tables.
    Decode64,
}
16
17impl Mode {
18    pub fn is_64(self) -> bool {
19        self == Self::Decode64
20    }
21}
22
23impl TryFrom<usize> for Mode {
24    type Error = ();
25
26    fn try_from(value: usize) -> Result<Self, Self::Error> {
27        match value {
28            32 => Ok(Self::Decode32),
29            64 => Ok(Self::Decode64),
30            _ => Err(()),
31        }
32    }
33}
34pub struct Instruction {
35    pub code: Opcode,
36    pub value: InstructionValue,
37    pub len: usize,
38    pub address: u64,
39}
40
41impl Default for Instruction {
42    fn default() -> Self {
43        Self {
44            len: 0,
45            code: Opcode::Invalid,
46            value: InstructionValue::new(0),
47            address: 0,
48        }
49    }
50}
51
/// Return the length (in bytes) of an instruction given the low 16 bits of it.
///
/// The length is determined by the low-order bits of the first 16-bit parcel:
///
/// | low bits                 | length                 |
/// |--------------------------|------------------------|
/// | `..aa`, `aa != 11`       | 2 bytes                |
/// | `..bbb11`, `bbb != 111`  | 4 bytes                |
/// | `..011111`               | 6 bytes                |
/// | `..0111111`              | 8 bytes                |
/// | `.nnn.....1111111`       | `10 + 2 * nnn` bytes   |
///
/// The current spec reserves a bit pattern (`nnn == 111`) for instructions of length
/// >= 192 bits, but for simplicity this function just returns 24 in that case. The largest
/// instructions currently defined are 4 bytes so it will likely be a long time until this
/// difference matters.
fn instruction_length(i: u16) -> usize {
    if i & 0b11 != 0b11 {
        2
    } else if i & 0b1_1100 != 0b1_1100 {
        4
    } else if i & 0b11_1111 == 0b01_1111 {
        6
    } else if i & 0b111_1111 == 0b011_1111 {
        // Seven-bit pattern 0111111. Comparing against 0b011111 here can never succeed,
        // because the 6-byte test above already took every value ending in 011111.
        8
    } else {
        // Low seven bits are all ones: bits 14..12 hold `nnn`.
        10 + 2 * ((i >> 12) & 0b111) as usize
    }
}
70
71impl<'a> Decoder<'a> {
72    pub fn new(bitness: usize, buf: &'a [u8], address: u64) -> Self {
73        Self {
74            buf,
75            cursor: 0,
76            address,
77            mode: Mode::try_from(bitness).expect("only 32 and 64 bit bitness supported"),
78        }
79    }
80
81    pub fn can_decode(&self) -> bool {
82        self.cursor < self.buf.len()
83    }
84
85    fn read_u8(&mut self) -> u8 {
86        let val = self.buf[self.cursor];
87        self.cursor += 1;
88        val
89    }
90
91    pub fn decode(&mut self) -> Instruction {
92        let mut inst = Instruction::default();
93        self.decode_out(&mut inst);
94        inst
95    }
96
97    pub fn decode_out(&mut self, inst: &mut Instruction) {
98        if self.cursor >= self.buf.len() {
99            return;
100        }
101
102        let start = self.cursor;
103
104        let b0 = self.read_u8();
105        let b1 = self.read_u8();
106
107        let short = u16::from_le_bytes([b0, b1]);
108        let len = instruction_length(short);
109        let mut val = InstructionValue::new(short as _);
110        let masks = if self.mode.is_64() {
111            &OPCODE64_MASK
112        } else {
113            &OPCODE32_MASK
114        };
115
116        let matches = if self.mode.is_64() {
117            &OPCODE64_MATCH
118        } else {
119            &OPCODE32_MATCH
120        };
121        let mut opc = None;
122
123        if len == 2 {
124            let mut zero_mask = None;
125
126            for &op in SHORT_OPCODES.iter() {
127                let mask = masks[op as usize];
128                let match_ = matches[op as usize];
129                if short as u32 & mask == match_ {
130                    if mask == 0 {
131                        zero_mask = Some(op);
132                        continue;
133                    }
134
135                    opc = Some(op);
136                    break;
137                }
138            }
139
140            if opc.is_none() {
141                opc = zero_mask;
142            }
143        } else if len == 4 {
144            let b2 = self.read_u8();
145            let b3 = self.read_u8();
146            let long = u32::from_le_bytes([b0, b1, b2, b3]);
147            for &op in ALL_OPCODES.iter() {
148                let mask = masks[op as usize];
149                let match_ = matches[op as usize];
150                if long & mask == match_ {
151                    opc = Some(op);
152                    val = InstructionValue::new(long);
153                    break;
154                }
155            }
156        } else {
157            self.cursor = start + len;
158            return;
159        }
160
161        let opc = opc.unwrap_or(Opcode::Invalid);
162        inst.code = opc;
163        inst.value = val;
164        inst.len = len;
165        inst.address = self.address + start as u64;
166    }
167}