1#![doc = include_str!("readme.md")]
2use crate::instruction::{Instruction, Operand, Register};
3use gaia_types::{helpers::Architecture, GaiaError, Result};
4#[derive(Debug, Clone)]
6pub struct InstructionDecoder {
7 architecture: Architecture,
8}
9
10impl InstructionDecoder {
11 pub fn new(architecture: Architecture) -> Self {
13 Self { architecture }
14 }
15
16 pub fn decode(&self, bytes: &[u8]) -> Result<Vec<Instruction>> {
18 let mut instructions = Vec::new();
19 let mut offset = 0;
20
21 while offset < bytes.len() {
22 match self.decode_instruction(&bytes[offset..]) {
23 Ok((instruction, size)) => {
24 instructions.push(instruction);
25 offset += size;
26 }
27 Err(e) => {
28 return Err(e);
29 }
30 }
31 }
32
33 Ok(instructions)
34 }
35
36 fn decode_instruction(&self, bytes: &[u8]) -> Result<(Instruction, usize)> {
37 if bytes.is_empty() {
38 return Err(GaiaError::invalid_data("Empty instruction bytes"));
39 }
40
41 let mut offset = 0;
42 let mut has_rex = false;
43 let mut rex_prefix = 0u8;
44 let mut prefix_size = 0;
45
46 if self.architecture == Architecture::X86_64 && (bytes[0] & 0xF0) == 0x40 {
48 has_rex = true;
49 rex_prefix = bytes[0];
50 offset += 1;
51 prefix_size += 1;
52 }
53
54 let opcode = bytes[offset];
55 offset += 1;
56
57 let (instruction, size) = match opcode {
58 0x89 => self.decode_mov_reg_reg(bytes, offset, has_rex, rex_prefix),
59 0x8B => self.decode_mov_reg_mem(bytes, offset, has_rex, rex_prefix),
60 0x50..=0x57 => self.decode_push_reg(bytes, offset - 1, has_rex, rex_prefix),
61 0x58..=0x5F => self.decode_pop_reg(bytes, offset - 1, has_rex, rex_prefix),
62 0x01 => self.decode_add_reg_reg(bytes, offset, has_rex, rex_prefix),
63 0x29 => self.decode_sub_reg_reg(bytes, offset, has_rex, rex_prefix),
64 0x6A => self.decode_push_imm8(bytes, offset),
65 0x68 => self.decode_push_imm32(bytes, offset),
66 0xB8..=0xBF => self.decode_mov_reg_imm(bytes, offset - 1, has_rex, rex_prefix),
67 0xC3 => self.decode_ret(bytes, offset),
68 0x90 => self.decode_nop(bytes, offset),
69 _ => Err(GaiaError::invalid_instruction(format!("Unknown opcode: 0x{:02X}", opcode), self.architecture.clone())),
70 }?;
71
72 Ok((instruction, prefix_size + size))
73 }
74
75 fn decode_mov_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
76 if offset >= bytes.len() {
77 return Err(GaiaError::invalid_data("Incomplete MOV reg,reg instruction"));
78 }
79
80 let modrm = bytes[offset];
81 let mod_bits = (modrm >> 6) & 0x03;
82 let reg = (modrm >> 3) & 0x07;
83 let rm = modrm & 0x07;
84
85 if mod_bits != 3 {
86 return Err(GaiaError::invalid_instruction(
87 "Expected register-register MOV".to_string(),
88 self.architecture.clone(),
89 ));
90 }
91
92 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
93 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
94
95 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
96 Ok((instruction, offset + 1))
97 }
98
99 fn decode_mov_reg_mem(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
100 if offset >= bytes.len() {
101 return Err(GaiaError::invalid_data("Incomplete MOV reg,mem instruction"));
102 }
103
104 let modrm = bytes[offset];
105 let mod_bits = (modrm >> 6) & 0x03;
106 let reg = (modrm >> 3) & 0x07;
107 let rm = modrm & 0x07;
108
109 let dest_reg = self.decode_register(reg, has_rex, rex_prefix, false)?;
110
111 if mod_bits == 0 && rm == 0x05 {
113 if offset + 4 >= bytes.len() {
115 return Err(GaiaError::invalid_data("Incomplete displacement"));
116 }
117 let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
118 let instruction = Instruction::Mov {
119 dst: Operand::Reg(dest_reg),
120 src: Operand::Mem { base: None, index: None, scale: 1, displacement },
121 };
122 Ok((instruction, offset + 5))
123 }
124 else if mod_bits == 1 {
125 if offset + 1 >= bytes.len() {
127 return Err(GaiaError::invalid_data("Incomplete displacement"));
128 }
129 let displacement = bytes[offset + 1] as i8 as i32;
130 let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
131 let instruction = Instruction::Mov {
132 dst: Operand::Reg(dest_reg),
133 src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement },
134 };
135 Ok((instruction, offset + 2))
136 }
137 else if mod_bits == 2 {
138 if offset + 4 >= bytes.len() {
140 return Err(GaiaError::invalid_data("Incomplete displacement"));
141 }
142 let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
143 let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
144 let instruction = Instruction::Mov {
145 dst: Operand::Reg(dest_reg),
146 src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement },
147 };
148 Ok((instruction, offset + 5))
149 }
150 else if mod_bits == 3 {
151 Err(GaiaError::invalid_instruction(
153 "Invalid addressing mode for MOV reg,mem".to_string(),
154 self.architecture.clone(),
155 ))
156 }
157 else {
158 Err(GaiaError::invalid_instruction("Complex addressing mode not supported".to_string(), self.architecture.clone()))
159 }
160 }
161
162 fn decode_push_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
163 let opcode = bytes[offset];
164 let reg_code = opcode & 0x07;
165
166 let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
167 let instruction = Instruction::Push { op: Operand::Reg(reg) };
168 Ok((instruction, offset + 1))
169 }
170
171 fn decode_pop_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
172 let opcode = bytes[offset];
173 let reg_code = opcode & 0x07;
174
175 let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
176 let instruction = Instruction::Pop { dst: Operand::Reg(reg) };
177 Ok((instruction, offset + 1))
178 }
179
180 fn decode_add_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
181 if offset >= bytes.len() {
182 return Err(GaiaError::invalid_data("Incomplete ADD reg,reg instruction"));
183 }
184
185 let modrm = bytes[offset];
186 let mod_bits = (modrm >> 6) & 0x03;
187 let reg = (modrm >> 3) & 0x07;
188 let rm = modrm & 0x07;
189
190 if mod_bits != 3 {
191 return Err(GaiaError::invalid_instruction(
192 "Expected register-register ADD".to_string(),
193 self.architecture.clone(),
194 ));
195 }
196
197 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
198 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
199
200 let instruction = Instruction::Add { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
201 Ok((instruction, offset + 1))
202 }
203
204 fn decode_sub_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
205 if offset >= bytes.len() {
206 return Err(GaiaError::invalid_data("Incomplete SUB reg,reg instruction"));
207 }
208
209 let modrm = bytes[offset];
210 let mod_bits = (modrm >> 6) & 0x03;
211 let reg = (modrm >> 3) & 0x07;
212 let rm = modrm & 0x07;
213
214 if mod_bits != 3 {
215 return Err(GaiaError::invalid_instruction(
216 "Expected register-register SUB".to_string(),
217 self.architecture.clone(),
218 ));
219 }
220
221 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
222 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
223
224 let instruction = Instruction::Sub { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
225 Ok((instruction, offset + 1))
226 }
227
228 fn decode_push_imm8(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
229 if offset >= bytes.len() {
230 return Err(GaiaError::invalid_data("Incomplete PUSH imm8 instruction"));
231 }
232
233 let imm = bytes[offset] as i8 as i64;
234 let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 8 } };
235 Ok((instruction, offset + 1))
236 }
237
238 fn decode_push_imm32(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
239 if offset + 3 >= bytes.len() {
240 return Err(GaiaError::invalid_data("Incomplete PUSH imm32 instruction"));
241 }
242
243 let imm = i32::from_le_bytes([bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]) as i64;
244 let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 32 } };
245 Ok((instruction, offset + 4))
246 }
247
248 fn decode_mov_reg_imm(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
249 let opcode = bytes[offset];
250 let reg_code = opcode & 0x07;
251
252 let dest_reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
253
254 let imm_size = if self.architecture == Architecture::X86_64 && has_rex { 64 } else { 32 };
256 let imm_bytes = if imm_size == 64 { 8 } else { 4 };
257
258 if offset + 1 + imm_bytes - 1 >= bytes.len() {
259 return Err(GaiaError::invalid_data("Incomplete MOV reg,imm instruction"));
260 }
261
262 let imm = if imm_size == 64 {
263 i64::from_le_bytes([
264 bytes[offset + 1],
265 bytes[offset + 2],
266 bytes[offset + 3],
267 bytes[offset + 4],
268 bytes[offset + 5],
269 bytes[offset + 6],
270 bytes[offset + 7],
271 bytes[offset + 8],
272 ])
273 }
274 else {
275 i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]) as i64
276 };
277
278 let instruction =
279 Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Imm { value: imm, size: imm_size as u8 } };
280 Ok((instruction, 1 + imm_bytes))
281 }
282
283 fn decode_ret(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
284 Ok((Instruction::Ret, offset))
285 }
286
287 fn decode_nop(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
288 Ok((Instruction::Nop, offset))
289 }
290
291 fn decode_register(&self, reg_code: u8, has_rex: bool, rex_prefix: u8, is_rex_r: bool) -> Result<Register> {
292 let extended = if has_rex && is_rex_r {
293 (rex_prefix & 0x04) != 0
294 }
295 else if has_rex {
296 (rex_prefix & 0x01) != 0
297 }
298 else {
299 false
300 };
301
302 let reg_index = if extended { reg_code + 8 } else { reg_code };
303
304 match self.architecture {
305 Architecture::X86 => match reg_index {
306 0 => Ok(Register::EAX),
307 1 => Ok(Register::ECX),
308 2 => Ok(Register::EDX),
309 3 => Ok(Register::EBX),
310 4 => Ok(Register::ESP),
311 5 => Ok(Register::EBP),
312 6 => Ok(Register::ESI),
313 7 => Ok(Register::EDI),
314 _ => Err(GaiaError::invalid_instruction(
315 format!("Invalid register code for X86: {}", reg_index),
316 self.architecture.clone(),
317 )),
318 },
319 Architecture::X86_64 => match reg_index {
320 0 => Ok(Register::RAX),
321 1 => Ok(Register::RCX),
322 2 => Ok(Register::RDX),
323 3 => Ok(Register::RBX),
324 4 => Ok(Register::RSP),
325 5 => Ok(Register::RBP),
326 6 => Ok(Register::RSI),
327 7 => Ok(Register::RDI),
328 8 => Ok(Register::R8),
329 9 => Ok(Register::R9),
330 10 => Ok(Register::R10),
331 11 => Ok(Register::R11),
332 12 => Ok(Register::R12),
333 13 => Ok(Register::R13),
334 14 => Ok(Register::R14),
335 15 => Ok(Register::R15),
336 _ => Err(GaiaError::invalid_instruction(
337 format!("Invalid register code for X86_64: {}", reg_index),
338 self.architecture.clone(),
339 )),
340 },
341 _ => Err(GaiaError::unsupported_architecture(self.architecture.clone())),
342 }
343 }
344}