1use crate::instruction::{Instruction, Operand, Register};
2use gaia_types::{helpers::Architecture, GaiaError, Result};
3
/// Decoder that turns raw machine-code bytes into `Instruction` values.
///
/// The decoder is configured with one `Architecture` at construction time and
/// applies that architecture's register naming (and, for `X86_64`, REX prefix
/// handling) to every byte sequence it decodes.
pub struct InstructionDecoder {
    /// Target architecture the byte stream is interpreted for; set by `new`.
    architecture: Architecture,
}
7
8impl InstructionDecoder {
9 pub fn new(architecture: Architecture) -> Self {
10 Self { architecture }
11 }
12
13 pub fn decode(&self, bytes: &[u8]) -> Result<Vec<Instruction>> {
14 let mut instructions = Vec::new();
15 let mut offset = 0;
16
17 while offset < bytes.len() {
18 match self.decode_instruction(&bytes[offset..]) {
19 Ok((instruction, size)) => {
20 instructions.push(instruction);
21 offset += size;
22 }
23 Err(e) => {
24 return Err(e);
25 }
26 }
27 }
28
29 Ok(instructions)
30 }
31
32 fn decode_instruction(&self, bytes: &[u8]) -> Result<(Instruction, usize)> {
33 if bytes.is_empty() {
34 return Err(GaiaError::invalid_data("Empty instruction bytes"));
35 }
36
37 let mut offset = 0;
38 let mut has_rex = false;
39 let mut rex_prefix = 0u8;
40 let mut prefix_size = 0;
41
42 if self.architecture == Architecture::X86_64 && (bytes[0] & 0xF0) == 0x40 {
44 has_rex = true;
45 rex_prefix = bytes[0];
46 offset += 1;
47 prefix_size += 1;
48 }
49
50 let opcode = bytes[offset];
51 offset += 1;
52
53 let (instruction, size) = match opcode {
54 0x89 => self.decode_mov_reg_reg(bytes, offset, has_rex, rex_prefix),
55 0x8B => self.decode_mov_reg_mem(bytes, offset, has_rex, rex_prefix),
56 0x50..=0x57 => self.decode_push_reg(bytes, offset - 1, has_rex, rex_prefix),
57 0x58..=0x5F => self.decode_pop_reg(bytes, offset - 1, has_rex, rex_prefix),
58 0x01 => self.decode_add_reg_reg(bytes, offset, has_rex, rex_prefix),
59 0x29 => self.decode_sub_reg_reg(bytes, offset, has_rex, rex_prefix),
60 0x6A => self.decode_push_imm8(bytes, offset),
61 0x68 => self.decode_push_imm32(bytes, offset),
62 0xB8..=0xBF => self.decode_mov_reg_imm(bytes, offset - 1, has_rex, rex_prefix),
63 0xC3 => self.decode_ret(bytes, offset),
64 0x90 => self.decode_nop(bytes, offset),
65 _ => Err(GaiaError::invalid_instruction(format!("Unknown opcode: 0x{:02X}", opcode), self.architecture.clone())),
66 }?;
67
68 Ok((instruction, prefix_size + size))
69 }
70
71 fn decode_mov_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
72 if offset >= bytes.len() {
73 return Err(GaiaError::invalid_data("Incomplete MOV reg,reg instruction"));
74 }
75
76 let modrm = bytes[offset];
77 let mod_bits = (modrm >> 6) & 0x03;
78 let reg = (modrm >> 3) & 0x07;
79 let rm = modrm & 0x07;
80
81 if mod_bits != 3 {
82 return Err(GaiaError::invalid_instruction(
83 "Expected register-register MOV".to_string(),
84 self.architecture.clone(),
85 ));
86 }
87
88 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
89 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
90
91 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
92 Ok((instruction, offset + 1))
93 }
94
95 fn decode_mov_reg_mem(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
96 if offset >= bytes.len() {
97 return Err(GaiaError::invalid_data("Incomplete MOV reg,mem instruction"));
98 }
99
100 let modrm = bytes[offset];
101 let mod_bits = (modrm >> 6) & 0x03;
102 let reg = (modrm >> 3) & 0x07;
103 let rm = modrm & 0x07;
104
105 let dest_reg = self.decode_register(reg, has_rex, rex_prefix, false)?;
106
107 if mod_bits == 0 && rm == 0x05 {
109 if offset + 4 >= bytes.len() {
111 return Err(GaiaError::invalid_data("Incomplete displacement"));
112 }
113 let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
114 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: None, index: None, scale: 1, displacement } };
115 Ok((instruction, offset + 5))
116 }
117 else if mod_bits == 1 {
118 if offset + 1 >= bytes.len() {
120 return Err(GaiaError::invalid_data("Incomplete displacement"));
121 }
122 let displacement = bytes[offset + 1] as i8 as i32;
123 let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
124 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement } };
125 Ok((instruction, offset + 2))
126 }
127 else if mod_bits == 2 {
128 if offset + 4 >= bytes.len() {
130 return Err(GaiaError::invalid_data("Incomplete displacement"));
131 }
132 let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
133 let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
134 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement } };
135 Ok((instruction, offset + 5))
136 }
137 else if mod_bits == 3 {
138 Err(GaiaError::invalid_instruction(
140 "Invalid addressing mode for MOV reg,mem".to_string(),
141 self.architecture.clone(),
142 ))
143 }
144 else {
145 Err(GaiaError::invalid_instruction("Complex addressing mode not supported".to_string(), self.architecture.clone()))
146 }
147 }
148
149 fn decode_push_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
150 let opcode = bytes[offset];
151 let reg_code = opcode & 0x07;
152
153 let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
154 let instruction = Instruction::Push { op: Operand::Reg(reg) };
155 Ok((instruction, offset + 1))
156 }
157
158 fn decode_pop_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
159 let opcode = bytes[offset];
160 let reg_code = opcode & 0x07;
161
162 let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
163 let instruction = Instruction::Pop { dst: Operand::Reg(reg) };
164 Ok((instruction, offset + 1))
165 }
166
167 fn decode_add_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
168 if offset >= bytes.len() {
169 return Err(GaiaError::invalid_data("Incomplete ADD reg,reg instruction"));
170 }
171
172 let modrm = bytes[offset];
173 let mod_bits = (modrm >> 6) & 0x03;
174 let reg = (modrm >> 3) & 0x07;
175 let rm = modrm & 0x07;
176
177 if mod_bits != 3 {
178 return Err(GaiaError::invalid_instruction(
179 "Expected register-register ADD".to_string(),
180 self.architecture.clone(),
181 ));
182 }
183
184 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
185 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
186
187 let instruction = Instruction::Add { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
188 Ok((instruction, offset + 1))
189 }
190
191 fn decode_sub_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
192 if offset >= bytes.len() {
193 return Err(GaiaError::invalid_data("Incomplete SUB reg,reg instruction"));
194 }
195
196 let modrm = bytes[offset];
197 let mod_bits = (modrm >> 6) & 0x03;
198 let reg = (modrm >> 3) & 0x07;
199 let rm = modrm & 0x07;
200
201 if mod_bits != 3 {
202 return Err(GaiaError::invalid_instruction(
203 "Expected register-register SUB".to_string(),
204 self.architecture.clone(),
205 ));
206 }
207
208 let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
209 let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
210
211 let instruction = Instruction::Sub { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
212 Ok((instruction, offset + 1))
213 }
214
215 fn decode_push_imm8(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
216 if offset >= bytes.len() {
217 return Err(GaiaError::invalid_data("Incomplete PUSH imm8 instruction"));
218 }
219
220 let imm = bytes[offset] as i8 as i64;
221 let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 8 } };
222 Ok((instruction, offset + 1))
223 }
224
225 fn decode_push_imm32(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
226 if offset + 3 >= bytes.len() {
227 return Err(GaiaError::invalid_data("Incomplete PUSH imm32 instruction"));
228 }
229
230 let imm = i32::from_le_bytes([bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]) as i64;
231 let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 32 } };
232 Ok((instruction, offset + 4))
233 }
234
235 fn decode_mov_reg_imm(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
236 let opcode = bytes[offset];
237 let reg_code = opcode & 0x07;
238
239 let dest_reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
240
241 let imm_size = if self.architecture == Architecture::X86_64 && has_rex { 64 } else { 32 };
243 let imm_bytes = if imm_size == 64 { 8 } else { 4 };
244
245 if offset + 1 + imm_bytes - 1 >= bytes.len() {
246 return Err(GaiaError::invalid_data("Incomplete MOV reg,imm instruction"));
247 }
248
249 let imm = if imm_size == 64 {
250 i64::from_le_bytes([
251 bytes[offset + 1],
252 bytes[offset + 2],
253 bytes[offset + 3],
254 bytes[offset + 4],
255 bytes[offset + 5],
256 bytes[offset + 6],
257 bytes[offset + 7],
258 bytes[offset + 8],
259 ])
260 }
261 else {
262 i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]) as i64
263 };
264
265 let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Imm { value: imm, size: imm_size as u8 } };
266 Ok((instruction, 1 + imm_bytes))
267 }
268
269 fn decode_ret(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
270 Ok((Instruction::Ret, offset))
271 }
272
273 fn decode_nop(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
274 Ok((Instruction::Nop, offset))
275 }
276
277 fn decode_register(&self, reg_code: u8, has_rex: bool, rex_prefix: u8, is_rex_r: bool) -> Result<Register> {
278 let extended = if has_rex && is_rex_r {
279 (rex_prefix & 0x04) != 0
280 }
281 else if has_rex {
282 (rex_prefix & 0x01) != 0
283 }
284 else {
285 false
286 };
287
288 let reg_index = if extended { reg_code + 8 } else { reg_code };
289
290 match self.architecture {
291 Architecture::X86 => match reg_index {
292 0 => Ok(Register::Eax),
293 1 => Ok(Register::Ecx),
294 2 => Ok(Register::Edx),
295 3 => Ok(Register::Ebx),
296 4 => Ok(Register::Esp),
297 5 => Ok(Register::Ebp),
298 6 => Ok(Register::Esi),
299 7 => Ok(Register::Edi),
300 _ => Err(GaiaError::invalid_instruction(
301 format!("Invalid register code for X86: {}", reg_index),
302 self.architecture.clone(),
303 )),
304 },
305 Architecture::X86_64 => match reg_index {
306 0 => Ok(Register::Rax),
307 1 => Ok(Register::Rcx),
308 2 => Ok(Register::Rdx),
309 3 => Ok(Register::Rbx),
310 4 => Ok(Register::Rsp),
311 5 => Ok(Register::Rbp),
312 6 => Ok(Register::Rsi),
313 7 => Ok(Register::Rdi),
314 8 => Ok(Register::R8),
315 9 => Ok(Register::R9),
316 10 => Ok(Register::R10),
317 11 => Ok(Register::R11),
318 12 => Ok(Register::R12),
319 13 => Ok(Register::R13),
320 14 => Ok(Register::R14),
321 15 => Ok(Register::R15),
322 _ => Err(GaiaError::invalid_instruction(
323 format!("Invalid register code for X86_64: {}", reg_index),
324 self.architecture.clone(),
325 )),
326 },
327 _ => Err(GaiaError::unsupported_architecture(self.architecture.clone())),
328 }
329 }
330}