x86_64_assembler/
decoder.rs

1use crate::instruction::{Instruction, Operand, Register};
2use gaia_types::{helpers::Architecture, GaiaError, Result};
3
4pub struct InstructionDecoder {
5    architecture: Architecture,
6}
7
8impl InstructionDecoder {
9    pub fn new(architecture: Architecture) -> Self {
10        Self { architecture }
11    }
12
13    pub fn decode(&self, bytes: &[u8]) -> Result<Vec<Instruction>> {
14        let mut instructions = Vec::new();
15        let mut offset = 0;
16
17        while offset < bytes.len() {
18            match self.decode_instruction(&bytes[offset..]) {
19                Ok((instruction, size)) => {
20                    instructions.push(instruction);
21                    offset += size;
22                }
23                Err(e) => {
24                    return Err(e);
25                }
26            }
27        }
28
29        Ok(instructions)
30    }
31
32    fn decode_instruction(&self, bytes: &[u8]) -> Result<(Instruction, usize)> {
33        if bytes.is_empty() {
34            return Err(GaiaError::invalid_data("Empty instruction bytes"));
35        }
36
37        let mut offset = 0;
38        let mut has_rex = false;
39        let mut rex_prefix = 0u8;
40        let mut prefix_size = 0;
41
42        // Check for REX prefix (64-bit mode)
43        if self.architecture == Architecture::X86_64 && (bytes[0] & 0xF0) == 0x40 {
44            has_rex = true;
45            rex_prefix = bytes[0];
46            offset += 1;
47            prefix_size += 1;
48        }
49
50        let opcode = bytes[offset];
51        offset += 1;
52
53        let (instruction, size) = match opcode {
54            0x89 => self.decode_mov_reg_reg(bytes, offset, has_rex, rex_prefix),
55            0x8B => self.decode_mov_reg_mem(bytes, offset, has_rex, rex_prefix),
56            0x50..=0x57 => self.decode_push_reg(bytes, offset - 1, has_rex, rex_prefix),
57            0x58..=0x5F => self.decode_pop_reg(bytes, offset - 1, has_rex, rex_prefix),
58            0x01 => self.decode_add_reg_reg(bytes, offset, has_rex, rex_prefix),
59            0x29 => self.decode_sub_reg_reg(bytes, offset, has_rex, rex_prefix),
60            0x6A => self.decode_push_imm8(bytes, offset),
61            0x68 => self.decode_push_imm32(bytes, offset),
62            0xB8..=0xBF => self.decode_mov_reg_imm(bytes, offset - 1, has_rex, rex_prefix),
63            0xC3 => self.decode_ret(bytes, offset),
64            0x90 => self.decode_nop(bytes, offset),
65            _ => Err(GaiaError::invalid_instruction(format!("Unknown opcode: 0x{:02X}", opcode), self.architecture.clone())),
66        }?;
67
68        Ok((instruction, prefix_size + size))
69    }
70
71    fn decode_mov_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
72        if offset >= bytes.len() {
73            return Err(GaiaError::invalid_data("Incomplete MOV reg,reg instruction"));
74        }
75
76        let modrm = bytes[offset];
77        let mod_bits = (modrm >> 6) & 0x03;
78        let reg = (modrm >> 3) & 0x07;
79        let rm = modrm & 0x07;
80
81        if mod_bits != 3 {
82            return Err(GaiaError::invalid_instruction(
83                "Expected register-register MOV".to_string(),
84                self.architecture.clone(),
85            ));
86        }
87
88        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
89        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
90
91        let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
92        Ok((instruction, offset + 1))
93    }
94
95    fn decode_mov_reg_mem(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
96        if offset >= bytes.len() {
97            return Err(GaiaError::invalid_data("Incomplete MOV reg,mem instruction"));
98        }
99
100        let modrm = bytes[offset];
101        let mod_bits = (modrm >> 6) & 0x03;
102        let reg = (modrm >> 3) & 0x07;
103        let rm = modrm & 0x07;
104
105        let dest_reg = self.decode_register(reg, has_rex, rex_prefix, false)?;
106
107        // Simplified memory operand decoding
108        if mod_bits == 0 && rm == 0x05 {
109            // [disp32]
110            if offset + 4 >= bytes.len() {
111                return Err(GaiaError::invalid_data("Incomplete displacement"));
112            }
113            let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
114            let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: None, index: None, scale: 1, displacement } };
115            Ok((instruction, offset + 5))
116        }
117        else if mod_bits == 1 {
118            // [reg + disp8]
119            if offset + 1 >= bytes.len() {
120                return Err(GaiaError::invalid_data("Incomplete displacement"));
121            }
122            let displacement = bytes[offset + 1] as i8 as i32;
123            let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
124            let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement } };
125            Ok((instruction, offset + 2))
126        }
127        else if mod_bits == 2 {
128            // [reg + disp32]
129            if offset + 4 >= bytes.len() {
130                return Err(GaiaError::invalid_data("Incomplete displacement"));
131            }
132            let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
133            let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
134            let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement } };
135            Ok((instruction, offset + 5))
136        }
137        else if mod_bits == 3 {
138            // Register-register (should not happen for 0x8B)
139            Err(GaiaError::invalid_instruction(
140                "Invalid addressing mode for MOV reg,mem".to_string(),
141                self.architecture.clone(),
142            ))
143        }
144        else {
145            Err(GaiaError::invalid_instruction("Complex addressing mode not supported".to_string(), self.architecture.clone()))
146        }
147    }
148
149    fn decode_push_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
150        let opcode = bytes[offset];
151        let reg_code = opcode & 0x07;
152
153        let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
154        let instruction = Instruction::Push { op: Operand::Reg(reg) };
155        Ok((instruction, offset + 1))
156    }
157
158    fn decode_pop_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
159        let opcode = bytes[offset];
160        let reg_code = opcode & 0x07;
161
162        let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
163        let instruction = Instruction::Pop { dst: Operand::Reg(reg) };
164        Ok((instruction, offset + 1))
165    }
166
167    fn decode_add_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
168        if offset >= bytes.len() {
169            return Err(GaiaError::invalid_data("Incomplete ADD reg,reg instruction"));
170        }
171
172        let modrm = bytes[offset];
173        let mod_bits = (modrm >> 6) & 0x03;
174        let reg = (modrm >> 3) & 0x07;
175        let rm = modrm & 0x07;
176
177        if mod_bits != 3 {
178            return Err(GaiaError::invalid_instruction(
179                "Expected register-register ADD".to_string(),
180                self.architecture.clone(),
181            ));
182        }
183
184        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
185        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
186
187        let instruction = Instruction::Add { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
188        Ok((instruction, offset + 1))
189    }
190
191    fn decode_sub_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
192        if offset >= bytes.len() {
193            return Err(GaiaError::invalid_data("Incomplete SUB reg,reg instruction"));
194        }
195
196        let modrm = bytes[offset];
197        let mod_bits = (modrm >> 6) & 0x03;
198        let reg = (modrm >> 3) & 0x07;
199        let rm = modrm & 0x07;
200
201        if mod_bits != 3 {
202            return Err(GaiaError::invalid_instruction(
203                "Expected register-register SUB".to_string(),
204                self.architecture.clone(),
205            ));
206        }
207
208        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
209        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
210
211        let instruction = Instruction::Sub { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
212        Ok((instruction, offset + 1))
213    }
214
215    fn decode_push_imm8(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
216        if offset >= bytes.len() {
217            return Err(GaiaError::invalid_data("Incomplete PUSH imm8 instruction"));
218        }
219
220        let imm = bytes[offset] as i8 as i64;
221        let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 8 } };
222        Ok((instruction, offset + 1))
223    }
224
225    fn decode_push_imm32(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
226        if offset + 3 >= bytes.len() {
227            return Err(GaiaError::invalid_data("Incomplete PUSH imm32 instruction"));
228        }
229
230        let imm = i32::from_le_bytes([bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]) as i64;
231        let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 32 } };
232        Ok((instruction, offset + 4))
233    }
234
235    fn decode_mov_reg_imm(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
236        let opcode = bytes[offset];
237        let reg_code = opcode & 0x07;
238
239        let dest_reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
240
241        // Determine immediate size based on architecture
242        let imm_size = if self.architecture == Architecture::X86_64 && has_rex { 64 } else { 32 };
243        let imm_bytes = if imm_size == 64 { 8 } else { 4 };
244
245        if offset + 1 + imm_bytes - 1 >= bytes.len() {
246            return Err(GaiaError::invalid_data("Incomplete MOV reg,imm instruction"));
247        }
248
249        let imm = if imm_size == 64 {
250            i64::from_le_bytes([
251                bytes[offset + 1],
252                bytes[offset + 2],
253                bytes[offset + 3],
254                bytes[offset + 4],
255                bytes[offset + 5],
256                bytes[offset + 6],
257                bytes[offset + 7],
258                bytes[offset + 8],
259            ])
260        }
261        else {
262            i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]) as i64
263        };
264
265        let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Imm { value: imm, size: imm_size as u8 } };
266        Ok((instruction, 1 + imm_bytes))
267    }
268
269    fn decode_ret(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
270        Ok((Instruction::Ret, offset))
271    }
272
273    fn decode_nop(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
274        Ok((Instruction::Nop, offset))
275    }
276
277    fn decode_register(&self, reg_code: u8, has_rex: bool, rex_prefix: u8, is_rex_r: bool) -> Result<Register> {
278        let extended = if has_rex && is_rex_r {
279            (rex_prefix & 0x04) != 0
280        }
281        else if has_rex {
282            (rex_prefix & 0x01) != 0
283        }
284        else {
285            false
286        };
287
288        let reg_index = if extended { reg_code + 8 } else { reg_code };
289
290        match self.architecture {
291            Architecture::X86 => match reg_index {
292                0 => Ok(Register::Eax),
293                1 => Ok(Register::Ecx),
294                2 => Ok(Register::Edx),
295                3 => Ok(Register::Ebx),
296                4 => Ok(Register::Esp),
297                5 => Ok(Register::Ebp),
298                6 => Ok(Register::Esi),
299                7 => Ok(Register::Edi),
300                _ => Err(GaiaError::invalid_instruction(
301                    format!("Invalid register code for X86: {}", reg_index),
302                    self.architecture.clone(),
303                )),
304            },
305            Architecture::X86_64 => match reg_index {
306                0 => Ok(Register::Rax),
307                1 => Ok(Register::Rcx),
308                2 => Ok(Register::Rdx),
309                3 => Ok(Register::Rbx),
310                4 => Ok(Register::Rsp),
311                5 => Ok(Register::Rbp),
312                6 => Ok(Register::Rsi),
313                7 => Ok(Register::Rdi),
314                8 => Ok(Register::R8),
315                9 => Ok(Register::R9),
316                10 => Ok(Register::R10),
317                11 => Ok(Register::R11),
318                12 => Ok(Register::R12),
319                13 => Ok(Register::R13),
320                14 => Ok(Register::R14),
321                15 => Ok(Register::R15),
322                _ => Err(GaiaError::invalid_instruction(
323                    format!("Invalid register code for X86_64: {}", reg_index),
324                    self.architecture.clone(),
325                )),
326            },
327            _ => Err(GaiaError::unsupported_architecture(self.architecture.clone())),
328        }
329    }
330}