Skip to main content

x86_64_assembler/decoder/
mod.rs

1#![doc = include_str!("readme.md")]
2use crate::instruction::{Instruction, Operand, Register};
3use gaia_types::{helpers::Architecture, GaiaError, Result};
4/// 指令解码器,用于将字节码解码为指令
5#[derive(Debug, Clone)]
6pub struct InstructionDecoder {
7    architecture: Architecture,
8}
9
10impl InstructionDecoder {
11    /// 创建新的指令解码器
12    pub fn new(architecture: Architecture) -> Self {
13        Self { architecture }
14    }
15
16    /// 解码字节码为指令序列
17    pub fn decode(&self, bytes: &[u8]) -> Result<Vec<Instruction>> {
18        let mut instructions = Vec::new();
19        let mut offset = 0;
20
21        while offset < bytes.len() {
22            match self.decode_instruction(&bytes[offset..]) {
23                Ok((instruction, size)) => {
24                    instructions.push(instruction);
25                    offset += size;
26                }
27                Err(e) => {
28                    return Err(e);
29                }
30            }
31        }
32
33        Ok(instructions)
34    }
35
36    fn decode_instruction(&self, bytes: &[u8]) -> Result<(Instruction, usize)> {
37        if bytes.is_empty() {
38            return Err(GaiaError::invalid_data("Empty instruction bytes"));
39        }
40
41        let mut offset = 0;
42        let mut has_rex = false;
43        let mut rex_prefix = 0u8;
44        let mut prefix_size = 0;
45
46        // Check for REX prefix (64-bit mode)
47        if self.architecture == Architecture::X86_64 && (bytes[0] & 0xF0) == 0x40 {
48            has_rex = true;
49            rex_prefix = bytes[0];
50            offset += 1;
51            prefix_size += 1;
52        }
53
54        let opcode = bytes[offset];
55        offset += 1;
56
57        let (instruction, size) = match opcode {
58            0x89 => self.decode_mov_reg_reg(bytes, offset, has_rex, rex_prefix),
59            0x8B => self.decode_mov_reg_mem(bytes, offset, has_rex, rex_prefix),
60            0x50..=0x57 => self.decode_push_reg(bytes, offset - 1, has_rex, rex_prefix),
61            0x58..=0x5F => self.decode_pop_reg(bytes, offset - 1, has_rex, rex_prefix),
62            0x01 => self.decode_add_reg_reg(bytes, offset, has_rex, rex_prefix),
63            0x29 => self.decode_sub_reg_reg(bytes, offset, has_rex, rex_prefix),
64            0x6A => self.decode_push_imm8(bytes, offset),
65            0x68 => self.decode_push_imm32(bytes, offset),
66            0xB8..=0xBF => self.decode_mov_reg_imm(bytes, offset - 1, has_rex, rex_prefix),
67            0xC3 => self.decode_ret(bytes, offset),
68            0x90 => self.decode_nop(bytes, offset),
69            _ => Err(GaiaError::invalid_instruction(format!("Unknown opcode: 0x{:02X}", opcode), self.architecture.clone())),
70        }?;
71
72        Ok((instruction, prefix_size + size))
73    }
74
75    fn decode_mov_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
76        if offset >= bytes.len() {
77            return Err(GaiaError::invalid_data("Incomplete MOV reg,reg instruction"));
78        }
79
80        let modrm = bytes[offset];
81        let mod_bits = (modrm >> 6) & 0x03;
82        let reg = (modrm >> 3) & 0x07;
83        let rm = modrm & 0x07;
84
85        if mod_bits != 3 {
86            return Err(GaiaError::invalid_instruction(
87                "Expected register-register MOV".to_string(),
88                self.architecture.clone(),
89            ));
90        }
91
92        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
93        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
94
95        let instruction = Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
96        Ok((instruction, offset + 1))
97    }
98
99    fn decode_mov_reg_mem(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
100        if offset >= bytes.len() {
101            return Err(GaiaError::invalid_data("Incomplete MOV reg,mem instruction"));
102        }
103
104        let modrm = bytes[offset];
105        let mod_bits = (modrm >> 6) & 0x03;
106        let reg = (modrm >> 3) & 0x07;
107        let rm = modrm & 0x07;
108
109        let dest_reg = self.decode_register(reg, has_rex, rex_prefix, false)?;
110
111        // Simplified memory operand decoding
112        if mod_bits == 0 && rm == 0x05 {
113            // [disp32]
114            if offset + 4 >= bytes.len() {
115                return Err(GaiaError::invalid_data("Incomplete displacement"));
116            }
117            let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
118            let instruction = Instruction::Mov {
119                dst: Operand::Reg(dest_reg),
120                src: Operand::Mem { base: None, index: None, scale: 1, displacement },
121            };
122            Ok((instruction, offset + 5))
123        }
124        else if mod_bits == 1 {
125            // [reg + disp8]
126            if offset + 1 >= bytes.len() {
127                return Err(GaiaError::invalid_data("Incomplete displacement"));
128            }
129            let displacement = bytes[offset + 1] as i8 as i32;
130            let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
131            let instruction = Instruction::Mov {
132                dst: Operand::Reg(dest_reg),
133                src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement },
134            };
135            Ok((instruction, offset + 2))
136        }
137        else if mod_bits == 2 {
138            // [reg + disp32]
139            if offset + 4 >= bytes.len() {
140                return Err(GaiaError::invalid_data("Incomplete displacement"));
141            }
142            let displacement = i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]);
143            let base_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
144            let instruction = Instruction::Mov {
145                dst: Operand::Reg(dest_reg),
146                src: Operand::Mem { base: Some(base_reg), index: None, scale: 1, displacement },
147            };
148            Ok((instruction, offset + 5))
149        }
150        else if mod_bits == 3 {
151            // Register-register (should not happen for 0x8B)
152            Err(GaiaError::invalid_instruction(
153                "Invalid addressing mode for MOV reg,mem".to_string(),
154                self.architecture.clone(),
155            ))
156        }
157        else {
158            Err(GaiaError::invalid_instruction("Complex addressing mode not supported".to_string(), self.architecture.clone()))
159        }
160    }
161
162    fn decode_push_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
163        let opcode = bytes[offset];
164        let reg_code = opcode & 0x07;
165
166        let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
167        let instruction = Instruction::Push { op: Operand::Reg(reg) };
168        Ok((instruction, offset + 1))
169    }
170
171    fn decode_pop_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
172        let opcode = bytes[offset];
173        let reg_code = opcode & 0x07;
174
175        let reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
176        let instruction = Instruction::Pop { dst: Operand::Reg(reg) };
177        Ok((instruction, offset + 1))
178    }
179
180    fn decode_add_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
181        if offset >= bytes.len() {
182            return Err(GaiaError::invalid_data("Incomplete ADD reg,reg instruction"));
183        }
184
185        let modrm = bytes[offset];
186        let mod_bits = (modrm >> 6) & 0x03;
187        let reg = (modrm >> 3) & 0x07;
188        let rm = modrm & 0x07;
189
190        if mod_bits != 3 {
191            return Err(GaiaError::invalid_instruction(
192                "Expected register-register ADD".to_string(),
193                self.architecture.clone(),
194            ));
195        }
196
197        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
198        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
199
200        let instruction = Instruction::Add { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
201        Ok((instruction, offset + 1))
202    }
203
204    fn decode_sub_reg_reg(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
205        if offset >= bytes.len() {
206            return Err(GaiaError::invalid_data("Incomplete SUB reg,reg instruction"));
207        }
208
209        let modrm = bytes[offset];
210        let mod_bits = (modrm >> 6) & 0x03;
211        let reg = (modrm >> 3) & 0x07;
212        let rm = modrm & 0x07;
213
214        if mod_bits != 3 {
215            return Err(GaiaError::invalid_instruction(
216                "Expected register-register SUB".to_string(),
217                self.architecture.clone(),
218            ));
219        }
220
221        let dest_reg = self.decode_register(rm, has_rex, rex_prefix, false)?;
222        let src_reg = self.decode_register(reg, has_rex, rex_prefix, true)?;
223
224        let instruction = Instruction::Sub { dst: Operand::Reg(dest_reg), src: Operand::Reg(src_reg) };
225        Ok((instruction, offset + 1))
226    }
227
228    fn decode_push_imm8(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
229        if offset >= bytes.len() {
230            return Err(GaiaError::invalid_data("Incomplete PUSH imm8 instruction"));
231        }
232
233        let imm = bytes[offset] as i8 as i64;
234        let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 8 } };
235        Ok((instruction, offset + 1))
236    }
237
238    fn decode_push_imm32(&self, bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
239        if offset + 3 >= bytes.len() {
240            return Err(GaiaError::invalid_data("Incomplete PUSH imm32 instruction"));
241        }
242
243        let imm = i32::from_le_bytes([bytes[offset], bytes[offset + 1], bytes[offset + 2], bytes[offset + 3]]) as i64;
244        let instruction = Instruction::Push { op: Operand::Imm { value: imm, size: 32 } };
245        Ok((instruction, offset + 4))
246    }
247
248    fn decode_mov_reg_imm(&self, bytes: &[u8], offset: usize, has_rex: bool, rex_prefix: u8) -> Result<(Instruction, usize)> {
249        let opcode = bytes[offset];
250        let reg_code = opcode & 0x07;
251
252        let dest_reg = self.decode_register(reg_code, has_rex, rex_prefix, false)?;
253
254        // Determine immediate size based on architecture
255        let imm_size = if self.architecture == Architecture::X86_64 && has_rex { 64 } else { 32 };
256        let imm_bytes = if imm_size == 64 { 8 } else { 4 };
257
258        if offset + 1 + imm_bytes - 1 >= bytes.len() {
259            return Err(GaiaError::invalid_data("Incomplete MOV reg,imm instruction"));
260        }
261
262        let imm = if imm_size == 64 {
263            i64::from_le_bytes([
264                bytes[offset + 1],
265                bytes[offset + 2],
266                bytes[offset + 3],
267                bytes[offset + 4],
268                bytes[offset + 5],
269                bytes[offset + 6],
270                bytes[offset + 7],
271                bytes[offset + 8],
272            ])
273        }
274        else {
275            i32::from_le_bytes([bytes[offset + 1], bytes[offset + 2], bytes[offset + 3], bytes[offset + 4]]) as i64
276        };
277
278        let instruction =
279            Instruction::Mov { dst: Operand::Reg(dest_reg), src: Operand::Imm { value: imm, size: imm_size as u8 } };
280        Ok((instruction, 1 + imm_bytes))
281    }
282
283    fn decode_ret(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
284        Ok((Instruction::Ret, offset))
285    }
286
287    fn decode_nop(&self, _bytes: &[u8], offset: usize) -> Result<(Instruction, usize)> {
288        Ok((Instruction::Nop, offset))
289    }
290
291    fn decode_register(&self, reg_code: u8, has_rex: bool, rex_prefix: u8, is_rex_r: bool) -> Result<Register> {
292        let extended = if has_rex && is_rex_r {
293            (rex_prefix & 0x04) != 0
294        }
295        else if has_rex {
296            (rex_prefix & 0x01) != 0
297        }
298        else {
299            false
300        };
301
302        let reg_index = if extended { reg_code + 8 } else { reg_code };
303
304        match self.architecture {
305            Architecture::X86 => match reg_index {
306                0 => Ok(Register::EAX),
307                1 => Ok(Register::ECX),
308                2 => Ok(Register::EDX),
309                3 => Ok(Register::EBX),
310                4 => Ok(Register::ESP),
311                5 => Ok(Register::EBP),
312                6 => Ok(Register::ESI),
313                7 => Ok(Register::EDI),
314                _ => Err(GaiaError::invalid_instruction(
315                    format!("Invalid register code for X86: {}", reg_index),
316                    self.architecture.clone(),
317                )),
318            },
319            Architecture::X86_64 => match reg_index {
320                0 => Ok(Register::RAX),
321                1 => Ok(Register::RCX),
322                2 => Ok(Register::RDX),
323                3 => Ok(Register::RBX),
324                4 => Ok(Register::RSP),
325                5 => Ok(Register::RBP),
326                6 => Ok(Register::RSI),
327                7 => Ok(Register::RDI),
328                8 => Ok(Register::R8),
329                9 => Ok(Register::R9),
330                10 => Ok(Register::R10),
331                11 => Ok(Register::R11),
332                12 => Ok(Register::R12),
333                13 => Ok(Register::R13),
334                14 => Ok(Register::R14),
335                15 => Ok(Register::R15),
336                _ => Err(GaiaError::invalid_instruction(
337                    format!("Invalid register code for X86_64: {}", reg_index),
338                    self.architecture.clone(),
339                )),
340            },
341            _ => Err(GaiaError::unsupported_architecture(self.architecture.clone())),
342        }
343    }
344}