wraith/manipulation/inline_hook/asm/
iced_decoder.rs

1//! Instruction decoding using iced-x86
2//!
3//! Provides comprehensive instruction decoding with full support for
4//! all x86/x64 instructions, including proper handling of:
5//! - Instruction lengths
6//! - Relative addressing (RIP-relative, branches)
7//! - Control flow analysis
8//! - Instruction operand information
9
10#[cfg(all(not(feature = "std"), feature = "alloc"))]
11use alloc::vec::Vec;
12
13#[cfg(feature = "std")]
14use std::vec::Vec;
15
16use iced_x86::{Decoder, DecoderOptions, FlowControl, Instruction, OpKind};
17
18/// decoded instruction with full metadata
19#[derive(Debug, Clone)]
20pub struct DecodedInstruction {
21    /// the raw iced-x86 instruction
22    pub inner: Instruction,
23    /// instruction length in bytes
24    pub length: usize,
25    /// whether instruction uses relative addressing that needs relocation
26    pub is_relative: bool,
27    /// whether this is a control flow instruction (branch, call, ret)
28    pub is_control_flow: bool,
29    /// for relative instructions, the computed absolute target address
30    pub branch_target: Option<u64>,
31    /// whether this is a RIP-relative memory access
32    pub is_rip_relative: bool,
33    /// for RIP-relative access, the displacement offset within the instruction
34    pub rip_disp_offset: Option<usize>,
35    /// for RIP-relative access, the displacement size
36    pub rip_disp_size: Option<usize>,
37    /// computed absolute address for RIP-relative access
38    pub rip_target: Option<u64>,
39}
40
41impl DecodedInstruction {
42    /// check if instruction can be safely relocated
43    pub fn is_relocatable(&self) -> bool {
44        if !self.is_relative && !self.is_rip_relative {
45            return true;
46        }
47
48        match self.inner.flow_control() {
49            FlowControl::Return | FlowControl::Exception | FlowControl::Interrupt => false,
50            FlowControl::IndirectBranch | FlowControl::IndirectCall => {
51                self.is_rip_relative
52            }
53            _ => true,
54        }
55    }
56
57    /// get the instruction mnemonic
58    pub fn mnemonic(&self) -> iced_x86::Mnemonic {
59        self.inner.mnemonic()
60    }
61}
62
63/// instruction decoder using iced-x86
64pub struct InstructionDecoder {
65    bitness: u32,
66}
67
68impl InstructionDecoder {
69    /// create decoder for current architecture
70    #[cfg(target_arch = "x86_64")]
71    pub fn native() -> Self {
72        Self { bitness: 64 }
73    }
74
75    #[cfg(target_arch = "x86")]
76    pub fn native() -> Self {
77        Self { bitness: 32 }
78    }
79
80    /// create 64-bit decoder
81    pub fn x64() -> Self {
82        Self { bitness: 64 }
83    }
84
85    /// create 32-bit decoder
86    pub fn x86() -> Self {
87        Self { bitness: 32 }
88    }
89
90    /// decode a single instruction at the given address
91    pub fn decode_at(&self, address: usize, bytes: &[u8]) -> Option<DecodedInstruction> {
92        if bytes.is_empty() {
93            return None;
94        }
95
96        let mut decoder = Decoder::with_ip(
97            self.bitness,
98            bytes,
99            address as u64,
100            DecoderOptions::NONE,
101        );
102
103        if !decoder.can_decode() {
104            return None;
105        }
106
107        let instruction = decoder.decode();
108        if instruction.is_invalid() {
109            return None;
110        }
111
112        Some(self.analyze_instruction(instruction, address))
113    }
114
115    /// decode all instructions in the byte slice
116    pub fn decode_all(&self, address: usize, bytes: &[u8]) -> Vec<DecodedInstruction> {
117        let mut result = Vec::new();
118        let mut decoder = Decoder::with_ip(
119            self.bitness,
120            bytes,
121            address as u64,
122            DecoderOptions::NONE,
123        );
124
125        while decoder.can_decode() {
126            let instruction = decoder.decode();
127            if instruction.is_invalid() {
128                break;
129            }
130            result.push(self.analyze_instruction(instruction, instruction.ip() as usize));
131        }
132
133        result
134    }
135
136    /// decode instructions until we have at least min_bytes
137    pub fn decode_until_size(
138        &self,
139        address: usize,
140        bytes: &[u8],
141        min_bytes: usize,
142    ) -> Vec<DecodedInstruction> {
143        let mut result = Vec::new();
144        let mut total_size = 0;
145
146        let mut decoder = Decoder::with_ip(
147            self.bitness,
148            bytes,
149            address as u64,
150            DecoderOptions::NONE,
151        );
152
153        while decoder.can_decode() && total_size < min_bytes {
154            let instruction = decoder.decode();
155            if instruction.is_invalid() {
156                break;
157            }
158            let decoded = self.analyze_instruction(instruction, instruction.ip() as usize);
159            total_size += decoded.length;
160            result.push(decoded);
161        }
162
163        result
164    }
165
166    /// find instruction boundary at or after required_size
167    pub fn find_boundary(&self, address: usize, bytes: &[u8], required_size: usize) -> Option<usize> {
168        let instructions = self.decode_until_size(address, bytes, required_size);
169        if instructions.is_empty() {
170            return None;
171        }
172
173        let total: usize = instructions.iter().map(|i| i.length).sum();
174        if total >= required_size {
175            Some(total)
176        } else {
177            None
178        }
179    }
180
181    fn analyze_instruction(&self, instruction: Instruction, address: usize) -> DecodedInstruction {
182        let length = instruction.len();
183        let flow = instruction.flow_control();
184
185        let is_control_flow = matches!(
186            flow,
187            FlowControl::UnconditionalBranch
188                | FlowControl::ConditionalBranch
189                | FlowControl::Call
190                | FlowControl::IndirectBranch
191                | FlowControl::IndirectCall
192                | FlowControl::Return
193                | FlowControl::Interrupt
194                | FlowControl::XbeginXabortXend
195                | FlowControl::Exception
196        );
197
198        let mut is_relative = false;
199        let mut branch_target = None;
200
201        match flow {
202            FlowControl::UnconditionalBranch
203            | FlowControl::ConditionalBranch
204            | FlowControl::Call => {
205                if instruction.op0_kind() == OpKind::NearBranch16
206                    || instruction.op0_kind() == OpKind::NearBranch32
207                    || instruction.op0_kind() == OpKind::NearBranch64
208                {
209                    is_relative = true;
210                    branch_target = Some(instruction.near_branch_target());
211                }
212            }
213            FlowControl::IndirectBranch | FlowControl::IndirectCall => {
214                // check if using RIP-relative addressing
215            }
216            _ => {}
217        }
218
219        // check for RIP-relative memory access
220        let mut is_rip_relative = false;
221        let mut rip_disp_offset = None;
222        let mut rip_disp_size = None;
223        let mut rip_target = None;
224
225        if self.bitness == 64 {
226            for i in 0..instruction.op_count() {
227                if instruction.op_kind(i) == OpKind::Memory {
228                    if instruction.is_ip_rel_memory_operand() {
229                        is_rip_relative = true;
230                        is_relative = true;
231
232                        // calculate displacement offset and target
233                        let disp = instruction.memory_displacement64();
234                        rip_target = Some(instruction.ip_rel_memory_address());
235
236                        // find displacement offset in instruction bytes
237                        // for RIP-relative, displacement is always 4 bytes and comes
238                        // after ModR/M (and optionally SIB)
239                        rip_disp_size = Some(4);
240
241                        // the displacement offset is instruction length - 4 (for disp32)
242                        // minus any immediate operand size
243                        let imm_size = get_immediate_size(&instruction);
244                        if length > 4 + imm_size {
245                            rip_disp_offset = Some(length - 4 - imm_size);
246                        }
247                    }
248                    break;
249                }
250            }
251        }
252
253        DecodedInstruction {
254            inner: instruction,
255            length,
256            is_relative,
257            is_control_flow,
258            branch_target,
259            is_rip_relative,
260            rip_disp_offset,
261            rip_disp_size,
262            rip_target,
263        }
264    }
265}
266
267fn get_immediate_size(instruction: &Instruction) -> usize {
268    for i in 0..instruction.op_count() {
269        match instruction.op_kind(i) {
270            OpKind::Immediate8 | OpKind::Immediate8_2nd | OpKind::Immediate8to16
271            | OpKind::Immediate8to32 | OpKind::Immediate8to64 => return 1,
272            OpKind::Immediate16 => return 2,
273            OpKind::Immediate32 | OpKind::Immediate32to64 => return 4,
274            OpKind::Immediate64 => return 8,
275            _ => {}
276        }
277    }
278    0
279}
280
281/// convenience function to decode a single instruction
282pub fn decode_one(address: usize, bytes: &[u8]) -> Option<DecodedInstruction> {
283    InstructionDecoder::native().decode_at(address, bytes)
284}
285
286/// convenience function to find instruction boundary
287pub fn find_instruction_boundary(address: usize, bytes: &[u8], required_size: usize) -> Option<usize> {
288    InstructionDecoder::native().find_boundary(address, bytes, required_size)
289}
290
291/// check if instruction at address uses relative addressing
292pub fn uses_relative_addressing(address: usize, bytes: &[u8]) -> bool {
293    decode_one(address, bytes)
294        .map(|i| i.is_relative)
295        .unwrap_or(false)
296}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// base address used for every decode in this module
    const BASE: usize = 0x1000;

    /// typical x64 prologue: push rbp; mov rbp, rsp; sub rsp, 0x28
    const PROLOGUE: [u8; 8] = [0x55, 0x48, 0x89, 0xE5, 0x48, 0x83, 0xEC, 0x28];

    /// decode one instruction at `BASE` in 64-bit mode, panicking on failure
    fn decode64(bytes: &[u8]) -> DecodedInstruction {
        InstructionDecoder::x64()
            .decode_at(BASE, bytes)
            .expect("test bytes must decode to a valid instruction")
    }

    #[test]
    fn test_decode_empty_input() {
        assert!(InstructionDecoder::x64().decode_at(BASE, &[]).is_none());
    }

    #[test]
    fn test_decode_truncated_instruction() {
        // jmp rel32 with only one of its four displacement bytes present
        let truncated = [0xE9u8, 0x00];
        assert!(InstructionDecoder::x64().decode_at(BASE, &truncated).is_none());
    }

    #[test]
    fn test_decode_nop() {
        let decoded = decode64(&[0x90]);

        assert_eq!(decoded.length, 1);
        assert!(!decoded.is_relative);
        assert!(!decoded.is_control_flow);
        assert!(decoded.is_relocatable());
    }

    #[test]
    fn test_decode_jmp_rel32() {
        // jmp +0x100 from 0x1000 -> target 0x1105
        let decoded = decode64(&[0xE9, 0x00, 0x01, 0x00, 0x00]);

        assert_eq!(decoded.length, 5);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
        assert_eq!(decoded.branch_target, Some(0x1105));
    }

    #[test]
    fn test_decode_call_rel32() {
        // call +0 from 0x1000 -> target 0x1005
        let decoded = decode64(&[0xE8, 0x00, 0x00, 0x00, 0x00]);

        assert_eq!(decoded.length, 5);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
        assert_eq!(decoded.branch_target, Some(0x1005));
    }

    #[test]
    fn test_decode_push_rbp() {
        let decoded = decode64(&[0x55]);

        assert_eq!(decoded.length, 1);
        assert!(!decoded.is_relative);
        assert!(!decoded.is_control_flow);
    }

    #[test]
    fn test_decode_mov_rbp_rsp() {
        // mov rbp, rsp = 48 89 E5
        let decoded = decode64(&[0x48, 0x89, 0xE5]);

        assert_eq!(decoded.length, 3);
        assert!(!decoded.is_relative);
    }

    #[test]
    fn test_decode_sub_rsp_imm8() {
        // sub rsp, 0x28 = 48 83 EC 28
        let decoded = decode64(&[0x48, 0x83, 0xEC, 0x28]);

        assert_eq!(decoded.length, 4);
        assert!(!decoded.is_relative);
    }

    // No host-architecture gate: the 64-bit decoder is selected explicitly,
    // so this runs the same on any host.
    #[test]
    fn test_decode_rip_relative() {
        // mov rax, [rip+0x12345678]
        // 48 8B 05 78 56 34 12
        let decoded = decode64(&[0x48, 0x8B, 0x05, 0x78, 0x56, 0x34, 0x12]);

        assert_eq!(decoded.length, 7);
        assert!(decoded.is_rip_relative);
        assert!(decoded.is_relative);
        assert!(!decoded.is_control_flow);
        // target = IP + insn_len + disp = 0x1000 + 7 + 0x12345678 = 0x1234667F
        assert_eq!(decoded.rip_target, Some(0x1234667F));
        // displacement follows REX + opcode + ModR/M
        assert_eq!(decoded.rip_disp_offset, Some(3));
        assert_eq!(decoded.rip_disp_size, Some(4));
    }

    #[test]
    fn test_decode_rip_relative_with_immediate() {
        // mov dword ptr [rip+0x10], 1
        // C7 05 10 00 00 00 01 00 00 00
        let decoded = decode64(&[0xC7, 0x05, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00]);

        assert_eq!(decoded.length, 10);
        assert!(decoded.is_rip_relative);
        // target = 0x1000 + 10 + 0x10
        assert_eq!(decoded.rip_target, Some(0x101A));
        // displacement sits between ModR/M and the trailing imm32
        assert_eq!(decoded.rip_disp_offset, Some(2));
        assert_eq!(decoded.rip_disp_size, Some(4));
    }

    #[test]
    fn test_decode_all_prologue() {
        let decoded = InstructionDecoder::x64().decode_all(BASE, &PROLOGUE);

        let lengths: Vec<usize> = decoded.iter().map(|i| i.length).collect();
        assert_eq!(lengths, [1, 3, 4]);

        let addresses: Vec<u64> = decoded.iter().map(|i| i.inner.ip()).collect();
        assert_eq!(addresses, [0x1000, 0x1001, 0x1004]);
    }

    #[test]
    fn test_decode_until_size() {
        let decoder = InstructionDecoder::x64();

        // push (1) + mov (3) covers exactly 4 bytes
        assert_eq!(decoder.decode_until_size(BASE, &PROLOGUE, 4).len(), 2);
        // 5 bytes needs the 4-byte sub as well
        assert_eq!(decoder.decode_until_size(BASE, &PROLOGUE, 5).len(), 3);
    }

    #[test]
    fn test_find_boundary() {
        let decoder = InstructionDecoder::x64();

        // 1 + 3 = 4 < 5, so the whole 8-byte prologue is required
        assert_eq!(decoder.find_boundary(BASE, &PROLOGUE, 5), Some(8));
        assert_eq!(decoder.find_boundary(BASE, &PROLOGUE, 4), Some(4));
    }

    #[test]
    fn test_find_boundary_insufficient_bytes() {
        let decoder = InstructionDecoder::x64();

        // a single nop cannot cover 5 bytes
        assert_eq!(decoder.find_boundary(BASE, &[0x90], 5), None);
        assert_eq!(decoder.find_boundary(BASE, &[], 5), None);
    }

    #[test]
    fn test_decode_conditional_jump() {
        // jz +0x10 (short)
        let decoded = decode64(&[0x74, 0x10]);

        assert_eq!(decoded.length, 2);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
        assert_eq!(decoded.branch_target, Some(0x1012));
    }

    #[test]
    fn test_decode_long_conditional_jump() {
        // jz +0x100 (near)
        let decoded = decode64(&[0x0F, 0x84, 0x00, 0x01, 0x00, 0x00]);

        assert_eq!(decoded.length, 6);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
        assert_eq!(decoded.branch_target, Some(0x1106));
    }
}