wraith/manipulation/inline_hook/asm/
decoder.rs

1//! Instruction length decoder
2//!
3//! Provides instruction length detection for x86/x64 instructions,
4//! used to find safe hook points at instruction boundaries.
5
/// information about a decoded instruction
///
/// produced by the decoders in this module; every field describes the single
/// instruction that starts at the first byte handed to the decoder.
/// derives `PartialEq`/`Eq` so that results can be compared directly
/// (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InstructionInfo {
    /// length of the instruction in bytes (prefixes, opcode, ModR/M, SIB,
    /// displacement and immediate included)
    pub length: usize,
    /// whether the instruction uses relative addressing
    /// (relative branch/call or, on x64, a RIP-relative memory operand)
    pub is_relative: bool,
    /// whether the instruction is a control flow instruction (jmp, call, ret)
    pub is_control_flow: bool,
    /// for relative instructions, the computed target address (if calculable)
    ///
    /// NOTE: the decoders in this file always leave this as `None`
    pub relative_target: Option<usize>,
}
18
19/// decode instruction at given address
20///
21/// returns instruction info or None if decoding fails
22#[cfg(target_arch = "x86_64")]
23pub fn decode_instruction(address: usize, max_bytes: usize) -> Option<InstructionInfo> {
24    if max_bytes == 0 {
25        return None;
26    }
27
28    // SAFETY: caller must ensure address is valid readable memory
29    let code = unsafe { core::slice::from_raw_parts(address as *const u8, max_bytes.min(15)) };
30
31    decode_x64(code)
32}
33
/// decode instruction at given address
///
/// reads at most `max_bytes` bytes (never more than 15, the longest possible
/// x86 instruction) starting at `address` and decodes the single instruction
/// found there.
///
/// returns instruction info or None if decoding fails, if `max_bytes` is 0,
/// if `address` is null, or if the read window would wrap the address space
///
/// the caller must ensure that `address .. address + min(max_bytes, 15)` is
/// valid readable memory for the duration of the call; this function cannot
/// verify that itself.
#[cfg(target_arch = "x86")]
pub fn decode_instruction(address: usize, max_bytes: usize) -> Option<InstructionInfo> {
    // a null pointer is never a valid slice base, so reject it up front
    if address == 0 || max_bytes == 0 {
        return None;
    }

    // no x86 instruction is longer than 15 bytes
    let window = max_bytes.min(15);

    // the slice must not wrap around the end of the address space
    address.checked_add(window)?;

    // SAFETY: caller must ensure address is valid readable memory for
    // `window` bytes; the pointer is non-null and the range does not wrap
    // (both checked above), and u8 has no alignment requirement
    let code = unsafe { core::slice::from_raw_parts(address as *const u8, window) };

    decode_x86(code)
}
44
45/// decode x64 instruction from bytes
46#[cfg(target_arch = "x86_64")]
47fn decode_x64(code: &[u8]) -> Option<InstructionInfo> {
48    if code.is_empty() {
49        return None;
50    }
51
52    let mut offset = 0;
53
54    // skip legacy prefixes (group 1-4)
55    while offset < code.len() {
56        match code[offset] {
57            // segment overrides
58            0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 => offset += 1,
59            // operand size / address size
60            0x66 | 0x67 => offset += 1,
61            // lock / rep
62            0xF0 | 0xF2 | 0xF3 => offset += 1,
63            _ => break,
64        }
65    }
66
67    if offset >= code.len() {
68        return None;
69    }
70
71    // check for REX prefix (0x40-0x4F)
72    let has_rex = (0x40..=0x4F).contains(&code[offset]);
73    let rex_w = has_rex && (code[offset] & 0x08) != 0;
74    if has_rex {
75        offset += 1;
76    }
77
78    if offset >= code.len() {
79        return None;
80    }
81
82    let opcode = code[offset];
83    offset += 1;
84
85    match opcode {
86        // single-byte instructions
87        0x50..=0x5F => Some(InstructionInfo {
88            length: offset,
89            is_relative: false,
90            is_control_flow: false,
91            relative_target: None,
92        }),
93
94        // NOP, XCHG AX,AX
95        0x90 => Some(InstructionInfo {
96            length: offset,
97            is_relative: false,
98            is_control_flow: false,
99            relative_target: None,
100        }),
101
102        // RET
103        0xC3 => Some(InstructionInfo {
104            length: offset,
105            is_relative: false,
106            is_control_flow: true,
107            relative_target: None,
108        }),
109
110        // RET imm16
111        0xC2 => Some(InstructionInfo {
112            length: offset + 2,
113            is_relative: false,
114            is_control_flow: true,
115            relative_target: None,
116        }),
117
118        // INT3
119        0xCC => Some(InstructionInfo {
120            length: offset,
121            is_relative: false,
122            is_control_flow: true,
123            relative_target: None,
124        }),
125
126        // CALL rel32
127        0xE8 => {
128            if code.len() < offset + 4 {
129                return None;
130            }
131            Some(InstructionInfo {
132                length: offset + 4,
133                is_relative: true,
134                is_control_flow: true,
135                relative_target: None,
136            })
137        }
138
139        // JMP rel32
140        0xE9 => {
141            if code.len() < offset + 4 {
142                return None;
143            }
144            Some(InstructionInfo {
145                length: offset + 4,
146                is_relative: true,
147                is_control_flow: true,
148                relative_target: None,
149            })
150        }
151
152        // JMP rel8
153        0xEB => {
154            if code.len() < offset + 1 {
155                return None;
156            }
157            Some(InstructionInfo {
158                length: offset + 1,
159                is_relative: true,
160                is_control_flow: true,
161                relative_target: None,
162            })
163        }
164
165        // short conditional jumps (Jcc rel8)
166        0x70..=0x7F => {
167            if code.len() < offset + 1 {
168                return None;
169            }
170            Some(InstructionInfo {
171                length: offset + 1,
172                is_relative: true,
173                is_control_flow: true,
174                relative_target: None,
175            })
176        }
177
178        // PUSH imm32
179        0x68 => Some(InstructionInfo {
180            length: offset + 4,
181            is_relative: false,
182            is_control_flow: false,
183            relative_target: None,
184        }),
185
186        // PUSH imm8
187        0x6A => Some(InstructionInfo {
188            length: offset + 1,
189            is_relative: false,
190            is_control_flow: false,
191            relative_target: None,
192        }),
193
194        // MOV r64, imm64 (with REX.W)
195        0xB8..=0xBF if rex_w => Some(InstructionInfo {
196            length: offset + 8,
197            is_relative: false,
198            is_control_flow: false,
199            relative_target: None,
200        }),
201
202        // MOV r32, imm32
203        0xB8..=0xBF => Some(InstructionInfo {
204            length: offset + 4,
205            is_relative: false,
206            is_control_flow: false,
207            relative_target: None,
208        }),
209
210        // MOV r8, imm8
211        0xB0..=0xB7 => Some(InstructionInfo {
212            length: offset + 1,
213            is_relative: false,
214            is_control_flow: false,
215            relative_target: None,
216        }),
217
218        // two-byte opcodes (0F xx)
219        0x0F => {
220            if offset >= code.len() {
221                return None;
222            }
223            let op2 = code[offset];
224            offset += 1;
225
226            match op2 {
227                // Jcc rel32 (long conditional jumps)
228                0x80..=0x8F => {
229                    if code.len() < offset + 4 {
230                        return None;
231                    }
232                    Some(InstructionInfo {
233                        length: offset + 4,
234                        is_relative: true,
235                        is_control_flow: true,
236                        relative_target: None,
237                    })
238                }
239
240                // SETcc rm8
241                0x90..=0x9F => decode_modrm(code, offset, has_rex, false, false),
242
243                // CMOVcc r, rm
244                0x40..=0x4F => decode_modrm(code, offset, has_rex, false, false),
245
246                // MOVZX/MOVSX
247                0xB6 | 0xB7 | 0xBE | 0xBF => decode_modrm(code, offset, has_rex, false, false),
248
249                // SYSCALL
250                0x05 => Some(InstructionInfo {
251                    length: offset,
252                    is_relative: false,
253                    is_control_flow: true,
254                    relative_target: None,
255                }),
256
257                // other two-byte with ModR/M
258                _ => decode_modrm(code, offset, has_rex, false, false),
259            }
260        }
261
262        // instructions with ModR/M
263        0x00..=0x3F | 0x63 | 0x69 | 0x6B | 0x80..=0x8F | 0x8D | 0xC0..=0xC1 | 0xC6..=0xC7
264        | 0xD0..=0xD3 | 0xF6..=0xF7 | 0xFE..=0xFF => {
265            let has_imm8 = matches!(opcode, 0x80 | 0x83 | 0xC0 | 0xC1 | 0xC6 | 0x6B);
266            let has_imm32 = matches!(opcode, 0x81 | 0xC7 | 0x69);
267            decode_modrm(code, offset, has_rex, has_imm8, has_imm32)
268        }
269
270        // AL/AX/EAX/RAX immediate operations
271        0x04 | 0x0C | 0x14 | 0x1C | 0x24 | 0x2C | 0x34 | 0x3C | 0xA8 => Some(InstructionInfo {
272            length: offset + 1,
273            is_relative: false,
274            is_control_flow: false,
275            relative_target: None,
276        }),
277        0x05 | 0x0D | 0x15 | 0x1D | 0x25 | 0x2D | 0x35 | 0x3D | 0xA9 => Some(InstructionInfo {
278            length: offset + 4,
279            is_relative: false,
280            is_control_flow: false,
281            relative_target: None,
282        }),
283
284        // INT imm8
285        0xCD => Some(InstructionInfo {
286            length: offset + 1,
287            is_relative: false,
288            is_control_flow: true,
289            relative_target: None,
290        }),
291
292        // LEAVE
293        0xC9 => Some(InstructionInfo {
294            length: offset,
295            is_relative: false,
296            is_control_flow: false,
297            relative_target: None,
298        }),
299
300        // default: try ModR/M decode
301        _ => decode_modrm(code, offset, has_rex, false, false),
302    }
303}
304
/// decode x86 instruction
///
/// decodes the single 32-bit mode instruction that starts at `code[0]`:
/// legacy prefixes, the opcode, and whatever ModR/M / SIB / displacement /
/// immediate bytes the opcode implies.
///
/// returns None when `code` is empty, consists only of prefixes, or ends
/// before the fixed-size operand or immediate of the instruction.
///
/// this is a length decoder for common instructions, not a full disassembler:
/// primary opcodes without a dedicated arm, and every two-byte (0F xx) opcode
/// other than Jcc rel32, are assumed to be "opcode + ModR/M" with no
/// immediate.
#[cfg(target_arch = "x86")]
fn decode_x86(code: &[u8]) -> Option<InstructionInfo> {
    let mut offset = 0;
    let mut operand_size_override = false;

    // skip prefixes; 0x66 is remembered because it shrinks imm32 operands
    // to imm16
    while let Some(&prefix) = code.get(offset) {
        match prefix {
            0x66 => operand_size_override = true,
            0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 | 0x67 | 0xF0 | 0xF2 | 0xF3 => {}
            _ => break,
        }
        offset += 1;
    }

    let opcode = *code.get(offset)?;
    offset += 1;

    // size of an "imm32" operand once the operand size prefix is applied
    let imm_z = if operand_size_override { 2 } else { 4 };

    // instruction without ModR/M whose total length is already known;
    // rejects instructions that run past the end of `code`
    let fixed = |length: usize,
                 is_relative: bool,
                 is_control_flow: bool|
     -> Option<InstructionInfo> {
        if length > code.len() {
            return None;
        }
        Some(InstructionInfo {
            length,
            is_relative,
            is_control_flow,
            relative_target: None,
        })
    };

    // instruction with a ModR/M byte at `modrm_at` followed by `imm_len`
    // immediate bytes; the immediate is added here (not inside decode_modrm)
    // so that it can be bounds-checked and can be 2 bytes wide
    let with_modrm = |modrm_at: usize, imm_len: usize| -> Option<InstructionInfo> {
        let mut info = decode_modrm(code, modrm_at, false, false, false)?;
        info.length += imm_len;
        if info.length > code.len() {
            return None;
        }
        Some(info)
    };

    match opcode {
        // INC/DEC r32 (single byte in 32-bit mode), PUSH/POP r32, NOP, LEAVE
        0x40..=0x4F | 0x50..=0x5F | 0x90 | 0xC9 => fixed(offset, false, false),

        // RET, INT3
        0xC3 | 0xCC => fixed(offset, false, true),

        // RET imm16
        0xC2 => fixed(offset + 2, false, true),

        // INT imm8
        0xCD => fixed(offset + 1, false, true),

        // CALL rel32, JMP rel32
        0xE8 | 0xE9 => fixed(offset + 4, true, true),

        // JMP rel8, Jcc rel8
        0xEB | 0x70..=0x7F => fixed(offset + 1, true, true),

        // PUSH imm32 (imm16 with operand size prefix)
        0x68 => fixed(offset + imm_z, false, false),

        // PUSH imm8
        0x6A => fixed(offset + 1, false, false),

        // MOV r32, imm32 (imm16 with operand size prefix)
        0xB8..=0xBF => fixed(offset + imm_z, false, false),

        // MOV r8, imm8
        0xB0..=0xB7 => fixed(offset + 1, false, false),

        // AL immediate operations; these must be matched before the
        // 0x00..=0x3F ModR/M range below, which would otherwise swallow them
        0x04 | 0x0C | 0x14 | 0x1C | 0x24 | 0x2C | 0x34 | 0x3C | 0xA8 => {
            fixed(offset + 1, false, false)
        }

        // AX/EAX immediate operations (same ordering requirement)
        0x05 | 0x0D | 0x15 | 0x1D | 0x25 | 0x2D | 0x35 | 0x3D | 0xA9 => {
            fixed(offset + imm_z, false, false)
        }

        // two-byte opcodes (0F xx)
        0x0F => {
            let op2 = *code.get(offset)?;
            offset += 1;

            if (0x80..=0x8F).contains(&op2) {
                // Jcc rel32
                fixed(offset + 4, true, true)
            } else {
                with_modrm(offset, 0)
            }
        }

        // group 3 (TEST/NOT/NEG/MUL/IMUL/DIV/IDIV): only TEST (/0 and /1)
        // is followed by an immediate
        0xF6 | 0xF7 => {
            let is_test = matches!(code.get(offset), Some(&modrm) if (modrm >> 3) & 0x07 <= 1);
            let imm_len = match (is_test, opcode) {
                (false, _) => 0,
                (true, 0xF6) => 1,
                (true, _) => imm_z,
            };
            with_modrm(offset, imm_len)
        }

        // group 5: /2../5 are indirect CALL/JMP (near and far)
        0xFF => {
            let mut info = with_modrm(offset, 0)?;
            if let Some(&modrm) = code.get(offset) {
                info.is_control_flow = matches!((modrm >> 3) & 0x07, 2..=5);
            }
            Some(info)
        }

        // instructions with ModR/M
        0x00..=0x3F | 0x69 | 0x6B | 0x80..=0x8F | 0xC0..=0xC1 | 0xC6..=0xC7 | 0xD0..=0xD3
        | 0xFE => {
            let imm_len = match opcode {
                0x80 | 0x83 | 0xC0 | 0xC1 | 0xC6 | 0x6B => 1,
                0x81 | 0xC7 | 0x69 => imm_z,
                _ => 0,
            };
            with_modrm(offset, imm_len)
        }

        // default: try ModR/M decode
        _ => with_modrm(offset, 0),
    }
}
458
459/// decode ModR/M byte and compute total instruction length
460fn decode_modrm(
461    code: &[u8],
462    offset: usize,
463    has_rex: bool,
464    has_imm8: bool,
465    has_imm32: bool,
466) -> Option<InstructionInfo> {
467    if offset >= code.len() {
468        return Some(InstructionInfo {
469            length: offset,
470            is_relative: false,
471            is_control_flow: false,
472            relative_target: None,
473        });
474    }
475
476    let modrm = code[offset];
477    let mod_field = (modrm >> 6) & 0x03;
478    let rm = modrm & 0x07;
479
480    let mut len = offset + 1;
481
482    // check for RIP-relative on x64 (mod=00, rm=101)
483    #[cfg(target_arch = "x86_64")]
484    let is_rip_relative = mod_field == 0 && rm == 5;
485    #[cfg(target_arch = "x86")]
486    let is_rip_relative = false;
487
488    match mod_field {
489        0b00 => {
490            if rm == 4 {
491                // SIB byte
492                if len < code.len() {
493                    let sib = code[len];
494                    len += 1;
495                    let base = sib & 0x07;
496                    if base == 5 {
497                        len += 4; // disp32 when base=5
498                    }
499                }
500            } else if rm == 5 {
501                len += 4; // disp32 (or RIP-relative on x64)
502            }
503        }
504        0b01 => {
505            if rm == 4 {
506                len += 1; // SIB
507            }
508            len += 1; // disp8
509        }
510        0b10 => {
511            if rm == 4 {
512                len += 1; // SIB
513            }
514            len += 4; // disp32
515        }
516        0b11 => {
517            // register direct, no extra bytes
518        }
519        _ => {}
520    }
521
522    // add immediate if present
523    if has_imm8 {
524        len += 1;
525    }
526    if has_imm32 {
527        len += 4;
528    }
529
530    let _ = has_rex; // used only for x64
531
532    Some(InstructionInfo {
533        length: len.min(code.len()),
534        is_relative: is_rip_relative,
535        is_control_flow: false,
536        relative_target: None,
537    })
538}
539
540/// find instruction boundary at or after required_size
541///
542/// scans instructions starting at address until we have at least required_size bytes
543pub fn find_boundary(address: usize, required_size: usize, max_scan: usize) -> Option<usize> {
544    let mut current = address;
545    let mut total = 0;
546
547    while total < required_size && (current - address) < max_scan {
548        let info = decode_instruction(current, max_scan - (current - address))?;
549        total += info.length;
550        current += info.length;
551    }
552
553    if total >= required_size {
554        Some(total)
555    } else {
556        None
557    }
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// runs the byte-slice decoder matching the architecture under test and
    /// unwraps its result
    fn decode(bytes: &[u8]) -> InstructionInfo {
        #[cfg(target_arch = "x86_64")]
        let decoded = decode_x64(bytes);
        #[cfg(target_arch = "x86")]
        let decoded = decode_x86(bytes);

        decoded.unwrap()
    }

    #[test]
    fn test_decode_nop() {
        let info = decode(&[0x90u8]);

        assert_eq!(info.length, 1);
        assert!(!info.is_relative);
    }

    #[test]
    fn test_decode_push_pop() {
        // push rbp/ebp
        let pushed = decode(&[0x55u8]);
        assert_eq!(pushed.length, 1);

        // pop rbp/ebp
        let popped = decode(&[0x5Du8]);
        assert_eq!(popped.length, 1);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_decode_mov_rbp_rsp() {
        // mov rbp, rsp = 48 89 E5
        let info = decode(&[0x48, 0x89, 0xE5]);
        assert_eq!(info.length, 3);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_decode_sub_rsp_imm8() {
        // sub rsp, 0x28 = 48 83 EC 28
        let info = decode(&[0x48, 0x83, 0xEC, 0x28]);
        assert_eq!(info.length, 4);
    }

    #[test]
    fn test_decode_jmp_rel32() {
        // jmp rel32 = E9 + 4-byte displacement
        let info = decode(&[0xE9, 0x00, 0x00, 0x00, 0x00]);
        assert_eq!(info.length, 5);
        assert!(info.is_relative);
        assert!(info.is_control_flow);
    }

    #[test]
    fn test_decode_call_rel32() {
        // call rel32 = E8 + 4-byte displacement
        let info = decode(&[0xE8, 0x00, 0x00, 0x00, 0x00]);
        assert_eq!(info.length, 5);
        assert!(info.is_relative);
        assert!(info.is_control_flow);
    }
}