wraith/manipulation/inline_hook/arch/
x86.rs

1//! x86 (32-bit) architecture implementation
2
3use super::{Architecture, DecodedInstruction};
4
/// x86 (32-bit) architecture
///
/// Unit struct used purely as a type-level tag: it carries no data, and all
/// 32-bit behaviour (jump/call encoders, NOP padding, instruction sizing and
/// relocation) is attached through its `Architecture` implementation.
pub struct X86;
7
8impl Architecture for X86 {
9    // E9 rel32 - 5 bytes
10    const JMP_REL_SIZE: usize = 5;
11
12    // push imm32; ret - 6 bytes
13    const JMP_ABS_SIZE: usize = 6;
14
15    const PTR_SIZE: usize = 4;
16    const CODE_ALIGNMENT: usize = 4;
17
18    // x86 can always use 5-byte jmp rel32 (full address space reachable)
19    const MIN_HOOK_SIZE: usize = 5;
20
21    fn encode_jmp_rel(source: usize, target: usize) -> Option<Vec<u8>> {
22        // on x86, all addresses are reachable with rel32
23        let offset = (target as i32).wrapping_sub((source as i32).wrapping_add(5));
24
25        let mut bytes = Vec::with_capacity(5);
26        bytes.push(0xE9);
27        bytes.extend_from_slice(&offset.to_le_bytes());
28        Some(bytes)
29    }
30
31    fn encode_jmp_abs(target: usize) -> Vec<u8> {
32        // push imm32; ret
33        let mut bytes = Vec::with_capacity(6);
34        bytes.push(0x68); // push imm32
35        bytes.extend_from_slice(&(target as u32).to_le_bytes());
36        bytes.push(0xC3); // ret
37        bytes
38    }
39
40    fn encode_call_rel(source: usize, target: usize) -> Option<Vec<u8>> {
41        let offset = (target as i32).wrapping_sub((source as i32).wrapping_add(5));
42
43        let mut bytes = Vec::with_capacity(5);
44        bytes.push(0xE8);
45        bytes.extend_from_slice(&offset.to_le_bytes());
46        Some(bytes)
47    }
48
49    fn encode_nop_sled(size: usize) -> Vec<u8> {
50        let mut bytes = Vec::with_capacity(size);
51        let mut remaining = size;
52
53        while remaining > 0 {
54            match remaining {
55                1 => {
56                    bytes.push(0x90);
57                    remaining -= 1;
58                }
59                2 => {
60                    bytes.extend_from_slice(&[0x66, 0x90]);
61                    remaining -= 2;
62                }
63                3 => {
64                    bytes.extend_from_slice(&[0x0F, 0x1F, 0x00]);
65                    remaining -= 3;
66                }
67                4 => {
68                    bytes.extend_from_slice(&[0x0F, 0x1F, 0x40, 0x00]);
69                    remaining -= 4;
70                }
71                5 => {
72                    bytes.extend_from_slice(&[0x0F, 0x1F, 0x44, 0x00, 0x00]);
73                    remaining -= 5;
74                }
75                6 => {
76                    bytes.extend_from_slice(&[0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00]);
77                    remaining -= 6;
78                }
79                _ => {
80                    bytes.extend_from_slice(&[0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00]);
81                    remaining -= 7;
82                }
83            }
84        }
85
86        bytes
87    }
88
89    fn find_instruction_boundary(code: &[u8], required_size: usize) -> Option<usize> {
90        let mut offset = 0;
91
92        while offset < required_size && offset < code.len() {
93            let insn = decode_instruction_x86(&code[offset..])?;
94            offset += insn.length;
95        }
96
97        if offset >= required_size {
98            Some(offset)
99        } else {
100            None
101        }
102    }
103
104    fn relocate_instruction(
105        instruction: &[u8],
106        old_address: usize,
107        new_address: usize,
108    ) -> Option<Vec<u8>> {
109        if instruction.is_empty() {
110            return None;
111        }
112
113        let decoded = decode_instruction_x86(instruction)?;
114
115        if !decoded.is_relative {
116            return Some(instruction[..decoded.length].to_vec());
117        }
118
119        match instruction[0] {
120            // E8 - call rel32
121            0xE8 => {
122                if instruction.len() < 5 {
123                    return None;
124                }
125                let orig_offset = i32::from_le_bytes(instruction[1..5].try_into().ok()?);
126                let orig_target = (old_address as i32).wrapping_add(5).wrapping_add(orig_offset);
127                let new_offset = (orig_target as i32)
128                    .wrapping_sub((new_address as i32).wrapping_add(5));
129
130                let mut bytes = vec![0xE8];
131                bytes.extend_from_slice(&new_offset.to_le_bytes());
132                Some(bytes)
133            }
134
135            // E9 - jmp rel32
136            0xE9 => {
137                if instruction.len() < 5 {
138                    return None;
139                }
140                let orig_offset = i32::from_le_bytes(instruction[1..5].try_into().ok()?);
141                let orig_target = (old_address as i32).wrapping_add(5).wrapping_add(orig_offset);
142                let new_offset = (orig_target as i32)
143                    .wrapping_sub((new_address as i32).wrapping_add(5));
144
145                let mut bytes = vec![0xE9];
146                bytes.extend_from_slice(&new_offset.to_le_bytes());
147                Some(bytes)
148            }
149
150            // EB - jmp rel8
151            0xEB => {
152                if instruction.len() < 2 {
153                    return None;
154                }
155                let orig_offset = instruction[1] as i8;
156                let orig_target = (old_address as i32).wrapping_add(2).wrapping_add(orig_offset as i32);
157
158                // try to keep as short jump
159                let new_offset = (orig_target as i32)
160                    .wrapping_sub((new_address as i32).wrapping_add(2));
161                if new_offset >= i8::MIN as i32 && new_offset <= i8::MAX as i32 {
162                    Some(vec![0xEB, new_offset as u8])
163                } else {
164                    // expand to jmp rel32
165                    let new_offset = (orig_target as i32)
166                        .wrapping_sub((new_address as i32).wrapping_add(5));
167                    let mut bytes = vec![0xE9];
168                    bytes.extend_from_slice(&new_offset.to_le_bytes());
169                    Some(bytes)
170                }
171            }
172
173            // 0F 80-8F - conditional jumps rel32
174            0x0F if instruction.len() >= 2 && (0x80..=0x8F).contains(&instruction[1]) => {
175                if instruction.len() < 6 {
176                    return None;
177                }
178                let orig_offset = i32::from_le_bytes(instruction[2..6].try_into().ok()?);
179                let orig_target = (old_address as i32).wrapping_add(6).wrapping_add(orig_offset);
180                let new_offset = (orig_target as i32)
181                    .wrapping_sub((new_address as i32).wrapping_add(6));
182
183                let mut bytes = vec![0x0F, instruction[1]];
184                bytes.extend_from_slice(&new_offset.to_le_bytes());
185                Some(bytes)
186            }
187
188            // 70-7F - short conditional jumps
189            b if (0x70..=0x7F).contains(&b) => {
190                if instruction.len() < 2 {
191                    return None;
192                }
193                let orig_offset = instruction[1] as i8;
194                let orig_target = (old_address as i32).wrapping_add(2).wrapping_add(orig_offset as i32);
195
196                // expand to long conditional jump
197                let new_offset = (orig_target as i32)
198                    .wrapping_sub((new_address as i32).wrapping_add(6));
199                let long_opcode = 0x80 + (b - 0x70);
200                let mut bytes = vec![0x0F, long_opcode];
201                bytes.extend_from_slice(&new_offset.to_le_bytes());
202                Some(bytes)
203            }
204
205            // not relative, copy as-is
206            _ => Some(instruction[..decoded.length].to_vec()),
207        }
208    }
209
210    fn needs_relocation(instruction: &[u8]) -> bool {
211        if instruction.is_empty() {
212            return false;
213        }
214
215        match instruction[0] {
216            0xE8 | 0xE9 | 0xEB => true,
217            0x70..=0x7F => true,
218            0x0F if instruction.len() >= 2 && (0x80..=0x8F).contains(&instruction[1]) => true,
219            _ => false,
220        }
221    }
222}
223
224/// decode a single x86 instruction
225fn decode_instruction_x86(code: &[u8]) -> Option<DecodedInstruction> {
226    if code.is_empty() {
227        return None;
228    }
229
230    let mut offset = 0;
231
232    // skip prefixes
233    while offset < code.len() {
234        match code[offset] {
235            0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 | 0x66 | 0x67 | 0xF0 | 0xF2 | 0xF3 => {
236                offset += 1;
237            }
238            _ => break,
239        }
240    }
241
242    if offset >= code.len() {
243        return None;
244    }
245
246    let opcode = code[offset];
247    offset += 1;
248
249    let (length, is_relative, relative_target) = match opcode {
250        // single-byte
251        0x50..=0x5F | 0x90..=0x9F | 0xC3 | 0xCC | 0xCB | 0xCF => {
252            (offset, false, None)
253        }
254
255        // push imm
256        0x68 => (offset + 4, false, None),
257        0x6A => (offset + 1, false, None),
258
259        // ret imm16
260        0xC2 => (offset + 2, false, None),
261
262        // call/jmp rel32
263        0xE8 | 0xE9 => {
264            if code.len() < offset + 4 {
265                return None;
266            }
267            let rel = i32::from_le_bytes(code[offset..offset + 4].try_into().ok()?);
268            let target = (code.as_ptr() as usize + offset + 4).wrapping_add(rel as usize);
269            (offset + 4, true, Some(target))
270        }
271
272        // jmp rel8
273        0xEB => {
274            if code.len() < offset + 1 {
275                return None;
276            }
277            let rel = code[offset] as i8;
278            let target = (code.as_ptr() as usize + offset + 1).wrapping_add(rel as usize);
279            (offset + 1, true, Some(target))
280        }
281
282        // short conditional jumps
283        0x70..=0x7F => {
284            if code.len() < offset + 1 {
285                return None;
286            }
287            let rel = code[offset] as i8;
288            let target = (code.as_ptr() as usize + offset + 1).wrapping_add(rel as usize);
289            (offset + 1, true, Some(target))
290        }
291
292        // mov r32, imm32
293        0xB8..=0xBF => (offset + 4, false, None),
294
295        // mov r8, imm8
296        0xB0..=0xB7 => (offset + 1, false, None),
297
298        // two-byte opcodes
299        0x0F => {
300            if offset >= code.len() {
301                return None;
302            }
303            let op2 = code[offset];
304            offset += 1;
305
306            match op2 {
307                // conditional jumps rel32
308                0x80..=0x8F => {
309                    if code.len() < offset + 4 {
310                        return None;
311                    }
312                    let rel = i32::from_le_bytes(code[offset..offset + 4].try_into().ok()?);
313                    let target = (code.as_ptr() as usize + offset + 4).wrapping_add(rel as usize);
314                    (offset + 4, true, Some(target))
315                }
316                // other two-byte with ModR/M
317                _ => decode_modrm_x86(code, offset),
318            }
319        }
320
321        // immediate group instructions: ModR/M + imm8
322        0x80 | 0x83 | 0xC0 | 0xC1 => {
323            let (len, is_rel, target) = decode_modrm_x86(code, offset);
324            (len + 1, is_rel, target) // +1 for imm8
325        }
326
327        // immediate group instructions: ModR/M + imm32
328        0x81 | 0xC7 => {
329            let (len, is_rel, target) = decode_modrm_x86(code, offset);
330            (len + 4, is_rel, target) // +4 for imm32
331        }
332
333        // imul r, r/m, imm8
334        0x6B => {
335            let (len, is_rel, target) = decode_modrm_x86(code, offset);
336            (len + 1, is_rel, target)
337        }
338
339        // imul r, r/m, imm32
340        0x69 => {
341            let (len, is_rel, target) = decode_modrm_x86(code, offset);
342            (len + 4, is_rel, target)
343        }
344
345        // mov r/m8, imm8
346        0xC6 => {
347            let (len, is_rel, target) = decode_modrm_x86(code, offset);
348            (len + 1, is_rel, target)
349        }
350
351        // instructions with ModR/M only (no immediate)
352        0x00..=0x3F | 0x84..=0x8F | 0xD0..=0xD3 | 0xF6..=0xF7
353        | 0xFE..=0xFF | 0x8D => {
354            decode_modrm_x86(code, offset)
355        }
356
357        // int imm8
358        0xCD => (offset + 1, false, None),
359
360        // AL/EAX + imm
361        0x04 | 0x0C | 0x14 | 0x1C | 0x24 | 0x2C | 0x34 | 0x3C => (offset + 1, false, None),
362        0x05 | 0x0D | 0x15 | 0x1D | 0x25 | 0x2D | 0x35 | 0x3D => (offset + 4, false, None),
363
364        _ => decode_modrm_x86(code, offset),
365    };
366
367    Some(DecodedInstruction {
368        length,
369        is_relative,
370        relative_target,
371    })
372}
373
/// decode ModR/M for x86
///
/// `offset` is the index of the ModR/M byte inside `code`. The first tuple
/// element is the index just past the ModR/M byte, the optional SIB byte and
/// the displacement (32-bit addressing forms), clamped to `code.len()`.
/// The remaining two elements are always `false` and `None`.
///
/// When `offset` is past the end of `code`, `offset` itself is returned.
fn decode_modrm_x86(code: &[u8], offset: usize) -> (usize, bool, Option<usize>) {
    let modrm = match code.get(offset) {
        Some(&byte) => byte,
        None => return (offset, false, None),
    };

    let addressing = modrm >> 6;
    let rm = modrm & 0x07;

    // rm == 4 selects a SIB byte in every memory addressing mode
    let sib_bytes = usize::from(addressing != 0b11 && rm == 4);

    let disp_bytes = match (addressing, rm) {
        // no displacement unless the SIB byte (when present) names base 5
        (0b00, 4) => match code.get(offset + 1) {
            Some(&sib) if sib & 0x07 == 5 => 4,
            _ => 0,
        },
        // absolute disp32
        (0b00, 5) => 4,
        // disp8
        (0b01, _) => 1,
        // disp32
        (0b10, _) => 4,
        // plain register-indirect or register-direct operand
        _ => 0,
    };

    let end = offset + 1 + sib_bytes + disp_bytes;
    (end.min(code.len()), false, None)
}
415
#[cfg(test)]
mod tests {
    use super::*;

    /// jmp rel32 displacement is measured from the end of the 5-byte jump
    #[test]
    fn test_encode_jmp_rel() {
        let encoded = X86::encode_jmp_rel(0x1000, 0x1100).unwrap();

        // E9 followed by 0x1100 - (0x1000 + 5) = 0xFB, little-endian
        assert_eq!(encoded, [0xE9, 0xFB, 0x00, 0x00, 0x00]);
    }

    /// absolute jump is emitted as push imm32; ret
    #[test]
    fn test_encode_jmp_abs() {
        let encoded = X86::encode_jmp_abs(0xDEADBEEF);

        // 68 = push, immediate is little-endian, C3 = ret
        assert_eq!(encoded, [0x68, 0xEF, 0xBE, 0xAD, 0xDE, 0xC3]);
    }

    /// push ebp is a single-byte, non-relative instruction
    #[test]
    fn test_decode_push_ebp() {
        let insn = decode_instruction_x86(&[0x55]).unwrap();

        assert!(!insn.is_relative);
        assert_eq!(insn.length, 1);
    }

    /// mov ebp, esp is opcode + ModR/M, non-relative
    #[test]
    fn test_decode_mov_ebp_esp() {
        // mov ebp, esp has two encodings: 89 E5 and 8B EC
        // (8B E5 would be the reverse, mov esp, ebp); the 8B EC form is used here
        let insn = decode_instruction_x86(&[0x8B, 0xEC]).unwrap();

        assert!(!insn.is_relative);
        assert_eq!(insn.length, 2);
    }

    /// the boundary search must cover at least the requested byte count
    #[test]
    fn test_find_instruction_boundary() {
        // push ebp; mov ebp, esp; sub esp, 0x10
        let prologue = [0x55, 0x8B, 0xEC, 0x83, 0xEC, 0x10];

        let covered = X86::find_instruction_boundary(&prologue, 5).unwrap();
        assert!(covered >= 5);
    }
}