/// Summary of a single decoded machine instruction.
///
/// Only the facts needed to step over an instruction, or to decide whether it
/// can be moved verbatim, are recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InstructionInfo {
    /// Encoded length in bytes, counted from the first prefix byte.
    pub length: usize,
    /// `true` when the encoding contains a displacement relative to the
    /// instruction itself (relative branches/calls, or a `mod = 00, rm = 101`
    /// ModRM form on x86_64).
    pub is_relative: bool,
    /// `true` for the opcodes the decoders flag as control transfers
    /// (returns, calls, jumps, `int`/`int3`, `syscall`).
    pub is_control_flow: bool,
    /// Presumably the resolved target of a relative instruction. The decoders
    /// defined alongside this type do not know the instruction's address and
    /// currently always leave this as `None`.
    pub relative_target: Option<usize>,
}
18
19#[cfg(target_arch = "x86_64")]
23pub fn decode_instruction(address: usize, max_bytes: usize) -> Option<InstructionInfo> {
24 if max_bytes == 0 {
25 return None;
26 }
27
28 let code = unsafe { core::slice::from_raw_parts(address as *const u8, max_bytes.min(15)) };
30
31 decode_x64(code)
32}
33
/// Decodes the instruction stored at `address` (32-bit x86 build).
///
/// At most `max_bytes` bytes are inspected, capped at 15 (the longest legal
/// x86 instruction). Returns `None` when `address` is null, `max_bytes` is
/// zero, the window would wrap around the address space, or the bytes cannot
/// be decoded.
///
/// This function reads raw memory: the caller must make sure that
/// `min(max_bytes, 15)` bytes starting at `address` are mapped and readable
/// for the duration of the call.
#[cfg(target_arch = "x86")]
pub fn decode_instruction(address: usize, max_bytes: usize) -> Option<InstructionInfo> {
    const MAX_INSTRUCTION_LEN: usize = 15;

    // A null pointer is never a valid slice base, and an empty window holds
    // no instruction.
    if address == 0 || max_bytes == 0 {
        return None;
    }

    let window = max_bytes.min(MAX_INSTRUCTION_LEN);
    // Reject a window that would wrap past the end of the address space.
    address.checked_add(window)?;

    // SAFETY: the pointer is non-null, `u8` has no alignment requirement, and
    // the range does not wrap. That the range is actually readable is the
    // caller's documented obligation.
    let code = unsafe { core::slice::from_raw_parts(address as *const u8, window) };

    decode_x86(code)
}
44
45#[cfg(target_arch = "x86_64")]
47fn decode_x64(code: &[u8]) -> Option<InstructionInfo> {
48 if code.is_empty() {
49 return None;
50 }
51
52 let mut offset = 0;
53
54 while offset < code.len() {
56 match code[offset] {
57 0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 => offset += 1,
59 0x66 | 0x67 => offset += 1,
61 0xF0 | 0xF2 | 0xF3 => offset += 1,
63 _ => break,
64 }
65 }
66
67 if offset >= code.len() {
68 return None;
69 }
70
71 let has_rex = (0x40..=0x4F).contains(&code[offset]);
73 let rex_w = has_rex && (code[offset] & 0x08) != 0;
74 if has_rex {
75 offset += 1;
76 }
77
78 if offset >= code.len() {
79 return None;
80 }
81
82 let opcode = code[offset];
83 offset += 1;
84
85 match opcode {
86 0x50..=0x5F => Some(InstructionInfo {
88 length: offset,
89 is_relative: false,
90 is_control_flow: false,
91 relative_target: None,
92 }),
93
94 0x90 => Some(InstructionInfo {
96 length: offset,
97 is_relative: false,
98 is_control_flow: false,
99 relative_target: None,
100 }),
101
102 0xC3 => Some(InstructionInfo {
104 length: offset,
105 is_relative: false,
106 is_control_flow: true,
107 relative_target: None,
108 }),
109
110 0xC2 => Some(InstructionInfo {
112 length: offset + 2,
113 is_relative: false,
114 is_control_flow: true,
115 relative_target: None,
116 }),
117
118 0xCC => Some(InstructionInfo {
120 length: offset,
121 is_relative: false,
122 is_control_flow: true,
123 relative_target: None,
124 }),
125
126 0xE8 => {
128 if code.len() < offset + 4 {
129 return None;
130 }
131 Some(InstructionInfo {
132 length: offset + 4,
133 is_relative: true,
134 is_control_flow: true,
135 relative_target: None,
136 })
137 }
138
139 0xE9 => {
141 if code.len() < offset + 4 {
142 return None;
143 }
144 Some(InstructionInfo {
145 length: offset + 4,
146 is_relative: true,
147 is_control_flow: true,
148 relative_target: None,
149 })
150 }
151
152 0xEB => {
154 if code.len() < offset + 1 {
155 return None;
156 }
157 Some(InstructionInfo {
158 length: offset + 1,
159 is_relative: true,
160 is_control_flow: true,
161 relative_target: None,
162 })
163 }
164
165 0x70..=0x7F => {
167 if code.len() < offset + 1 {
168 return None;
169 }
170 Some(InstructionInfo {
171 length: offset + 1,
172 is_relative: true,
173 is_control_flow: true,
174 relative_target: None,
175 })
176 }
177
178 0x68 => Some(InstructionInfo {
180 length: offset + 4,
181 is_relative: false,
182 is_control_flow: false,
183 relative_target: None,
184 }),
185
186 0x6A => Some(InstructionInfo {
188 length: offset + 1,
189 is_relative: false,
190 is_control_flow: false,
191 relative_target: None,
192 }),
193
194 0xB8..=0xBF if rex_w => Some(InstructionInfo {
196 length: offset + 8,
197 is_relative: false,
198 is_control_flow: false,
199 relative_target: None,
200 }),
201
202 0xB8..=0xBF => Some(InstructionInfo {
204 length: offset + 4,
205 is_relative: false,
206 is_control_flow: false,
207 relative_target: None,
208 }),
209
210 0xB0..=0xB7 => Some(InstructionInfo {
212 length: offset + 1,
213 is_relative: false,
214 is_control_flow: false,
215 relative_target: None,
216 }),
217
218 0x0F => {
220 if offset >= code.len() {
221 return None;
222 }
223 let op2 = code[offset];
224 offset += 1;
225
226 match op2 {
227 0x80..=0x8F => {
229 if code.len() < offset + 4 {
230 return None;
231 }
232 Some(InstructionInfo {
233 length: offset + 4,
234 is_relative: true,
235 is_control_flow: true,
236 relative_target: None,
237 })
238 }
239
240 0x90..=0x9F => decode_modrm(code, offset, has_rex, false, false),
242
243 0x40..=0x4F => decode_modrm(code, offset, has_rex, false, false),
245
246 0xB6 | 0xB7 | 0xBE | 0xBF => decode_modrm(code, offset, has_rex, false, false),
248
249 0x05 => Some(InstructionInfo {
251 length: offset,
252 is_relative: false,
253 is_control_flow: true,
254 relative_target: None,
255 }),
256
257 _ => decode_modrm(code, offset, has_rex, false, false),
259 }
260 }
261
262 0x00..=0x3F | 0x63 | 0x69 | 0x6B | 0x80..=0x8F | 0x8D | 0xC0..=0xC1 | 0xC6..=0xC7
264 | 0xD0..=0xD3 | 0xF6..=0xF7 | 0xFE..=0xFF => {
265 let has_imm8 = matches!(opcode, 0x80 | 0x83 | 0xC0 | 0xC1 | 0xC6 | 0x6B);
266 let has_imm32 = matches!(opcode, 0x81 | 0xC7 | 0x69);
267 decode_modrm(code, offset, has_rex, has_imm8, has_imm32)
268 }
269
270 0x04 | 0x0C | 0x14 | 0x1C | 0x24 | 0x2C | 0x34 | 0x3C | 0xA8 => Some(InstructionInfo {
272 length: offset + 1,
273 is_relative: false,
274 is_control_flow: false,
275 relative_target: None,
276 }),
277 0x05 | 0x0D | 0x15 | 0x1D | 0x25 | 0x2D | 0x35 | 0x3D | 0xA9 => Some(InstructionInfo {
278 length: offset + 4,
279 is_relative: false,
280 is_control_flow: false,
281 relative_target: None,
282 }),
283
284 0xCD => Some(InstructionInfo {
286 length: offset + 1,
287 is_relative: false,
288 is_control_flow: true,
289 relative_target: None,
290 }),
291
292 0xC9 => Some(InstructionInfo {
294 length: offset,
295 is_relative: false,
296 is_control_flow: false,
297 relative_target: None,
298 }),
299
300 _ => decode_modrm(code, offset, has_rex, false, false),
302 }
303}
304
/// Determines the length and basic classification of the first instruction in
/// `code`, using 32-bit protected-mode encoding rules.
///
/// Legacy prefixes are skipped, then the opcode is classified. A fixed set of
/// opcodes is recognised explicitly; every other opcode is assumed to be
/// followed by a ModRM byte and no immediate and is handed to `decode_modrm`.
///
/// Returns `None` when `code` ends before the opcode, or before the immediate
/// or displacement of one of the explicitly recognised opcodes.
/// `relative_target` is never filled in because the instruction's address is
/// not known here.
///
/// Known limitations: the `0x67` address-size prefix is skipped without
/// switching ModRM to its 16-bit form, and two-byte (`0x0F`) opcodes that
/// carry an immediate or have no ModRM byte are not special-cased.
#[cfg(target_arch = "x86")]
fn decode_x86(code: &[u8]) -> Option<InstructionInfo> {
    /// Result for an instruction whose length is already known.
    fn info(length: usize, is_relative: bool, is_control_flow: bool) -> InstructionInfo {
        InstructionInfo {
            length,
            is_relative,
            is_control_flow,
            relative_target: None,
        }
    }

    /// Same as `info`, but `None` if the instruction would run past `available` bytes.
    fn bounded(
        available: usize,
        length: usize,
        is_relative: bool,
        is_control_flow: bool,
    ) -> Option<InstructionInfo> {
        if length > available {
            None
        } else {
            Some(info(length, is_relative, is_control_flow))
        }
    }

    let len = code.len();
    let mut offset = 0;

    // Legacy prefixes: segment overrides, operand/address size, LOCK/REP.
    // Only the operand-size override affects instruction length here.
    let mut operand_size_override = false;
    while let Some(&byte) = code.get(offset) {
        match byte {
            0x66 => operand_size_override = true,
            0x26 | 0x2E | 0x36 | 0x3E | 0x64 | 0x65 | 0x67 | 0xF0 | 0xF2 | 0xF3 => {}
            _ => break,
        }
        offset += 1;
    }

    let opcode = *code.get(offset)?;
    offset += 1;

    // Width of operand-sized immediates and near-branch displacements:
    // 32 bits by default, 16 bits under the 0x66 prefix.
    let imm_z = if operand_size_override { 2 } else { 4 };

    match opcode {
        // inc/dec r32 (0x40..=0x4F are not REX prefixes in 32-bit mode),
        // push/pop r32, nop, leave
        0x40..=0x5F | 0x90 | 0xC9 => Some(info(offset, false, false)),

        // ret, int3
        0xC3 | 0xCC => Some(info(offset, false, true)),

        // ret imm16
        0xC2 => bounded(len, offset + 2, false, true),

        // int imm8
        0xCD => bounded(len, offset + 1, false, true),

        // call/jmp rel32 (rel16 with 0x66)
        0xE8 | 0xE9 => bounded(len, offset + imm_z, true, true),

        // jmp rel8, jcc rel8
        0xEB | 0x70..=0x7F => bounded(len, offset + 1, true, true),

        // push imm32, mov r32, imm32 (imm16 with 0x66)
        0x68 | 0xB8..=0xBF => bounded(len, offset + imm_z, false, false),

        // push imm8, mov r8, imm8
        0x6A | 0xB0..=0xB7 => bounded(len, offset + 1, false, false),

        // ALU/test with AL and imm8. These must be matched before the general
        // 0x00..=0x3F arm below, which would otherwise treat the immediate as
        // a ModRM byte.
        0x04 | 0x0C | 0x14 | 0x1C | 0x24 | 0x2C | 0x34 | 0x3C | 0xA8 => {
            bounded(len, offset + 1, false, false)
        }

        // ALU/test with eAX and an operand-sized immediate.
        0x05 | 0x0D | 0x15 | 0x1D | 0x25 | 0x2D | 0x35 | 0x3D | 0xA9 => {
            bounded(len, offset + imm_z, false, false)
        }

        // Two-byte opcode map.
        0x0F => {
            let op2 = *code.get(offset)?;
            offset += 1;

            if (0x80..=0x8F).contains(&op2) {
                // jcc rel32 (rel16 with 0x66)
                bounded(len, offset + imm_z, true, true)
            } else {
                decode_modrm(code, offset, false, false, false)
            }
        }

        // One-byte opcodes with a ModRM byte, some with an immediate.
        0x00..=0x3F
        | 0x69
        | 0x6B
        | 0x80..=0x8F
        | 0xC0..=0xC1
        | 0xC6..=0xC7
        | 0xD0..=0xD3
        | 0xF6..=0xF7
        | 0xFE..=0xFF => {
            // In group 3 (F6/F7) only TEST (/0 and its /1 alias) has an immediate.
            let group3_test = matches!(opcode, 0xF6 | 0xF7)
                && code
                    .get(offset)
                    .map_or(false, |&modrm| (modrm >> 3) & 0x07 <= 1);

            // 0x82 is the 32-bit-mode alias of group 1 `0x80` and also takes an imm8.
            let has_imm8 = matches!(opcode, 0x80 | 0x82 | 0x83 | 0xC0 | 0xC1 | 0xC6 | 0x6B)
                || (opcode == 0xF6 && group3_test);
            let has_imm_z =
                matches!(opcode, 0x81 | 0xC7 | 0x69) || (opcode == 0xF7 && group3_test);

            if has_imm_z && imm_z == 2 {
                // `decode_modrm` only knows 8- and 32-bit immediates, so size
                // the ModRM part first and append the 16-bit immediate here.
                let base = decode_modrm(code, offset, false, false, false)?;
                let length = base.length + 2;
                if length > len {
                    None
                } else {
                    Some(InstructionInfo { length, ..base })
                }
            } else {
                decode_modrm(code, offset, false, has_imm8, has_imm_z)
            }
        }

        // Anything else is assumed to be "opcode + ModRM".
        _ => decode_modrm(code, offset, false, false, false),
    }
}
458
459fn decode_modrm(
461 code: &[u8],
462 offset: usize,
463 has_rex: bool,
464 has_imm8: bool,
465 has_imm32: bool,
466) -> Option<InstructionInfo> {
467 if offset >= code.len() {
468 return Some(InstructionInfo {
469 length: offset,
470 is_relative: false,
471 is_control_flow: false,
472 relative_target: None,
473 });
474 }
475
476 let modrm = code[offset];
477 let mod_field = (modrm >> 6) & 0x03;
478 let rm = modrm & 0x07;
479
480 let mut len = offset + 1;
481
482 #[cfg(target_arch = "x86_64")]
484 let is_rip_relative = mod_field == 0 && rm == 5;
485 #[cfg(target_arch = "x86")]
486 let is_rip_relative = false;
487
488 match mod_field {
489 0b00 => {
490 if rm == 4 {
491 if len < code.len() {
493 let sib = code[len];
494 len += 1;
495 let base = sib & 0x07;
496 if base == 5 {
497 len += 4; }
499 }
500 } else if rm == 5 {
501 len += 4; }
503 }
504 0b01 => {
505 if rm == 4 {
506 len += 1; }
508 len += 1; }
510 0b10 => {
511 if rm == 4 {
512 len += 1; }
514 len += 4; }
516 0b11 => {
517 }
519 _ => {}
520 }
521
522 if has_imm8 {
524 len += 1;
525 }
526 if has_imm32 {
527 len += 4;
528 }
529
530 let _ = has_rex; Some(InstructionInfo {
533 length: len.min(code.len()),
534 is_relative: is_rip_relative,
535 is_control_flow: false,
536 relative_target: None,
537 })
538}
539
540pub fn find_boundary(address: usize, required_size: usize, max_scan: usize) -> Option<usize> {
544 let mut current = address;
545 let mut total = 0;
546
547 while total < required_size && (current - address) < max_scan {
548 let info = decode_instruction(current, max_scan - (current - address))?;
549 total += info.length;
550 current += info.length;
551 }
552
553 if total >= required_size {
554 Some(total)
555 } else {
556 None
557 }
558}
559
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes `bytes` with the decoder for the architecture being built and
    /// panics if decoding fails.
    #[cfg(target_arch = "x86_64")]
    fn decode(bytes: &[u8]) -> InstructionInfo {
        decode_x64(bytes).unwrap()
    }

    /// Decodes `bytes` with the decoder for the architecture being built and
    /// panics if decoding fails.
    #[cfg(target_arch = "x86")]
    fn decode(bytes: &[u8]) -> InstructionInfo {
        decode_x86(bytes).unwrap()
    }

    #[test]
    fn test_decode_nop() {
        let decoded = decode(&[0x90u8]);

        assert_eq!(decoded.length, 1);
        assert!(!decoded.is_relative);
    }

    #[test]
    fn test_decode_push_pop() {
        // 0x55 pushes the frame pointer, 0x5D pops it; both are single-byte.
        let push_len = decode(&[0x55u8]).length;
        assert_eq!(push_len, 1);

        let pop_len = decode(&[0x5Du8]).length;
        assert_eq!(pop_len, 1);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_decode_mov_rbp_rsp() {
        // REX.W + 89 /r with a register-direct ModRM byte.
        let bytes = [0x48, 0x89, 0xE5];
        assert_eq!(decode(&bytes).length, 3);
    }

    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_decode_sub_rsp_imm8() {
        // REX.W + 83 /5 ib: ModRM followed by an 8-bit immediate.
        let bytes = [0x48, 0x83, 0xEC, 0x28];
        assert_eq!(decode(&bytes).length, 4);
    }

    #[test]
    fn test_decode_jmp_rel32() {
        let decoded = decode(&[0xE9, 0x00, 0x00, 0x00, 0x00]);

        assert_eq!(decoded.length, 5);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
    }

    #[test]
    fn test_decode_call_rel32() {
        let decoded = decode(&[0xE8, 0x00, 0x00, 0x00, 0x00]);

        assert_eq!(decoded.length, 5);
        assert!(decoded.is_relative);
        assert!(decoded.is_control_flow);
    }
}