disasm_x64/
lib.rs

1#![no_std]
2
/// Mask selecting the Mod field (two most significant bits) of a ModR/M byte.
const MOD_M: u8 = 0b1100_0000;
/// Mask selecting the R/M field (three least significant bits) of a ModR/M byte.
const RM_M: u8 = 0b0000_0111;
/// Mask selecting the base field (three least significant bits) of a SIB byte.
const BASE_M: u8 = 0b0000_0111;
/// Mask selecting the W bit of a REX prefix byte.
const REX_W: u8 = 0b0000_1000;
/// Maximum length, in bytes, of a single x86 instruction.
const MAX_INSN_LEN_X86: usize = 15;

/// Maximum instruction length used when building for 32-bit x86.
#[cfg(target_arch = "x86")]
const MAX_INSN_LEN_X86_32: usize = MAX_INSN_LEN_X86;

/// Maximum instruction length used for x86-64.
const MAX_INSN_LEN_X86_64: usize = MAX_INSN_LEN_X86;
18
/// Error value of this crate, produced when an instruction length cannot be
/// computed.
#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Error {
    invalidinstlen: usize,
}

impl Error {
    /// Builds an error whose `invalidinstlen` field is zero, i.e. the same
    /// value the derived `Default` implementation yields.
    pub fn new() -> Self {
        Self::default()
    }
}
33
/// Bit width used by the length decoder for the processor mode, the operand
/// size and the address size of an instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Bits {
    /// 16-bit width.
    B16,
    /// 32-bit width.
    B32,
    /// 64-bit width.
    B64,
}
41
42#[cfg(target_arch = "x86")]
43pub(crate) fn max_insn_len() -> usize {
44    MAX_INSN_LEN_X86_32
45}
46
47#[cfg(target_arch = "x86_64")]
48pub(crate) fn max_insn_len() -> usize {
49    MAX_INSN_LEN_X86_64
50}
51
52/// get insn len
53#[inline]
54pub(crate) fn insn_len_x86<T>(insn: *const T, bits: Bits) -> Option<i32> {
55    let  mut len: i32 = 0;
56    let mut twobytes: i32 = 0;
57    let  mut has_modrm: i32 = 0;
58	let mut operand_bits = Bits::B32;
59    let mut addr_bits = bits;
60    let mut c: *const u8 = insn as _;
61	let mut modrm: u8 = 0;
62    let opcode: u8;
63
64    /* prefixes
65	//  *
66	//  * 0xf0, 0xf2, 0xf3, 0x2e, 0x36
67	//  * 0x3e, 0x26, 0x64, 0x65, 0x66, 0x67
68	//  */
69
70	// // skip prefixes
71    match unsafe { *c } {
72        0xf0 | 0xf2 | 0xf3 | 0x2e | 0x36 |
73        0x3e | 0x26 | 0x64 | 0x65 | 0x66 | 0x67 => {
74            if unsafe { *c } == 0x66 {
75                operand_bits = Bits::B32;
76            } // 16bits operands
77            if unsafe { *c } == 0x67 {
78                addr_bits = if addr_bits == Bits::B32 {
79                     Bits::B16 
80                    } else { 
81                    Bits::B32 
82                };
83            } // 16bits addressing (x86-32), 32bits addressing (x86-64)
84            c = unsafe { c.add(1) };
85            len += 1;
86        },
87
88        _ => {
89        
90        },
91    }
92
93
94    if bits == Bits::B64 && unsafe{*c} & 0xf0 == 0x40 { // x86-64 && REX byte
95        if unsafe {*c} & REX_W != 0 {    
96            operand_bits = Bits::B64;
97        }
98        c = unsafe { c.add(1) };
99        len += 1;
100    }
101
102    // check for 2bytes opcodes (0x0f prefix)
103    if unsafe { *c } == 0x0f {
104        twobytes += 1;
105        c = unsafe { c.add(1) };
106        len += 1;
107    // check 0x9b prefix
108    /* 0x9b prefix is used only by the following 1byte opcodes
109	 *
110	 * 0xd9 Mod != 11 Reg/Op = 110 or 111
111	 * 0xdb ModR/M = 0xe2 or 0xe3
112	 * 0xdd Reg/Op = 110 or 111
113	 * 0xdf ModR/M = 0xe0
114	 */  
115    /* 2bytes opcodes that they *don't* use ModR/M byte:
116	 *
117	 * 0x05 - 0x09, 0x0b, 0x0e,
118	 * 0x30 - 0x37, 0x77, 0x80 - 0x8f,
119	 * 0xa0 - 0xa2, 0xa8 - 0xaa, 0xb9
120	 * 0xc8 - 0xcf
121	 */  
122    } else if  (unsafe { *c } == 0x9b && 
123    ( (unsafe { *(c.add(1)) } == 0xd9 && (unsafe { *(c.add(2)) } & MOD_M) != MOD_M && (unsafe { *(c.add(2))} & 0x30) == 0x30) ||
124        (unsafe { *(c.add(1))  } == 0xdb && (unsafe {*(c.add(2)) } == 0xe2 || unsafe { *(c.add(2)) } == 0xe3)) ||
125        (unsafe { *(c.add(1))  } == 0xdd && (unsafe { *(c.add(2)) } & 0x30) == 0x30) ||
126        (unsafe { *(c.add(1))  } == 0xdf && unsafe { *(c.add(2)) } == 0xe0)
127    )) 
128    {
129        
130
131        c = unsafe { c.add(1) };
132        len += 1;
133    }
134
135    opcode = unsafe { *(c.add(1)) };
136	len += 1;
137
138	/* 1byte opcodes that use ModR/M byte:
139	 *
140	 * 0x00 - 0x03, 0x08 - 0x0b,
141	 * 0x10 - 0x13, 0x18 - 0x1b,
142	 * 0x20 - 0x23, 0x28 - 0x2b,
143	 * 0x30 - 0x33, 0x38 - 0x3b,
144	 * 0x62, 0x63, 0x69, 0x6b,
145	 * 0x80 - 0x8f, 0xc0, 0xc1,
146	 * 0xc4 - 0xc7,
147	 * 0xd0 - 0xd3, 0xd8 - 0xdf
148	 * 0xf6, 0xf7, 0xfe, 0xff
149	 */
150    if twobytes == 0 && 
151    ((opcode & 0xf4) == 0 || (opcode & 0xf4) == 0x10 ||
152    (opcode & 0xf4) == 0x20 || (opcode & 0xf4) == 0x30 ||
153    opcode == 0x62 || opcode == 0x63 || opcode == 0x69 || opcode == 0x6b ||
154    (opcode & 0xf0) == 0x80 || opcode == 0xc0 || opcode == 0xc1 ||
155    (opcode & 0xfc) == 0xc4 || (opcode & 0xfc) == 0xd0 ||
156    (opcode & 0xf8) == 0xd8 || opcode == 0xf6 || opcode == 0xf7 ||
157    opcode == 0xfe || opcode == 0xff) {
158        has_modrm = 1;
159    }
160	/* 2bytes opcodes that they *don't* use ModR/M byte:
161	 *
162	 * 0x05 - 0x09, 0x0b, 0x0e,
163	 * 0x30 - 0x37, 0x77, 0x80 - 0x8f,
164	 * 0xa0 - 0xa2, 0xa8 - 0xaa, 0xb9
165	 * 0xc8 - 0xcf
166	 */
167    if twobytes !=0 {
168        if  !((opcode >= 0x05 && opcode <= 0x09) || opcode == 0x0b ||
169        opcode == 0x0e || (opcode & 0xf8) == 0x30 || opcode == 0x77 ||
170        (opcode & 0xf0) == 0x80 || (opcode >= 0xa0 && opcode <= 0xa2) ||
171        (opcode >= 0xa8 && opcode <= 0xaa) || (opcode & 0xf8) == 0xc8 ||
172        opcode == 0xb9) {
173            has_modrm = 1;
174        }
175        // 3bytes opcodes
176        if opcode == 0x38 || opcode == 0x3a {
177            c = unsafe { c.add(1) };
178            len += 1;
179		}
180        // 3DNow! opcode
181        if opcode ==0x0f {
182            len += 1;
183        }
184
185    }
186
187    if has_modrm != 0 {
188        len += 1;
189        modrm = unsafe { *(c.add(1)) };
190        assert!(true, "{}", modrm);
191        if addr_bits != Bits::B16 && (modrm & (MOD_M | RM_M)) == 5  {// Mod = 00 R/M = 101
192            len += 4;
193        }    
194        if addr_bits == Bits::B16 && (modrm & (MOD_M | RM_M)) == 6 {// Mod = 00 R/M = 110 and 16bits addressing
195            len += 2;
196        }    
197        if (modrm & MOD_M) == 0x40 { // Mod = 01
198            len += 1;
199        }    
200        if (modrm & MOD_M) == 0x80 {// Mod = 10
201           match addr_bits {
202               Bits::B16 => {
203                    len += 2;
204               },
205               _ => {
206                    len += 4;
207               },
208           }
209        }
210        // check SIB byte
211        if addr_bits != Bits::B16 && (modrm & MOD_M) != MOD_M && (modrm & RM_M) == 4 { // if it has SIB
212            len += 1;
213            if (modrm & MOD_M) == 0 && (unsafe { *c } & BASE_M) == 5 {// Mod = 00   SIB Base = 101
214                len += 4;
215            }    
216            c = unsafe { c.add(1) };
217            let _ = c;
218        }
219    }
220    /* Immediate operands
221	 *
222	 * 1byte opcode list:
223	 *
224	 * imm8 (1 byte)
225	 *
226	 * 0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3c, 0x6a, 0x6b, 0x70 - 0x7f,
227	 * 0x80, 0x82, 0x83, 0xa8, 0xb0 - 0xb7, 0xc0, 0xc1, 0xc6, 0xcd, 0xd4,
228	 * 0xd5, 0xe0 - 0xe7, 0xeb, 0xf6 (Reg/Op = 000 or Reg/Op = 001)
229	 *
230	 * imm16 (2 bytes)
231	 *
232	 * 0xc2, 0xca
233	 *
234	 * imm16/32 (2 bytes if operand_bits == __b16 else 4 bytes)
235	 *
236	 * 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d, 0x68, 0x69, 0x81, 0xa9
237	 * 0xc7, 0xe8, 0xe9
238	 *
239	 * imm16/32/64 (2 bytes if operand_bits == __b16, 4 bytes if __b32, 8 bytes if __b64)
240	 *
241	 * 0xb8 - 0xbf, 0xf7 (Reg/Op = 000 or Reg/Op = 001)
242	 *
243	 * moffs (2 bytes if addr_bits == __b16, 4 bytes if __b32, 8 bytes if __b64)
244	 *
245	 * 0xa0, 0xa1, 0xa2, 0xa3
246	 *
247	 * others
248	 *
249	 * 0xea, 0x9a: imm16 + imm16/32
250	 * 0xc8: imm16 + imm8
251	 *
252	 *
253	 * 2bytes opcode list:
254	 *
255	 * imm8 (1 byte)
256	 *
257	 * 0x70 - 0x73, 0xa4, 0xac, 0xba, 0xc2, 0xc4 - 0xc6
258	 *
259	 * imm16/32 (2 bytes if operand_bits == __b16 else 4 bytes)
260	 *
261	 * 0x80 - 0x8f
262	 *
263	 *
264	 * all 3bytes opcodes with 0x3a prefix have imm8
265	 */
266    if twobytes == 0 {
267        // imm8
268        if ((opcode & 7) == 4 && (opcode & 0xf0) <= 0x30) ||
269        opcode == 0x6a || opcode == 0x6b || (opcode & 0xf0) == 0x70 ||
270        opcode == 0x80 || opcode == 0x82 || opcode == 0x83 ||
271        opcode == 0xa8 || (opcode & 0xf8) == 0xb0 || opcode == 0xc0 ||
272        opcode == 0xc1 || opcode == 0xc6 || opcode == 0xcd ||
273        opcode == 0xd4 || opcode == 0xd5 || (opcode & 0xf8) == 0xe0 ||
274        opcode == 0xeb || (opcode == 0xf6 && (modrm & 0x30) == 0) {
275            len += 1;
276        }
277
278        // imm16
279        if opcode == 0xc2 || opcode == 0xca{
280            len += 2;
281        }
282        // imm16/32
283        if ((opcode & 7) == 5 && (opcode & 0xf0) <= 0x30) ||
284            opcode == 0x68 || opcode == 0x69 || opcode == 0x81 ||
285            opcode == 0xa9 || opcode == 0xc7 || opcode == 0xe8 ||
286            opcode == 0xe9 {
287                match operand_bits {
288                    Bits::B16 =>{
289                        len += 2;
290                    },
291                    _ => {
292                        len += 4;
293                    },
294                }
295        }
296        // imm16/32/64
297        if (opcode & 0xf8) == 0xb8 || (opcode == 0xf7 && (modrm & 0x30) == 0) {
298            match operand_bits {
299                Bits::B16 => {
300                    len += 2;
301                },
302                Bits::B32 => {
303                    len += 4;
304                },
305                _ => {
306                    len += 8;
307                },
308            }
309        }
310        // moffs
311        if (opcode & 0xfc) == 0xa0 {
312            match addr_bits {
313                Bits::B16 => {
314                    len += 2;
315                },
316                Bits::B32 => {
317                    len += 4;
318                },
319                _ => {
320                    len += 8;
321                },
322            }
323        }
324        // others
325        if opcode == 0xea || opcode == 0x9a {
326            len +=2;
327            match operand_bits {
328                Bits::B16 =>{
329                    len += 2;
330                },
331                _ => {
332                    len += 4;
333                },
334            }
335
336        }    
337        if opcode == 0xc8{
338            len += 3;
339        }    
340
341    }else { // 2bytes opcodes
342        if (opcode & 0xfc) == 0x70 || opcode == 0xa4 ||
343        opcode == 0xac || opcode == 0xba || opcode == 0xc2 ||
344        (opcode >= 0xc4 && opcode <= 0xc6) {
345            len += 1;
346        }
347        // imm16/32
348        if (opcode & 0xf0) == 0x80 {
349            match operand_bits {
350                Bits::B16 =>{
351                    len += 2;
352                },
353                _ => {
354                    len += 4;
355                },
356            }
357        }
358        // 3bytes opcodes with 0x3a prefix
359        if opcode == 0x3a {
360            len += 1;
361        }    
362
363    }
364    // wrong length
365    if len >  max_insn_len() as _ {
366        return None;
367    }
368
369    assert!(true, "{}" ,opcode);
370    Some(len)
371}
372
373
/// Returns the length, in bytes, of the instruction at `insn` when building
/// for 32-bit x86.
///
/// # Parameters
///
/// * `insn`: pointer to the instruction to measure; `T` may be any type.
///
/// # Return
///
/// * `Ok(len)`: the length of the instruction in bytes.
///
/// * `Err(Error::new())`: `insn_len_x86` could not produce a length.
///
/// # Note
///
/// The work is delegated to `insn_len_x86` with `Bits::B32`, which reads
/// through `insn`; the pointer must therefore reference readable
/// instruction bytes.
#[cfg(target_arch = "x86")]
pub fn arch_insn_len<T>(insn: *const T) -> Result<i32, Error> {
    insn_len_x86(insn, Bits::B32).ok_or_else(Error::new)
}
398
399                                                                    
400/// This function calculates the length of an instruction in bytes for the x86_64 architecture.
401///
402/// # Parameters
403///
404/// * `insn`: A pointer to the instruction to be measured. This should be a pointer to a type `T`.
405///
406/// # Return
407///
408/// * `Ok(len)`: If the length of the instruction is successfully calculated, the function returns `Ok(len)`,
409///   where `len` is the length of the instruction in bytes.
410///
411/// * `Err(Error::new())`: If the length of the instruction cannot be calculated due to an error,
412///   the function returns `Err(Error::new())`.
413///
414/// # Note
415///
416/// This function uses the `insn_len_x86` function to calculate the length of the instruction.
417#[cfg(target_arch = "x86_64")]
418pub fn arch_insn_len<T>(insn: *const T) -> Result<i32, Error> {
419    match insn_len_x86(insn, Bits::B64) {
420        Some(len) => Ok(len),
421        None => Err(Error::new()),
422    }
423}
424
425
426
/// Unit tests for the instruction-length decoder.
#[cfg(test)]
mod tests {
    use super::*;

    /// Walks a 20-byte code buffer one instruction at a time and checks that
    /// the decoded lengths add up to exactly the size of the buffer.
    #[test]
    fn it_works() {
        const CODE: [u8; 20] = [
            0x48, 0x83, 0xEC, 0x28, 0xE8, 0x0B, 0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0x28, 0xE9,
            0x7A, 0xFE, 0xFF, 0xFF, 0xcc, 0xcc,
        ];

        qwb();
        let results = CODE;
        let base = results.as_ptr() as *const u8;
        let mut offset = 0;
        while offset < 20 {
            match arch_insn_len(unsafe { base.add(offset as _) }) {
                // Overshooting the buffer ends the loop through its condition.
                Ok(len) => offset += len,
                Err(_) => {
                    // A decoding failure is reported as a zero offset.
                    offset = 0;
                    break;
                }
            }
        }
        assert_eq!(20, offset);
        // The buffer itself must be left untouched by the decoder.
        assert_eq!(results, CODE)
    }
}
460
461
/// Asserts that the string `"qwb"` is three bytes long; called by the unit
/// test of this crate.
pub fn qwb() {
    let name = "qwb";
    assert_eq!(3, name.len());
}