asmkit/x86/
formatter.rs

1use super::decode::{Decoder, Instruction, OperandType, RegType};
2use super::decode_tab::{Opcode, MNEMONIC_LENS, MNEMONIC_OFFS, MNEMONIC_STR};
3
4use core::fmt::{self, Write};
5
/// Writes `val` to `w` as lower-case hexadecimal with a `0x` prefix and no
/// leading zeros (`0` is written as `0x0`).
///
/// Returns any error reported by the underlying writer.
fn strpcatnum<W: Write>(w: &mut W, val: u64) -> fmt::Result {
    // The `#` flag adds the `0x` prefix, so no scratch buffer or `unsafe`
    // UTF-8 conversion is needed.
    write!(w, "{:#x}", val)
}
27
28pub fn strpcatreg<W: Write>(w: &mut W, rt: RegType, ri: usize, size: usize) -> fmt::Result {
29    let nametab = [
30        2, 97, 108, 4, 98, 110, 100, 48, 2, 99, 108, 4, 98, 110, 100, 49, 2, 100, 108, 4, 98, 110,
31        100, 50, 2, 98, 108, 4, 98, 110, 100, 51, 3, 115, 112, 108, 0, 32, 32, 32, 3, 98, 112, 108,
32        0, 32, 32, 32, 3, 115, 105, 108, 0, 32, 32, 32, 3, 100, 105, 108, 0, 32, 32, 32, 3, 114,
33        56, 98, 0, 32, 32, 32, 3, 114, 57, 98, 0, 32, 32, 32, 4, 114, 49, 48, 98, 0, 32, 32, 4,
34        114, 49, 49, 98, 0, 32, 32, 4, 114, 49, 50, 98, 2, 97, 104, 4, 114, 49, 51, 98, 2, 99, 104,
35        4, 114, 49, 52, 98, 2, 100, 104, 4, 114, 49, 53, 98, 2, 98, 104, 0, 0, 32, 32, 32, 32, 32,
36        32, 2, 97, 120, 4, 116, 109, 109, 48, 2, 99, 120, 4, 116, 109, 109, 49, 2, 100, 120, 4,
37        116, 109, 109, 50, 2, 98, 120, 4, 116, 109, 109, 51, 2, 115, 112, 4, 116, 109, 109, 52, 2,
38        98, 112, 4, 116, 109, 109, 53, 2, 115, 105, 4, 116, 109, 109, 54, 2, 100, 105, 4, 116, 109,
39        109, 55, 3, 114, 56, 119, 32, 2, 101, 115, 3, 114, 57, 119, 32, 2, 99, 115, 4, 114, 49, 48,
40        119, 2, 115, 115, 4, 114, 49, 49, 119, 2, 100, 115, 4, 114, 49, 50, 119, 2, 102, 115, 4,
41        114, 49, 51, 119, 2, 103, 115, 4, 114, 49, 52, 119, 0, 32, 32, 4, 114, 49, 53, 119, 0, 32,
42        32, 2, 105, 112, 0, 32, 32, 32, 32, 3, 101, 97, 120, 3, 109, 109, 48, 3, 101, 99, 120, 3,
43        109, 109, 49, 3, 101, 100, 120, 3, 109, 109, 50, 3, 101, 98, 120, 3, 109, 109, 51, 3, 101,
44        115, 112, 3, 109, 109, 52, 3, 101, 98, 112, 3, 109, 109, 53, 3, 101, 115, 105, 3, 109, 109,
45        54, 3, 101, 100, 105, 3, 109, 109, 55, 3, 114, 56, 100, 32, 2, 107, 48, 3, 114, 57, 100,
46        32, 2, 107, 49, 4, 114, 49, 48, 100, 2, 107, 50, 4, 114, 49, 49, 100, 2, 107, 51, 4, 114,
47        49, 50, 100, 2, 107, 52, 4, 114, 49, 51, 100, 2, 107, 53, 4, 114, 49, 52, 100, 2, 107, 54,
48        4, 114, 49, 53, 100, 2, 107, 55, 3, 101, 105, 112, 0, 32, 32, 32, 3, 114, 97, 120, 3, 99,
49        114, 48, 3, 114, 99, 120, 0, 32, 32, 32, 3, 114, 100, 120, 3, 99, 114, 50, 3, 114, 98, 120,
50        3, 99, 114, 51, 3, 114, 115, 112, 3, 99, 114, 52, 3, 114, 98, 112, 0, 32, 32, 32, 3, 114,
51        115, 105, 0, 32, 32, 32, 3, 114, 100, 105, 0, 32, 32, 32, 2, 114, 56, 32, 3, 99, 114, 56,
52        2, 114, 57, 32, 3, 100, 114, 48, 3, 114, 49, 48, 3, 100, 114, 49, 3, 114, 49, 49, 3, 100,
53        114, 50, 3, 114, 49, 50, 3, 100, 114, 51, 3, 114, 49, 51, 3, 100, 114, 52, 3, 114, 49, 52,
54        3, 100, 114, 53, 3, 114, 49, 53, 3, 100, 114, 54, 3, 114, 105, 112, 3, 100, 114, 55, 5,
55        115, 116, 40, 48, 41, 0, 32, 5, 115, 116, 40, 49, 41, 0, 32, 5, 115, 116, 40, 50, 41, 0,
56        32, 5, 115, 116, 40, 51, 41, 0, 32, 5, 115, 116, 40, 52, 41, 0, 32, 5, 115, 116, 40, 53,
57        41, 0, 32, 5, 115, 116, 40, 54, 41, 0, 32, 5, 115, 116, 40, 55, 41, 0, 32, 4, 120, 109,
58        109, 48, 0, 32, 32, 4, 120, 109, 109, 49, 0, 32, 32, 4, 120, 109, 109, 50, 0, 32, 32, 4,
59        120, 109, 109, 51, 0, 32, 32, 4, 120, 109, 109, 52, 0, 32, 32, 4, 120, 109, 109, 53, 0, 32,
60        32, 4, 120, 109, 109, 54, 0, 32, 32, 4, 120, 109, 109, 55, 0, 32, 32, 4, 120, 109, 109, 56,
61        0, 32, 32, 4, 120, 109, 109, 57, 0, 32, 32, 5, 120, 109, 109, 49, 48, 0, 32, 5, 120, 109,
62        109, 49, 49, 0, 32, 5, 120, 109, 109, 49, 50, 0, 32, 5, 120, 109, 109, 49, 51, 0, 32, 5,
63        120, 109, 109, 49, 52, 0, 32, 5, 120, 109, 109, 49, 53, 0, 32, 5, 120, 109, 109, 49, 54, 0,
64        32, 5, 120, 109, 109, 49, 55, 0, 32, 5, 120, 109, 109, 49, 56, 0, 32, 5, 120, 109, 109, 49,
65        57, 0, 32, 5, 120, 109, 109, 50, 48, 0, 32, 5, 120, 109, 109, 50, 49, 0, 32, 5, 120, 109,
66        109, 50, 50, 0, 32, 5, 120, 109, 109, 50, 51, 0, 32, 5, 120, 109, 109, 50, 52, 0, 32, 5,
67        120, 109, 109, 50, 53, 0, 32, 5, 120, 109, 109, 50, 54, 0, 32, 5, 120, 109, 109, 50, 55, 0,
68        32, 5, 120, 109, 109, 50, 56, 0, 32, 5, 120, 109, 109, 50, 57, 0, 32, 5, 120, 109, 109, 51,
69        48, 0, 32, 5, 120, 109, 109, 51, 49, 0, 32, 0,
70    ];
71
72    let nametab_idx = [608u16, 0, 69, 205, 544, 276, 139, 341, 3, 412, 484];
73    let idx = if rt == RegType::Gpl {
74        size * 17 * 8
75    } else {
76        nametab_idx[rt as usize] as usize
77    };
78
79    let mut dst = [0; 16];
80    let name = &nametab[idx + 8 * ri..];
81
82    for i in 0..8 {
83        dst[i] = name[i + 1];
84    }
85
86    if rt == RegType::Vec && size > 4 {
87        dst[0] += size as u8 - 4;
88    }
89
90    write!(
91        w,
92        "{}",
93        core::str::from_utf8(&dst[..name[0] as usize]).unwrap()
94    )
95}
96
97fn mnemonic<W: Write>(w: &mut W, inst: &Instruction) -> fmt::Result {
98    let mut mnem = &MNEMONIC_STR[MNEMONIC_OFFS[inst.typ as usize] as usize..];
99    let mut mnem_len = MNEMONIC_LENS[inst.code() as usize];
100
101    let mut prefix_xacq_xrel = false;
102    let mut prefix_segment = false;
103
104    let mut sizesuffix = [0u8; 4];
105    let mut sizesuffixlen = 0;
106
107    if inst.op_type(0) == OperandType::Off && inst.op_size_log(0) == 1 {
108        sizesuffix[0] = b'w';
109        sizesuffixlen = 0;
110    }
111
112    match inst.code() {
113        Opcode::C_SEP => {
114            mnem = &mnem[inst.opsize() & 0xc..];
115            mnem_len = 3;
116        }
117
118        Opcode::C_EX => {
119            mnem = &mnem[inst.opsize() & 0xc..];
120            mnem_len = if inst.opsize() < 4 { 3 } else { 4 };
121        }
122
123        Opcode::CMPXCHGD => match inst.opsize_log() {
124            2 => {
125                sizesuffix[0] = b'8';
126                sizesuffix[1] = b'b';
127                sizesuffixlen = 2;
128            }
129            3 => {
130                sizesuffix[0] = b'1';
131                sizesuffix[1] = b'6';
132                sizesuffix[2] = b'b';
133                sizesuffixlen = 3;
134            }
135
136            _ => (),
137        },
138
139        Opcode::JCXZ => {
140            mnem_len = if inst.addrsize_log() == 1 { 4 } else { 5 };
141            mnem = &mnem[5 * (inst.addrsize_log() - 1)..];
142        }
143
144        Opcode::PUSH => {
145            if inst.op_size_log(0) == 1 && inst.op_type(0) == OperandType::Imm {
146                sizesuffix[0] = b'w';
147                sizesuffixlen = 1;
148            }
149            if inst.op_size_log(0) == 1
150                && inst.op_type(0) == OperandType::Reg
151                && inst.op_reg_type(0) == Some(RegType::Seg)
152            {
153                sizesuffix[0] = b'w';
154                sizesuffixlen = 1;
155            }
156        }
157
158        Opcode::POP => {
159            if inst.op_size_log(0) == 1
160                && inst.op_type(0) == OperandType::Reg
161                && inst.op_reg_type(0) == Some(RegType::Seg)
162            {
163                sizesuffix[0] = b'w';
164                sizesuffixlen = 1;
165            }
166        }
167
168        Opcode::MOV => {
169            if inst.has_rep()
170                && inst.op_type(0) == OperandType::Mem
171                && inst.op_type(1) == OperandType::Imm
172            {
173                prefix_xacq_xrel = true;
174            }
175        }
176
177        Opcode::FXSAVE
178        | Opcode::FXRSTOR
179        | Opcode::XSAVE
180        | Opcode::XSAVEC
181        | Opcode::XSAVEOPT
182        | Opcode::XSAVES
183        | Opcode::XRSTOR
184        | Opcode::XRSTORS => {
185            if inst.opsize_log() == 3 {
186                sizesuffix[0] = b'6';
187                sizesuffix[1] = b'4';
188                sizesuffixlen = 2;
189            }
190        }
191
192        Opcode::EVX_MOV_G2X | Opcode::EVX_MOV_X2G => {
193            sizesuffix[0] = b"bwdq"[inst.op_size_log(0)];
194            sizesuffixlen = 1;
195        }
196
197        Opcode::EVX_PBROADCAST => {
198            sizesuffix[0] = b"bwdq"[inst.op_size_log(1)];
199            sizesuffixlen = 1;
200        }
201
202        Opcode::EVX_PINSR => {
203            sizesuffix[0] = b"bwdq"[inst.op_size_log(2)];
204            sizesuffixlen = 1;
205        }
206
207        Opcode::RET | Opcode::ENTER | Opcode::LEAVE => {
208            if inst.opsize_log() == 1 {
209                sizesuffix[0] = b'w';
210                sizesuffixlen = 1;
211            }
212        }
213
214        Opcode::LODS | Opcode::MOVS | Opcode::CMPS | Opcode::OUTS => {
215            prefix_segment = true;
216            if inst.has_rep() {
217                write!(w, "rep ")?;
218            }
219
220            if inst.has_repnz() {
221                write!(w, "repnz ")?;
222            }
223
224            if inst.is_64() && inst.addrsize_log() == 2 {
225                write!(w, "addr32 ")?;
226            }
227
228            if !inst.is_64() && inst.addrsize_log() == 1 {
229                write!(w, "addr16 ")?;
230            }
231        }
232
233        Opcode::STOS | Opcode::SCAS | Opcode::INS => {
234            if inst.has_rep() {
235                write!(w, "rep ")?;
236            }
237
238            if inst.has_repnz() {
239                write!(w, "repnz ")?;
240            }
241
242            if inst.is_64() && inst.addrsize_log() == 2 {
243                write!(w, "addr32 ")?;
244            }
245
246            if !inst.is_64() && inst.addrsize_log() == 1 {
247                write!(w, "addr16 ")?;
248            }
249        }
250
251        Opcode::PUSHA
252        | Opcode::POPA
253        | Opcode::PUSHF
254        | Opcode::POPF
255        | Opcode::RETF
256        | Opcode::IRET
257        | Opcode::IN
258        | Opcode::OUT => {
259            sizesuffix[0] = b"bwdq"[inst.opsize_log()];
260            sizesuffixlen = 1;
261        }
262
263        _ => (),
264    }
265
266    if prefix_xacq_xrel || inst.has_lock() {
267        if inst.has_rep() {
268            write!(w, "xrelease ")?;
269        }
270
271        if inst.has_repnz() {
272            write!(w, "xacquire ")?;
273        }
274    }
275
276    if inst.has_lock() {
277        write!(w, "lock ")?;
278    }
279
280    if prefix_segment && inst.segment().is_some() {
281        write!(w, "{}s ", b"ecsdfg"[inst.segment as usize & 7])?;
282    }
283
284    for c in mnem[..mnem_len as usize].chars() {
285        write!(w, "{}", c.to_lowercase())?;
286    }
287
288    write!(
289        w,
290        "{}",
291        core::str::from_utf8(&sizesuffix[..sizesuffixlen]).unwrap()
292    )?;
293    Ok(())
294}
295
296pub struct Formatter {}
297
298impl Formatter {
299    pub const fn new() -> Self {
300        Self {}
301    }
302
303    pub fn format<W: Write>(&self, out: &mut W, inst: &Instruction) -> fmt::Result {
304        mnemonic(out, inst)?;
305
306        for i in 0..4 {
307            let op_type = inst.op_type(i);
308            if op_type == OperandType::None {
309                break;
310            }
311
312            if i > 0 {
313                write!(out, ",")?;
314            }
315
316            write!(out, " ")?;
317
318            let mut size = inst.op_size_log(i);
319            if size == 0 {
320                size = inst.addrsize_log();
321            }
322
323            if op_type == OperandType::Reg {
324                let typ = inst.op_reg_type(i).unwrap();
325                let idx = inst.operands[i].reg as usize;
326                strpcatreg(out, typ, idx as _, size)?;
327            } else if op_type == OperandType::Mem || op_type == OperandType::MemBCST {
328                let mut idx_rt = RegType::Gpl;
329                let mut idx_sz = inst.addrsize_log();
330                use Opcode::*;
331                match inst.code() {
332                    CMPXCHGD => {
333                        size = inst.opsize_log() + 1;
334                    }
335                    BOUND => {
336                        size += 1;
337                    }
338
339                    JMPF | CALLF | LDS | LES | LFS | LGS | LSS => size += 6,
340                    FLD | FSTP | FBLD | FBSTP => {
341                        size = if size != 0 { size } else { 9 };
342                    }
343
344                    VPGATHERQD | VGATHERQPS | EVX_PGATHERQD | EVX_GATHERQPS => {
345                        idx_rt = RegType::Vec;
346                        idx_sz = inst.op_size_log(0) + 1;
347                    }
348
349                    EVX_PSCATTERQD | EVX_SCATTERQPS => {
350                        idx_rt = RegType::Vec;
351                        idx_sz = inst.op_size_log(1) + 1;
352                    }
353
354                    VPGATHERDD | VPGATHERQQ | VGATHERDPS | VGATHERQPD | EVX_PGATHERDD
355                    | EVX_PGATHERQQ | EVX_GATHERDPS | EVX_GATHERQPD => {
356                        idx_rt = RegType::Vec;
357                        idx_sz = inst.op_size_log(0);
358                    }
359
360                    EVX_PSCATTERDD | EVX_PSCATTERQQ | EVX_SCATTERDPS | EVX_SCATTERQPD => {
361                        idx_rt = RegType::Vec;
362                        idx_sz = inst.op_size_log(1);
363                    }
364                    _ => (),
365                }
366
367                if op_type == OperandType::MemBCST {
368                    size = inst.op_bcstsz_log(i);
369                }
370
371                static PTR_SIZES: [u8; 177] = [
372                    0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 9, 98, 121, 116,
373                    101, 32, 112, 116, 114, 32, 32, 32, 32, 32, 32, 32, 9, 119, 111, 114, 100, 32,
374                    112, 116, 114, 32, 32, 32, 32, 32, 32, 32, 10, 100, 119, 111, 114, 100, 32,
375                    112, 116, 114, 32, 32, 32, 32, 32, 32, 10, 113, 119, 111, 114, 100, 32, 112,
376                    116, 114, 32, 32, 32, 32, 32, 32, 12, 120, 109, 109, 119, 111, 114, 100, 32,
377                    112, 116, 114, 32, 32, 32, 32, 12, 121, 109, 109, 119, 111, 114, 100, 32, 112,
378                    116, 114, 32, 32, 32, 32, 12, 122, 109, 109, 119, 111, 114, 100, 32, 112, 116,
379                    114, 32, 32, 32, 32, 10, 100, 119, 111, 114, 100, 32, 112, 116, 114, 32, 32,
380                    32, 32, 32, 32, 10, 102, 119, 111, 114, 100, 32, 112, 116, 114, 32, 32, 32, 32,
381                    32, 32, 10, 116, 98, 121, 116, 101, 32, 112, 116, 114, 32, 32, 32, 32, 32, 32,
382                    0,
383                ];
384
385                let ptrsize = &PTR_SIZES[16 * (size + 1)..];
386                let len = ptrsize[0];
387                write!(
388                    out,
389                    "{}",
390                    core::str::from_utf8(&ptrsize[1..len as usize]).unwrap()
391                )?;
392
393                if let Some(seg) = inst.segment() {
394                    write!(out, "{}s:", b"ecsdfg\0"[seg as usize & 7] as char)?
395                }
396
397                write!(out, "[")?;
398
399                let has_base = inst.op_base(i).is_some();
400                let has_idx = inst.op_index(i).is_some();
401
402                if has_base {
403                    strpcatreg(
404                        out,
405                        RegType::Gpl,
406                        inst.op_base(i).unwrap() as _,
407                        inst.addrsize_log(),
408                    )?;
409                }
410
411                if has_idx {
412                    if has_base {
413                        write!(out, "+")?
414                    }
415
416                    write!(out, "0{}", char::from_u32(1 << inst.op_scale(i)).unwrap())?;
417                    write!(out, "*")?;
418                    
419                    strpcatreg(out, idx_rt, inst.op_index(i).unwrap() as _, idx_sz)?;
420                }
421
422                let mut disp = inst.op_disp(i);
423
424                if disp != 0 && (has_base || has_idx) {
425                    write!(out, "{}", if disp < 0 { "-" } else { "+" })?;
426
427                    if disp < 0 {
428                        disp = disp.wrapping_neg();
429                    }
430
431                    if inst.addrsize_log() == 1 {
432                        disp &= 0xffff;
433                    } else if inst.addrsize_log() == 2 {
434                        disp &= 0xffffffff;
435                    }
436
437                    if disp != 0 || (!has_base && !has_idx) {
438                        strpcatnum(out, disp as _)?;
439                    }
440
441                    
442                }
443                write!(out, "]")?;
444            } else if op_type == OperandType::Imm || op_type == OperandType::Off {
445                let mut immediate = inst.op_imm(i) as u64;
446                use Opcode::*;
447                match inst.code() {
448                    SSE_EXTRQ | SSE_INSERTQ => {
449                        write!(out, "0x{:x}", immediate & 0xff)?;
450                        write!(out, ", ")?;
451                        immediate = (immediate >> 8) & 0xff;
452                    }
453
454                    ENTER => {
455                        write!(out, "0x{:x}, ", immediate & 0xffff)?;
456                        immediate = (immediate >> 16) & 0xff;
457                    }
458
459                    JMPF | CALLF => {
460                        write!(out, "0x{:x}:", (immediate >> (8 << size)) & 0xffff)?;
461                    }
462
463                    _ => (),
464                }
465
466                if op_type == OperandType::Off {
467                    immediate += inst.address + inst.size() as u64;
468                }
469
470                if size == 0 {
471                    immediate &= 0xff;
472                } else if size == 1 {
473                    immediate &= 0xffff;
474                } else if size == 2 {
475                    immediate &= 0xffffffff;
476                }
477
478                write!(out, "{:x}", immediate)?;
479            }
480
481            if i == 0 && inst.maskreg().is_some() {
482                write!(out, "{{")?;
483                strpcatreg(out, RegType::Mask, inst.maskreg().unwrap() as usize, 0)?;
484                write!(out, "}}")?;
485                if inst.maskzero() {
486                    write!(out, "{{z}}")?;
487                }
488            }
489        }
490
491        Ok(())
492    }
493}
494use alloc::format;
495use alloc::vec::Vec;
496pub fn pretty_disassembler<W: Write>(
497    out: &mut W,
498    bitness: usize,
499    data: &[u8],
500    address: u64,
501) -> fmt::Result {
502    let mut decoder = Decoder::new(bitness as _, data, address);
503    let fmt = Formatter::new();
504
505    let mut inst = Instruction::default();
506    let start = address;
507    while decoder.can_decode() {
508        decoder.decode_out(&mut inst);
509        let ix = (inst.address - start) as usize;
510
511        let instr_bytes = data[ix..ix + inst.size()]
512            .iter()
513            .map(|x| format!("{:02X}", x))
514            .collect::<Vec<alloc::string::String>>()
515            .join(" ");
516
517        let mut outs = alloc::string::String::new();
518        fmt.format(&mut outs, &inst)?;
519        write!(out, "{:<15.016x} {:<20} {}\n", inst.address, instr_bytes, outs)?;
520    }
521    Ok(())
522}