Skip to main content

asmkit/x86/
formatter.rs

1use super::decode::{Decoder, Instruction, OperandType, RegType};
2use super::decode_tab::{
3    Opcode, STRTAB1 as MNEMONIC_STR, STRTAB2 as MNEMONIC_OFFS, STRTAB3 as MNEMONIC_LENS,
4};
5
6use core::fmt::{self, Write};
7
/// Appends `val` to `w` as a lowercase hexadecimal literal with a `0x` prefix
/// and no leading zeros (`0` is written as `0x0`).
///
/// # Errors
///
/// Returns whatever error the underlying writer reports.
fn strpcatnum<W: Write>(w: &mut W, val: u64) -> fmt::Result {
    // The `#` flag makes the standard formatter emit the `0x` prefix itself,
    // so no scratch buffer or unchecked UTF-8 conversion is needed.
    write!(w, "{:#x}", val)
}
29
30pub fn strpcatreg<W: Write>(w: &mut W, rt: RegType, ri: usize, size: usize) -> fmt::Result {
31    let nametab = [
32        2, 97, 108, 4, 98, 110, 100, 48, 2, 99, 108, 4, 98, 110, 100, 49, 2, 100, 108, 4, 98, 110,
33        100, 50, 2, 98, 108, 4, 98, 110, 100, 51, 3, 115, 112, 108, 0, 32, 32, 32, 3, 98, 112, 108,
34        0, 32, 32, 32, 3, 115, 105, 108, 0, 32, 32, 32, 3, 100, 105, 108, 0, 32, 32, 32, 3, 114,
35        56, 98, 0, 32, 32, 32, 3, 114, 57, 98, 0, 32, 32, 32, 4, 114, 49, 48, 98, 0, 32, 32, 4,
36        114, 49, 49, 98, 0, 32, 32, 4, 114, 49, 50, 98, 2, 97, 104, 4, 114, 49, 51, 98, 2, 99, 104,
37        4, 114, 49, 52, 98, 2, 100, 104, 4, 114, 49, 53, 98, 2, 98, 104, 0, 0, 32, 32, 32, 32, 32,
38        32, 2, 97, 120, 4, 116, 109, 109, 48, 2, 99, 120, 4, 116, 109, 109, 49, 2, 100, 120, 4,
39        116, 109, 109, 50, 2, 98, 120, 4, 116, 109, 109, 51, 2, 115, 112, 4, 116, 109, 109, 52, 2,
40        98, 112, 4, 116, 109, 109, 53, 2, 115, 105, 4, 116, 109, 109, 54, 2, 100, 105, 4, 116, 109,
41        109, 55, 3, 114, 56, 119, 32, 2, 101, 115, 3, 114, 57, 119, 32, 2, 99, 115, 4, 114, 49, 48,
42        119, 2, 115, 115, 4, 114, 49, 49, 119, 2, 100, 115, 4, 114, 49, 50, 119, 2, 102, 115, 4,
43        114, 49, 51, 119, 2, 103, 115, 4, 114, 49, 52, 119, 0, 32, 32, 4, 114, 49, 53, 119, 0, 32,
44        32, 2, 105, 112, 0, 32, 32, 32, 32, 3, 101, 97, 120, 3, 109, 109, 48, 3, 101, 99, 120, 3,
45        109, 109, 49, 3, 101, 100, 120, 3, 109, 109, 50, 3, 101, 98, 120, 3, 109, 109, 51, 3, 101,
46        115, 112, 3, 109, 109, 52, 3, 101, 98, 112, 3, 109, 109, 53, 3, 101, 115, 105, 3, 109, 109,
47        54, 3, 101, 100, 105, 3, 109, 109, 55, 3, 114, 56, 100, 32, 2, 107, 48, 3, 114, 57, 100,
48        32, 2, 107, 49, 4, 114, 49, 48, 100, 2, 107, 50, 4, 114, 49, 49, 100, 2, 107, 51, 4, 114,
49        49, 50, 100, 2, 107, 52, 4, 114, 49, 51, 100, 2, 107, 53, 4, 114, 49, 52, 100, 2, 107, 54,
50        4, 114, 49, 53, 100, 2, 107, 55, 3, 101, 105, 112, 0, 32, 32, 32, 3, 114, 97, 120, 3, 99,
51        114, 48, 3, 114, 99, 120, 0, 32, 32, 32, 3, 114, 100, 120, 3, 99, 114, 50, 3, 114, 98, 120,
52        3, 99, 114, 51, 3, 114, 115, 112, 3, 99, 114, 52, 3, 114, 98, 112, 0, 32, 32, 32, 3, 114,
53        115, 105, 0, 32, 32, 32, 3, 114, 100, 105, 0, 32, 32, 32, 2, 114, 56, 32, 3, 99, 114, 56,
54        2, 114, 57, 32, 3, 100, 114, 48, 3, 114, 49, 48, 3, 100, 114, 49, 3, 114, 49, 49, 3, 100,
55        114, 50, 3, 114, 49, 50, 3, 100, 114, 51, 3, 114, 49, 51, 3, 100, 114, 52, 3, 114, 49, 52,
56        3, 100, 114, 53, 3, 114, 49, 53, 3, 100, 114, 54, 3, 114, 105, 112, 3, 100, 114, 55, 5,
57        115, 116, 40, 48, 41, 0, 32, 5, 115, 116, 40, 49, 41, 0, 32, 5, 115, 116, 40, 50, 41, 0,
58        32, 5, 115, 116, 40, 51, 41, 0, 32, 5, 115, 116, 40, 52, 41, 0, 32, 5, 115, 116, 40, 53,
59        41, 0, 32, 5, 115, 116, 40, 54, 41, 0, 32, 5, 115, 116, 40, 55, 41, 0, 32, 4, 120, 109,
60        109, 48, 0, 32, 32, 4, 120, 109, 109, 49, 0, 32, 32, 4, 120, 109, 109, 50, 0, 32, 32, 4,
61        120, 109, 109, 51, 0, 32, 32, 4, 120, 109, 109, 52, 0, 32, 32, 4, 120, 109, 109, 53, 0, 32,
62        32, 4, 120, 109, 109, 54, 0, 32, 32, 4, 120, 109, 109, 55, 0, 32, 32, 4, 120, 109, 109, 56,
63        0, 32, 32, 4, 120, 109, 109, 57, 0, 32, 32, 5, 120, 109, 109, 49, 48, 0, 32, 5, 120, 109,
64        109, 49, 49, 0, 32, 5, 120, 109, 109, 49, 50, 0, 32, 5, 120, 109, 109, 49, 51, 0, 32, 5,
65        120, 109, 109, 49, 52, 0, 32, 5, 120, 109, 109, 49, 53, 0, 32, 5, 120, 109, 109, 49, 54, 0,
66        32, 5, 120, 109, 109, 49, 55, 0, 32, 5, 120, 109, 109, 49, 56, 0, 32, 5, 120, 109, 109, 49,
67        57, 0, 32, 5, 120, 109, 109, 50, 48, 0, 32, 5, 120, 109, 109, 50, 49, 0, 32, 5, 120, 109,
68        109, 50, 50, 0, 32, 5, 120, 109, 109, 50, 51, 0, 32, 5, 120, 109, 109, 50, 52, 0, 32, 5,
69        120, 109, 109, 50, 53, 0, 32, 5, 120, 109, 109, 50, 54, 0, 32, 5, 120, 109, 109, 50, 55, 0,
70        32, 5, 120, 109, 109, 50, 56, 0, 32, 5, 120, 109, 109, 50, 57, 0, 32, 5, 120, 109, 109, 51,
71        48, 0, 32, 5, 120, 109, 109, 51, 49, 0, 32, 0,
72    ];
73
74    let nametab_idx = [608u16, 0, 69, 205, 544, 276, 139, 341, 3, 412, 484];
75    let idx = if rt == RegType::Gpl {
76        size * 17 * 8
77    } else {
78        nametab_idx[rt as usize] as usize
79    };
80
81    let mut dst = [0; 16];
82    let name = &nametab[idx + 8 * ri..];
83
84    for i in 0..8 {
85        dst[i] = name[i + 1];
86    }
87
88    if rt == RegType::Vec && size > 4 {
89        dst[0] += size as u8 - 4;
90    }
91
92    write!(
93        w,
94        "{}",
95        core::str::from_utf8(&dst[..name[0] as usize]).unwrap()
96    )
97}
98
99fn mnemonic<W: Write>(w: &mut W, inst: &Instruction) -> fmt::Result {
100    let mut mnem = &MNEMONIC_STR[MNEMONIC_OFFS[inst.typ as usize] as usize..];
101    let mut mnem_len = MNEMONIC_LENS[inst.code() as usize];
102
103    let mut prefix_xacq_xrel = false;
104    let mut prefix_segment = false;
105
106    let mut sizesuffix = [0u8; 4];
107    let mut sizesuffixlen = 0;
108
109    if inst.op_type(0) == OperandType::Off && inst.op_size_log(0) == 1 {
110        sizesuffix[0] = b'w';
111        sizesuffixlen = 0;
112    }
113
114    match inst.code() {
115        Opcode::C_SEP => {
116            mnem = &mnem[inst.opsize() & 0xc..];
117            mnem_len = 3;
118        }
119
120        Opcode::C_EX => {
121            mnem = &mnem[inst.opsize() & 0xc..];
122            mnem_len = if inst.opsize() < 4 { 3 } else { 4 };
123        }
124
125        Opcode::CMPXCHGD => match inst.opsize_log() {
126            2 => {
127                sizesuffix[0] = b'8';
128                sizesuffix[1] = b'b';
129                sizesuffixlen = 2;
130            }
131            3 => {
132                sizesuffix[0] = b'1';
133                sizesuffix[1] = b'6';
134                sizesuffix[2] = b'b';
135                sizesuffixlen = 3;
136            }
137
138            _ => (),
139        },
140
141        Opcode::JCXZ => {
142            mnem_len = if inst.addrsize_log() == 1 { 4 } else { 5 };
143            mnem = &mnem[5 * (inst.addrsize_log() - 1)..];
144        }
145
146        Opcode::PUSH => {
147            if inst.op_size_log(0) == 1 && inst.op_type(0) == OperandType::Imm {
148                sizesuffix[0] = b'w';
149                sizesuffixlen = 1;
150            }
151            if inst.op_size_log(0) == 1
152                && inst.op_type(0) == OperandType::Reg
153                && inst.op_reg_type(0) == Some(RegType::Seg)
154            {
155                sizesuffix[0] = b'w';
156                sizesuffixlen = 1;
157            }
158        }
159
160        Opcode::POP => {
161            if inst.op_size_log(0) == 1
162                && inst.op_type(0) == OperandType::Reg
163                && inst.op_reg_type(0) == Some(RegType::Seg)
164            {
165                sizesuffix[0] = b'w';
166                sizesuffixlen = 1;
167            }
168        }
169
170        Opcode::MOV => {
171            if inst.has_rep()
172                && inst.op_type(0) == OperandType::Mem
173                && inst.op_type(1) == OperandType::Imm
174            {
175                prefix_xacq_xrel = true;
176            }
177        }
178
179        Opcode::FXSAVE
180        | Opcode::FXRSTOR
181        | Opcode::XSAVE
182        | Opcode::XSAVEC
183        | Opcode::XSAVEOPT
184        | Opcode::XSAVES
185        | Opcode::XRSTOR
186        | Opcode::XRSTORS => {
187            if inst.opsize_log() == 3 {
188                sizesuffix[0] = b'6';
189                sizesuffix[1] = b'4';
190                sizesuffixlen = 2;
191            }
192        }
193
194        Opcode::EVX_MOV_G2X | Opcode::EVX_MOV_X2G => {
195            sizesuffix[0] = b"bwdq"[inst.op_size_log(0)];
196            sizesuffixlen = 1;
197        }
198
199        Opcode::EVX_PBROADCAST => {
200            sizesuffix[0] = b"bwdq"[inst.op_size_log(1)];
201            sizesuffixlen = 1;
202        }
203
204        Opcode::EVX_PINSR => {
205            sizesuffix[0] = b"bwdq"[inst.op_size_log(2)];
206            sizesuffixlen = 1;
207        }
208
209        Opcode::RET | Opcode::ENTER | Opcode::LEAVE => {
210            if inst.opsize_log() == 1 {
211                sizesuffix[0] = b'w';
212                sizesuffixlen = 1;
213            }
214        }
215
216        Opcode::LODS | Opcode::MOVS | Opcode::CMPS | Opcode::OUTS => {
217            prefix_segment = true;
218            if inst.has_rep() {
219                write!(w, "rep ")?;
220            }
221
222            if inst.has_repnz() {
223                write!(w, "repnz ")?;
224            }
225
226            if inst.is_64() && inst.addrsize_log() == 2 {
227                write!(w, "addr32 ")?;
228            }
229
230            if !inst.is_64() && inst.addrsize_log() == 1 {
231                write!(w, "addr16 ")?;
232            }
233        }
234
235        Opcode::STOS | Opcode::SCAS | Opcode::INS => {
236            if inst.has_rep() {
237                write!(w, "rep ")?;
238            }
239
240            if inst.has_repnz() {
241                write!(w, "repnz ")?;
242            }
243
244            if inst.is_64() && inst.addrsize_log() == 2 {
245                write!(w, "addr32 ")?;
246            }
247
248            if !inst.is_64() && inst.addrsize_log() == 1 {
249                write!(w, "addr16 ")?;
250            }
251        }
252
253        Opcode::PUSHA
254        | Opcode::POPA
255        | Opcode::PUSHF
256        | Opcode::POPF
257        | Opcode::RETF
258        | Opcode::IRET
259        | Opcode::IN
260        | Opcode::OUT => {
261            sizesuffix[0] = b"bwdq"[inst.opsize_log()];
262            sizesuffixlen = 1;
263        }
264
265        _ => (),
266    }
267
268    if prefix_xacq_xrel || inst.has_lock() {
269        if inst.has_rep() {
270            write!(w, "xrelease ")?;
271        }
272
273        if inst.has_repnz() {
274            write!(w, "xacquire ")?;
275        }
276    }
277
278    if inst.has_lock() {
279        write!(w, "lock ")?;
280    }
281
282    if prefix_segment && inst.segment().is_some() {
283        write!(w, "{}s ", b"ecsdfg"[inst.segment as usize & 7])?;
284    }
285
286    for c in mnem[..mnem_len as usize].chars() {
287        write!(w, "{}", c.to_lowercase())?;
288    }
289
290    write!(
291        w,
292        "{}",
293        core::str::from_utf8(&sizesuffix[..sizesuffixlen]).unwrap()
294    )?;
295    Ok(())
296}
297
298pub struct Formatter {}
299
300impl Formatter {
301    pub const fn new() -> Self {
302        Self {}
303    }
304
305    pub fn format<W: Write>(&self, out: &mut W, inst: &Instruction) -> fmt::Result {
306        mnemonic(out, inst)?;
307
308        for i in 0..4 {
309            let op_type = inst.op_type(i);
310            if op_type == OperandType::None {
311                break;
312            }
313
314            if i > 0 {
315                write!(out, ",")?;
316            }
317
318            write!(out, " ")?;
319
320            let mut size = inst.op_size_log(i);
321            if size == 0 {
322                size = inst.addrsize_log();
323            }
324
325            if op_type == OperandType::Reg {
326                let typ = inst.op_reg_type(i).unwrap();
327                let idx = inst.operands[i].reg as usize;
328                strpcatreg(out, typ, idx as _, size)?;
329            } else if op_type == OperandType::Mem || op_type == OperandType::MemBCST {
330                let mut idx_rt = RegType::Gpl;
331                let mut idx_sz = inst.addrsize_log();
332                use Opcode::*;
333                match inst.code() {
334                    CMPXCHGD => {
335                        size = inst.opsize_log() + 1;
336                    }
337                    BOUND => {
338                        size += 1;
339                    }
340
341                    JMPF | CALLF | LDS | LES | LFS | LGS | LSS => size += 6,
342                    FLD | FSTP | FBLD | FBSTP => {
343                        size = if size != 0 { size } else { 9 };
344                    }
345
346                    VPGATHERQD | VGATHERQPS | EVX_PGATHERQD | EVX_GATHERQPS => {
347                        idx_rt = RegType::Vec;
348                        idx_sz = inst.op_size_log(0) + 1;
349                    }
350
351                    EVX_PSCATTERQD | EVX_SCATTERQPS => {
352                        idx_rt = RegType::Vec;
353                        idx_sz = inst.op_size_log(1) + 1;
354                    }
355
356                    VPGATHERDD | VPGATHERQQ | VGATHERDPS | VGATHERQPD | EVX_PGATHERDD
357                    | EVX_PGATHERQQ | EVX_GATHERDPS | EVX_GATHERQPD => {
358                        idx_rt = RegType::Vec;
359                        idx_sz = inst.op_size_log(0);
360                    }
361
362                    EVX_PSCATTERDD | EVX_PSCATTERQQ | EVX_SCATTERDPS | EVX_SCATTERQPD => {
363                        idx_rt = RegType::Vec;
364                        idx_sz = inst.op_size_log(1);
365                    }
366                    _ => (),
367                }
368
369                if op_type == OperandType::MemBCST {
370                    size = inst.op_bcstsz_log(i);
371                }
372
373                static PTR_SIZES: [u8; 177] = [
374                    0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 9, 98, 121, 116,
375                    101, 32, 112, 116, 114, 32, 32, 32, 32, 32, 32, 32, 9, 119, 111, 114, 100, 32,
376                    112, 116, 114, 32, 32, 32, 32, 32, 32, 32, 10, 100, 119, 111, 114, 100, 32,
377                    112, 116, 114, 32, 32, 32, 32, 32, 32, 10, 113, 119, 111, 114, 100, 32, 112,
378                    116, 114, 32, 32, 32, 32, 32, 32, 12, 120, 109, 109, 119, 111, 114, 100, 32,
379                    112, 116, 114, 32, 32, 32, 32, 12, 121, 109, 109, 119, 111, 114, 100, 32, 112,
380                    116, 114, 32, 32, 32, 32, 12, 122, 109, 109, 119, 111, 114, 100, 32, 112, 116,
381                    114, 32, 32, 32, 32, 10, 100, 119, 111, 114, 100, 32, 112, 116, 114, 32, 32,
382                    32, 32, 32, 32, 10, 102, 119, 111, 114, 100, 32, 112, 116, 114, 32, 32, 32, 32,
383                    32, 32, 10, 116, 98, 121, 116, 101, 32, 112, 116, 114, 32, 32, 32, 32, 32, 32,
384                    0,
385                ];
386
387                let ptrsize = &PTR_SIZES[16 * (size + 1)..];
388                let len = ptrsize[0];
389                write!(
390                    out,
391                    "{}",
392                    core::str::from_utf8(&ptrsize[1..len as usize]).unwrap()
393                )?;
394
395                if let Some(seg) = inst.segment() {
396                    write!(out, "{}s:", b"ecsdfg\0"[seg as usize & 7] as char)?
397                }
398
399                write!(out, " [")?;
400
401                let has_base = inst.op_base(i).is_some();
402                let has_idx = inst.op_index(i).is_some();
403
404                if has_base {
405                    strpcatreg(
406                        out,
407                        RegType::Gpl,
408                        inst.op_base(i).unwrap() as _,
409                        inst.addrsize_log(),
410                    )?;
411                }
412
413                if has_idx {
414                    if has_base {
415                        write!(out, "+")?
416                    }
417
418                    write!(out, "0{}", char::from_u32(1 << inst.op_scale(i)).unwrap())?;
419                    write!(out, "*")?;
420
421                    strpcatreg(out, idx_rt, inst.op_index(i).unwrap() as _, idx_sz)?;
422                }
423
424                let mut disp = inst.op_disp(i);
425
426                if disp != 0 && (has_base || has_idx) {
427                    write!(out, "{}", if disp < 0 { "-" } else { "+" })?;
428
429                    if disp < 0 {
430                        disp = disp.wrapping_neg();
431                    }
432
433                    if inst.addrsize_log() == 1 {
434                        disp &= 0xffff;
435                    } else if inst.addrsize_log() == 2 {
436                        disp &= 0xffffffff;
437                    }
438
439                    if disp != 0 || (!has_base && !has_idx) {
440                        strpcatnum(out, disp as _)?;
441                    }
442                }
443                write!(out, "]")?;
444            } else if op_type == OperandType::Imm || op_type == OperandType::Off {
445                let mut immediate = inst.op_imm(i) as u64;
446                use Opcode::*;
447                match inst.code() {
448                    SSE_EXTRQ | SSE_INSERTQ => {
449                        write!(out, "0x{:x}", immediate & 0xff)?;
450                        write!(out, ", ")?;
451                        immediate = (immediate >> 8) & 0xff;
452                    }
453
454                    ENTER => {
455                        write!(out, "0x{:x}, ", immediate & 0xffff)?;
456                        immediate = (immediate >> 16) & 0xff;
457                    }
458
459                    JMPF | CALLF => {
460                        write!(out, "0x{:x}:", (immediate >> (8 << size)) & 0xffff)?;
461                    }
462
463                    _ => (),
464                }
465
466                if op_type == OperandType::Off {
467                    immediate += inst.address + inst.size() as u64;
468                }
469
470                if size == 0 {
471                    immediate &= 0xff;
472                } else if size == 1 {
473                    immediate &= 0xffff;
474                } else if size == 2 {
475                    immediate &= 0xffffffff;
476                }
477
478                write!(out, "{:x}", immediate)?;
479            }
480
481            if i == 0 && inst.maskreg().is_some() {
482                write!(out, "{{")?;
483                strpcatreg(out, RegType::Mask, inst.maskreg().unwrap() as usize, 0)?;
484                write!(out, "}}")?;
485                if inst.maskzero() {
486                    write!(out, "{{z}}")?;
487                }
488            }
489        }
490
491        Ok(())
492    }
493}
494use alloc::format;
495use alloc::vec::Vec;
496pub fn pretty_disassembler<W: Write>(
497    out: &mut W,
498    bitness: usize,
499    data: &[u8],
500    address: u64,
501) -> fmt::Result {
502    let mut decoder = Decoder::new(bitness as _, data, address);
503    let fmt = Formatter::new();
504
505    let mut inst = Instruction::default();
506    let start = address;
507    while decoder.can_decode() {
508        decoder.decode_out(&mut inst);
509        let ix = (inst.address - start) as usize;
510
511        let instr_bytes = data[ix..ix + inst.size()]
512            .iter()
513            .map(|x| format!("{:02X}", x))
514            .collect::<Vec<alloc::string::String>>()
515            .join(" ");
516
517        let mut outs = alloc::string::String::new();
518        fmt.format(&mut outs, &inst)?;
519        write!(
520            out,
521            "{:<15.016x} {:<20} {}\n",
522            inst.address, instr_bytes, outs
523        )?;
524    }
525    Ok(())
526}