//! x86 instruction decoder (asmkit/x86/decode.rs).

1use super::decode_tab::*;
2
/// Streaming x86 instruction decoder over a byte buffer.
pub struct Decoder<'a> {
    /// Input byte stream being decoded.
    buf: &'a [u8],
    /// Current read offset into `buf`.
    cursor: usize,
    /// Virtual address corresponding to `buf[0]` (instruction addresses
    /// are computed as `address + offset`).
    address: u64,
    /// 32- or 64-bit decoding mode.
    mode: DecodeMode,
    /// Root index into `DECODE_TABLE` for the selected mode.
    table_root_idx: u16,
    /// Most recent decoding error; `DecoderError::None` when healthy.
    error: DecoderError,
}
11
/// 6-bit register number as used in instruction encodings.
///
/// `R0`..`R15` are bank-agnostic: the same number denotes AL/AX/EAX/RAX,
/// XMM0, etc., depending on the operand's `RegType`. Friendly aliases
/// (`Reg::AX`, `Reg::ES`, ...) are defined in the `impl Reg` block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum Reg {
    #[default]
    R0 = 0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R8,
    R9,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,

    /// Instruction pointer.
    IP = 0x10,
    /// "No register" sentinel (e.g. absent base/index register).
    None = 0x3f,
}
35
36impl TryFrom<u8> for Reg {
37    type Error = ();
38
39    fn try_from(value: u8) -> Result<Self, Self::Error> {
40        match value {
41            0 => Ok(Reg::R0),
42            1 => Ok(Reg::R1),
43            2 => Ok(Reg::R2),
44            3 => Ok(Reg::R3),
45            4 => Ok(Reg::R4),
46            5 => Ok(Reg::R5),
47            6 => Ok(Reg::R6),
48            7 => Ok(Reg::R7),
49            8 => Ok(Reg::R8),
50            9 => Ok(Reg::R9),
51            10 => Ok(Reg::R10),
52            11 => Ok(Reg::R11),
53            12 => Ok(Reg::R12),
54            13 => Ok(Reg::R13),
55            14 => Ok(Reg::R14),
56            15 => Ok(Reg::R15),
57            _ if value == 0x10 => Ok(Reg::IP),
58            _ if value == 0x3f => Ok(Reg::None),
59            _ => Err(()),
60        }
61    }
62}
63
impl Reg {
    // 8-bit legacy general-purpose register aliases.
    // Note: AH..BH share numbers 4..7 with SP..DI; the operand's
    // `RegType` (`Gph` vs `Gpl`) disambiguates them.
    pub const AL: Self = Self::R0;
    pub const CL: Self = Self::R1;
    pub const DL: Self = Self::R2;
    pub const BL: Self = Self::R3;
    pub const AH: Self = Self::R4;
    pub const CH: Self = Self::R5;
    pub const DH: Self = Self::R6;
    pub const BH: Self = Self::R7;

    // 16/32/64-bit general-purpose register aliases.
    pub const AX: Self = Self::R0;
    pub const CX: Self = Self::R1;
    pub const DX: Self = Self::R2;
    pub const BX: Self = Self::R3;
    pub const SP: Self = Self::R4;
    pub const BP: Self = Self::R5;
    pub const SI: Self = Self::R6;
    pub const DI: Self = Self::R7;

    // Segment register aliases (used when `RegType` is `Seg`).
    pub const ES: Self = Self::R0;
    pub const CS: Self = Self::R1;
    pub const SS: Self = Self::R2;
    pub const DS: Self = Self::R3;
    pub const FS: Self = Self::R4;
    pub const GS: Self = Self::R5;
}
90
/// Kind of a decoded operand.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Default)]
pub enum OperandType {
    /// Operand slot is unused.
    #[default]
    None,
    /// Register operand.
    Reg,
    /// Immediate operand.
    Imm,
    /// Memory operand.
    Mem,
    /// Offset operand. NOTE(review): presumably a moffs-style absolute
    /// offset — the code setting this variant is not in view; confirm.
    Off,
    /// Memory operand with EVEX broadcast active.
    MemBCST,
}
101
/// Register bank/type a register operand belongs to.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegType {
    /// Vector (SSE/AVX) register XMMn/YMMn/ZMMn
    Vec = 0,
    /// Low general purpose register
    Gpl = 1,
    /// High-byte general purpose register
    Gph = 2,
    /// Segment register
    Seg = 3,
    /// FPU register ST(n)
    Fpu = 4,
    /// MMX register MMn
    Mmx = 5,
    /// TMM register TMMn
    Tmm = 6,
    /// Vector mask (AVX-512) register Kn
    Mask = 7,
    /// Bound register BNDn
    Bnd = 8,
    /// Control Register CRn
    Cr = 9,
    /// Debug Register DRn
    Dr = 10,
    /// Must be a memory operand
    Mem = 15,
}
130
131impl TryFrom<u8> for RegType {
132    type Error = ();
133
134    fn try_from(value: u8) -> Result<Self, Self::Error> {
135        match value {
136            0 => Ok(RegType::Vec),
137            1 => Ok(RegType::Gpl),
138            2 => Ok(RegType::Gph),
139            3 => Ok(RegType::Seg),
140            4 => Ok(RegType::Fpu),
141            5 => Ok(RegType::Mmx),
142            6 => Ok(RegType::Tmm),
143            7 => Ok(RegType::Mask),
144            8 => Ok(RegType::Bnd),
145            9 => Ok(RegType::Cr),
146            10 => Ok(RegType::Dr),
147            15 => Ok(RegType::Mem),
148            _ => Err(()),
149        }
150    }
151}
152
/// EVEX static rounding / SAE control.
///
/// Do not depend on the actual enum values.
/// (The discriminants mirror the packed form extracted from the EVEX byte
/// by `Instruction::round_control`.)
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundControl {
    /// Round to nearest (even)
    Rn = 1,
    /// Round down
    Rd = 3,
    /// Round up
    Ru = 5,
    /// Round to zero (truncate)
    Rz = 7,
    /// Rounding mode as specified in MXCSR
    Mxcsr = 0,
    /// Rounding mode irrelevant, but SAE
    Sae = 6,
}
170
171impl TryFrom<u8> for RoundControl {
172    type Error = ();
173
174    fn try_from(value: u8) -> Result<Self, Self::Error> {
175        match value {
176            1 => Ok(RoundControl::Rn),
177            3 => Ok(RoundControl::Rd),
178            5 => Ok(RoundControl::Ru),
179            7 => Ok(RoundControl::Rz),
180            0 => Ok(RoundControl::Mxcsr),
181            6 => Ok(RoundControl::Sae),
182            _ => Err(()),
183        }
184    }
185}
186
/// Compact per-instruction description from the generated decode table.
///
/// The three `u16` fields are bit-packed; see the accessor methods on
/// `impl InstDesc` for the exact layout.
pub struct InstDesc {
    /// Instruction mnemonic/opcode.
    typ: Opcode,
    /// Packed operand-slot indices and boolean capability flags.
    operand_indices: u16,
    /// Packed operand-size selectors and size-related flags.
    operand_sizes: u16,
    /// Packed register types, operand-size class and misc flags.
    reg_types: u16,
}
193
impl InstDesc {
    /// Builds a descriptor from the pre-packed decode-table fields.
    pub(crate) const fn new(
        typ: Opcode,
        operand_indices: u16,
        operand_sizes: u16,
        reg_types: u16,
    ) -> Self {
        Self {
            typ,
            operand_indices,
            operand_sizes,
            reg_types,
        }
    }

    /// Sentinel descriptor for invalid table slots.
    /// NOTE(review): reuses `Opcode::_3DNOW` as the placeholder opcode —
    /// presumably harmless because all packed fields are zero; confirm.
    pub(crate) const INVALID: Self = Self::new(Opcode::_3DNOW, 0, 0, 0);

    // Operand slot indices below are stored inverted (`slot ^ 3`) so that
    // a zero field means "operand absent".

    /// True if the instruction has a ModRM.rm operand
    /// (`operand_indices` bits 0-1).
    pub fn has_modrm(&self) -> bool {
        (self.operand_indices & (3 << 0)) != 0
    }

    /// Operand slot (0-3) receiving the ModRM.rm operand.
    pub fn modrm_idx(&self) -> u8 {
        (((self.operand_indices >> 0) & 3) ^ 3) as u8
    }

    /// True if the instruction has a ModRM.reg operand
    /// (`operand_indices` bits 2-3).
    pub fn has_modreg(&self) -> bool {
        (self.operand_indices & (3 << 2)) != 0
    }

    /// Operand slot (0-3) receiving the ModRM.reg operand.
    pub fn modreg_idx(&self) -> u8 {
        (((self.operand_indices >> 2) & 3) ^ 3) as u8
    }

    /// True if the instruction has a VEX.vvvv operand
    /// (`operand_indices` bits 4-5).
    pub fn has_vexreg(&self) -> bool {
        (self.operand_indices & (3 << 4)) != 0
    }

    /// Operand slot (0-3) receiving the VEX.vvvv operand.
    pub fn vexreg_idx(&self) -> u8 {
        (((self.operand_indices >> 4) & 3) ^ 3) as u8
    }

    /// Immediate-handling mode (`operand_indices` bits 12-14).
    pub fn imm_control(&self) -> u8 {
        ((self.operand_indices >> 12) & 0x7) as u8
    }

    /// Operand slot (0-3) receiving the immediate operand.
    pub fn imm_idx(&self) -> u8 {
        (((self.operand_indices >> 6) & 3) ^ 3) as u8
    }

    /// True if EVEX broadcast is permitted (`operand_indices` bit 8).
    pub fn evex_bcst(&self) -> bool {
        ((self.operand_indices >> 8) & 1) != 0
    }

    /// True if EVEX masking is permitted (`operand_indices` bit 9).
    pub fn evex_mask(&self) -> bool {
        ((self.operand_indices >> 9) & 1) != 0
    }

    /// `operand_indices` bit 10. NOTE(review): name suggests an implicit
    /// zero-register operand; the consuming code is not in view — confirm.
    pub fn zeroreg_val(&self) -> bool {
        ((self.operand_indices >> 10) & 1) != 0
    }

    /// True if the LOCK prefix is allowed (`operand_indices` bit 11).
    pub fn lock(&self) -> bool {
        ((self.operand_indices >> 11) & 1) != 0
    }

    /// True if the memory operand uses VSIB addressing
    /// (`operand_indices` bit 15).
    pub fn vsib(&self) -> bool {
        ((self.operand_indices >> 15) & 1) != 0
    }

    /// Operand-size class (`reg_types` bits 11-13); bit 2 selects the
    /// vector-size path in `decode_out`.
    pub fn opsize(&self) -> u8 {
        ((self.reg_types >> 11) & 7) as u8
    }

    /// Size-table selector for the ModRM.rm operand
    /// (`operand_sizes` bits 0-1).
    pub fn modrm_size(&self) -> u8 {
        ((self.operand_sizes >> 0) & 3) as u8
    }

    /// Size-table selector for the ModRM.reg operand
    /// (`operand_sizes` bits 2-3).
    pub fn modreg_size(&self) -> u8 {
        ((self.operand_sizes >> 2) & 3) as u8
    }

    /// Size-table selector for the VEX.vvvv operand
    /// (`operand_sizes` bits 4-5).
    pub fn vexreg_size(&self) -> u8 {
        ((self.operand_sizes >> 4) & 3) as u8
    }

    /// Size-table selector for the immediate operand
    /// (`operand_sizes` bits 6-7).
    pub fn imm_size(&self) -> u8 {
        ((self.operand_sizes >> 6) & 3) as u8
    }

    /// `operand_sizes` bit 8. NOTE(review): presumably marks legacy
    /// (non-VEX) encodings; consuming code not in view — confirm.
    pub fn legacy(&self) -> bool {
        ((self.operand_sizes >> 8) & 1) != 0
    }

    /// First fixed-size entry of the per-instruction size table
    /// (`operand_sizes` bits 10-12).
    pub fn size_fix1(&self) -> u8 {
        ((self.operand_sizes >> 10) & 7) as u8
    }

    /// Second fixed-size entry, biased by 2 when used
    /// (`operand_sizes` bits 13-14).
    pub fn size_fix2(&self) -> u8 {
        ((self.operand_sizes >> 13) & 3) as u8
    }

    /// `operand_sizes` bit 15. NOTE(review): presumably "operand size
    /// depends on instruction width"; consuming code not in view.
    pub fn instr_width(&self) -> bool {
        ((self.operand_sizes >> 15) & 1) != 0
    }

    /// True if the encoding contains a ModRM byte at all
    /// (`reg_types` bit 14) — distinct from `has_modrm`, which asks
    /// whether ModRM.rm maps to an operand.
    pub fn modrm(&self) -> bool {
        ((self.reg_types >> 14) & 1) != 0
    }

    /// True if the 66h prefix does not affect the operand size
    /// (`reg_types` bit 15).
    pub fn ign66(&self) -> bool {
        ((self.reg_types >> 15) & 1) != 0
    }

    /// True if EVEX suppress-all-exceptions is supported
    /// (`reg_types` bit 8).
    pub fn evex_sae(&self) -> bool {
        ((self.reg_types >> 8) & 1) != 0
    }

    /// True if EVEX embedded rounding is supported (`reg_types` bit 9).
    pub fn evex_er(&self) -> bool {
        ((self.reg_types >> 9) & 1) != 0
    }

    /// True if EVEX broadcast element size is 16 bits
    /// (`reg_types` bit 10).
    pub fn evex_bcst16(&self) -> bool {
        ((self.reg_types >> 10) & 1) != 0
    }

    /// `RegType` (as raw u8) of the ModRM.rm operand
    /// (`reg_types` bits 0-2).
    pub fn regty_modrm(&self) -> u8 {
        ((self.reg_types >> 0) & 7) as u8
    }

    /// `RegType` (as raw u8) of the ModRM.reg operand
    /// (`reg_types` bits 3-5).
    pub fn regty_modreg(&self) -> u8 {
        ((self.reg_types >> 3) & 7) as u8
    }

    /// `RegType` (as raw u8) of the VEX.vvvv operand
    /// (`reg_types` bits 6-7).
    pub fn regty_vexreg(&self) -> u8 {
        ((self.reg_types >> 6) & 3) as u8
    }
}
/// Bitness the decoder operates in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecodeMode {
    /// 64-bit (long) mode.
    Decode64,
    /// 32-bit mode.
    Decode32,
}
336
337impl DecodeMode {
338    fn is_64(&self) -> bool {
339        matches!(self, Self::Decode64)
340    }
341}
342
343fn table_lookup(cur_idx: u16, entry_idx: u8) -> u16 {
344    DECODE_TABLE[cur_idx as usize + entry_idx as usize]
345}
346
347fn table_walk(table_entry: u16, entry_idx: u8) -> u16 {
348    table_lookup(table_entry & !0x3, entry_idx)
349}
350
/// Loads 1 byte, zero-extended to u64. Panics if `buf` is empty.
fn load_le_1(buf: &[u8]) -> u64 {
    u64::from(buf[0])
}
354
/// Loads 2 bytes little-endian, zero-extended to u64.
/// Panics if `buf` has fewer than 2 bytes.
fn load_le_2(buf: &[u8]) -> u64 {
    u64::from(buf[0]) | u64::from(buf[1]) << 8
}
358
/// Loads 3 bytes little-endian, zero-extended to u64.
/// Panics if `buf` has fewer than 3 bytes.
fn load_le_3(buf: &[u8]) -> u64 {
    buf[..3]
        .iter()
        .rev()
        .fold(0u64, |acc, &b| acc << 8 | u64::from(b))
}
362
/// Loads 4 bytes little-endian, zero-extended to u64.
/// Panics if `buf` has fewer than 4 bytes.
fn load_le_4(buf: &[u8]) -> u64 {
    buf[..4]
        .iter()
        .rev()
        .fold(0u64, |acc, &b| acc << 8 | u64::from(b))
}
366
/// Loads 8 bytes little-endian into a u64.
/// Panics if `buf` has fewer than 8 bytes.
fn load_le_8(buf: &[u8]) -> u64 {
    buf[..8]
        .iter()
        .rev()
        .fold(0u64, |acc, &b| acc << 8 | u64::from(b))
}
370
// Internal prefix-state flags. The low nibble mirrors the REX byte's
// B/X/R/W bits; higher bits track prefix classes seen during decode.
const PREFIX_REXB: u8 = 0x01; // REX.B: extends ModRM.rm / base register
const PREFIX_REXX: u8 = 0x02; // REX.X: extends the index register
const PREFIX_REXR: u8 = 0x04; // REX.R: extends ModRM.reg
const PREFIX_REXW: u8 = 0x08; // REX.W: 64-bit operand size
const PREFIX_REX: u8 = 0x40; // a bare REX prefix byte was seen
// NOTE(review): appears to be the EVEX R' high-register extension
// (adds 16 to ModRM.reg for vector registers) — confirm.
const PREFIX_REXRR: u8 = 0x10;
const PREFIX_VEX: u8 = 0x20; // a VEX/EVEX prefix was seen
378
/// A single decoded operand.
#[derive(Default, Copy, Clone, Debug)]
pub struct Op {
    /// Operand kind (register, immediate, memory, ...).
    pub typ: OperandType,
    /// Size encoded as log2(bytes) + 1; 0 means "no/unsized operand".
    pub size: u8,
    /// Register number for register operands, or the base register for
    /// memory operands (`Reg::None` encoding when absent).
    pub reg: u8,
    /// For register operands: the raw `RegType`. For memory operands:
    /// low 6 bits index register, top 2 bits scale shift.
    pub misc: u8,
}
386
/// A fully decoded instruction.
#[derive(Default)]
pub struct Instruction {
    /// Decoded mnemonic; `Opcode::INVALID` when decoding failed.
    pub typ: Opcode,
    /// Bit 0: LOCK, bit 1: REPNZ, bit 2: REP, bit 7: decoded in 64-bit
    /// mode (see the `has_*`/`is_64` accessors).
    pub flags: u8,
    /// Low 6 bits: segment-override register (`Reg::None` when absent);
    /// top 2 bits: log2 broadcast-element scale for EVEX broadcasts.
    pub segment: u8,
    /// log2 of the address size in bytes.
    pub addrsz: u8,
    /// log2 of the operand size in bytes.
    pub operandsz: u8,
    /// Instruction length in bytes.
    pub size: u8,
    /// EVEX state: bits 0-2 mask register, bits 4-6 rounding control,
    /// bit 7 zero-masking (see `maskreg`/`round_control`/`maskzero`).
    pub evex: u8,
    /// Up to four decoded operands; unused slots have `OperandType::None`.
    pub operands: [Op; 4],
    /// Memory-operand displacement (shared across operands).
    pub disp: i64,
    /// Immediate value (shared across operands).
    pub imm: i64,
    /// Virtual address of the instruction's first byte.
    pub address: u64,
}
401
impl Instruction {
    /// True if decoding produced a real instruction.
    pub fn is_valid(&self) -> bool {
        self.typ != Opcode::INVALID
    }

    /// The decoded mnemonic.
    pub fn code(&self) -> Opcode {
        self.typ
    }

    /// Virtual address of the instruction's first byte.
    pub fn address(&self) -> u64 {
        self.address
    }

    /// Instruction length in bytes.
    pub fn size(&self) -> usize {
        self.size as _
    }

    /// Segment-override register, if one was present.
    pub fn segment(&self) -> Option<Reg> {
        // The top two bits of `segment` carry the broadcast-size scale,
        // so mask them off before interpreting the register number.
        Reg::try_from(self.segment & 0x3f)
            .ok()
            .filter(|reg| *reg != Reg::None)
    }

    /// Address size in bytes.
    pub fn addrsize(&self) -> usize {
        1 << self.addrsz
    }

    /// log2 of the address size in bytes.
    pub fn addrsize_log(&self) -> usize {
        self.addrsz as _
    }

    /// Operand size in bytes.
    pub fn opsize(&self) -> usize {
        1 << self.operandsz
    }

    /// log2 of the operand size in bytes.
    pub fn opsize_log(&self) -> usize {
        self.operandsz as _
    }

    /// True if a REP prefix was present (flag bit 2).
    pub fn has_rep(&self) -> bool {
        self.flags & 4 != 0
    }

    /// True if a REPNZ prefix was present (flag bit 1).
    pub fn has_repnz(&self) -> bool {
        self.flags & 2 != 0
    }

    /// True if a LOCK prefix was present (flag bit 0).
    pub fn has_lock(&self) -> bool {
        self.flags & 1 != 0
    }

    /// True if the instruction was decoded in 64-bit mode (flag bit 7).
    pub fn is_64(&self) -> bool {
        self.flags & 128 != 0
    }

    /// Kind of operand `idx`.
    pub fn op_type(&self, idx: usize) -> OperandType {
        self.operands[idx].typ
    }

    /// Size of operand `idx` in bytes (0 if the slot is unused).
    pub fn op_size(&self, idx: usize) -> usize {
        // `size` is stored as log2(bytes) + 1 with 0 meaning "none":
        // (1 << size) >> 1 yields 0 for size == 0.
        1 << self.operands[idx].size >> 1
    }

    /// log2 of operand `idx`'s size in bytes (0 for an unused slot).
    pub fn op_size_log(&self, idx: usize) -> usize {
        let sz = self.operands[idx].size;
        if sz == 0 {
            return 0;
        }
        (self.operands[idx].size - 1) as usize
    }

    /// Register of operand `idx` (meaningful for register operands).
    pub fn op_reg(&self, idx: usize) -> Option<Reg> {
        Reg::try_from(self.operands[idx].reg).ok()
    }

    /// Register bank/type of operand `idx` (register operands store the
    /// raw `RegType` in `misc`).
    pub fn op_reg_type(&self, idx: usize) -> Option<RegType> {
        RegType::try_from(self.operands[idx].misc).ok()
    }

    /// True if operand `idx` is a high-byte register (AH/CH/DH/BH).
    pub fn op_reg_high(&self, idx: usize) -> bool {
        self.op_reg_type(idx) == Some(RegType::Gph)
    }

    /// Base register of memory operand `idx`, if present.
    pub fn op_base(&self, idx: usize) -> Option<Reg> {
        self.op_reg(idx).filter(|reg| *reg != Reg::None)
    }

    /// Index register of memory operand `idx`, if present
    /// (low 6 bits of `misc`).
    pub fn op_index(&self, idx: usize) -> Option<Reg> {
        Reg::try_from(self.operands[idx].misc & 0x3f)
            .ok()
            .filter(|reg| *reg != Reg::None)
    }

    /// Index-scale shift (0..=3) of memory operand `idx`
    /// (top 2 bits of `misc`).
    pub fn op_scale(&self, idx: usize) -> u8 {
        self.operands[idx].misc >> 6
    }

    /// Displacement of memory operand `idx`. A single displacement is
    /// stored per instruction, so `idx` is ignored.
    pub fn op_disp(&self, idx: usize) -> i64 {
        let _ = idx;
        self.disp
    }

    /// Broadcast element size in bytes for a `MemBCST` operand.
    pub fn op_bcstsz(&self, idx: usize) -> usize {
        1 << self.op_bcstsz_log(idx)
    }

    /// log2 of the broadcast element size (stored in the top 2 bits of
    /// `segment`); `idx` is ignored.
    pub fn op_bcstsz_log(&self, idx: usize) -> usize {
        let _ = idx;
        self.segment as usize >> 6
    }

    /// Immediate value; `idx` is ignored (one immediate per instruction).
    pub fn op_imm(&self, idx: usize) -> i64 {
        let _ = idx;
        self.imm
    }

    /// EVEX mask register (K1..K7), if masking is active.
    pub fn maskreg(&self) -> Option<Reg> {
        if self.evex & 0x07 == 0 {
            return None;
        }
        Reg::try_from(self.evex & 0x07).ok()
    }

    /// True for zero-masking, false for merge-masking (EVEX.z bit).
    pub fn maskzero(&self) -> bool {
        self.evex & 0x80 != 0
    }

    /// Static rounding / SAE control from the EVEX prefix, if encodable.
    pub fn round_control(&self) -> Option<RoundControl> {
        RoundControl::try_from((self.evex & 0x70) >> 4).ok()
    }
}
533
/// Reason the decoder stopped or rejected input.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum DecoderError {
    /// No error occurred.
    None,
    /// The bytes do not encode a valid instruction.
    InvalidInstruction,
    /// Internal decoder inconsistency.
    Internal,
    /// The buffer ended in the middle of an instruction.
    NoMoreBytes,
}
541
542impl<'a> Decoder<'a> {
543    pub fn new(bitness: u32, bytes: &'a [u8], ip: u64) -> Self {
544        Self {
545            mode: if bitness < 64 {
546                DecodeMode::Decode32
547            } else {
548                DecodeMode::Decode64
549            },
550            buf: bytes,
551            cursor: 0,
552            address: ip,
553            table_root_idx: if bitness == 64 {
554                DECODE_TABLE_OFFSET_64 as u16
555            } else {
556                DECODE_TABLE_OFFSET_32 as u16
557            },
558            error: DecoderError::None,
559        }
560    }
561
562    pub fn decode_out(&mut self, inst: &mut Instruction) {
563        inst.typ = Opcode::INVALID;
564
565        let mut vex_operand = 0u8;
566        let mut addr_size = if self.mode.is_64() { 3 } else { 2 };
567
568        let mut prefix_rex = 0u8;
569        let mut vexl = 0u8;
570        let mut prefix_rep = 0u8;
571        let mut prefix_evex = 0u32;
572
573        inst.segment = Reg::None as _;
574        let start = self.cursor;
575        #[allow(dead_code)]
576        const PF_SEG1: usize = 0xfff8 - 0xfff8;
577        const PF_SEG2: usize = 0xfff9 - 0xfff8;
578        const PF_66: usize = 0xfffa - 0xfff8;
579        const PF_67: usize = 0xfffb - 0xfff8;
580        const PF_LOCK: usize = 0xfffc - 0xfff8;
581        const PF_REP: usize = 0xfffd - 0xfff8;
582        const PF_REX: usize = 0xfffe - 0xfff8;
583
584        let mut prefixes = [0u8; 8];
585        let mut table_entry;
586
587        loop {
588            if self.cursor >= self.buf.len() {
589                return self.partial();
590            }
591
592            let prefix = self.peek();
593
594            table_entry = table_lookup(self.table_root_idx, prefix);
595            if table_entry.wrapping_sub(0xfff8) >= 8 {
596                break;
597            }
598
599            prefixes[PF_REX] = 0;
600            prefixes[(table_entry.wrapping_sub(0xfff8)) as usize] = prefix;
601            self.read_u8();
602        }
603
604        if self.cursor > start {
605            if prefixes[PF_SEG2] != 0 {
606                if prefixes[PF_SEG2] & 0x02 != 0 {
607                    inst.segment = prefixes[PF_SEG2] >> 3 & 3;
608                } else {
609                    inst.segment = prefixes[PF_SEG2] & 7;
610                }
611            }
612
613            if prefixes[PF_67] != 0 {
614                addr_size -= 1;
615            }
616
617            prefix_rex = prefixes[PF_REX];
618            prefix_rep = prefixes[PF_REP];
619        }
620        // table_entry kinds: INSTR(0), T16(1), ESCAPE_A(2), ESCAPE_B(3)
621
622        'direct: loop {
623            if table_entry & 2 == 0 {
624                self.read_u8();
625                if table_entry & 1 != 0 {
626                    // Then, walk through ModR/M-encoded opcode extensions
627                    if self.cursor >= self.buf.len() {
628                        return self.partial();
629                    }
630
631                    let isreg = self.peek() >= 0xc0;
632                    table_entry = table_walk(table_entry, ((self.peek() >> 2) & 0xe) | isreg as u8);
633                    // table_entry kinds: INSTR(0), T8E(1)
634                    if table_entry & 1 != 0 {
635                        table_entry = table_walk(table_entry, self.peek() & 7);
636                    }
637                }
638                // table_entry kinds: INSTR(0)
639                break 'direct;
640            }
641
642            if self.cursor >= self.buf.len() {
643                return self.partial();
644            }
645
646            let mut opcode_escape = 0;
647            let mut mandatory_prefix = 0u8;
648
649            if self.peek() == 0x0f {
650                if self.cursor + 1 >= self.buf.len() {
651                    return self.partial();
652                }
653
654                if self.peek1() == 0x38 {
655                    opcode_escape = 2;
656                } else if self.peek1() == 0x3a {
657                    opcode_escape = 3;
658                } else {
659                    opcode_escape = 1;
660                }
661
662                self.cursor += if opcode_escape >= 2 { 2 } else { 1 };
663
664                // If there is no REP/REPNZ prefix offer 66h as mandatory prefix. If
665                // there is a REP prefix, then the 66h prefix is ignored here.
666                mandatory_prefix = if prefix_rep != 0 {
667                    prefix_rep ^ 0xf1
668                } else {
669                    (prefixes[PF_66] != 0) as u8
670                };
671            } else if self.peek().wrapping_sub(0xc4) < 2 || self.peek() == 0x62 {
672                let vex_prefix = self.peek();
673
674                if self.cursor + 1 >= self.buf.len() {
675                    return self.partial();
676                }
677
678                if !self.mode.is_64() && self.peek1() < 0xc0 {
679                    self.read_u8();
680                    table_entry = table_walk(table_entry, 0);
681                    // table_entry kinds: INSTR(0)
682                    break 'direct;
683                }
684
685                if prefixes[PF_66] != 0 || prefixes[PF_REP] != 0 || prefix_rex != 0 {
686                    return self.invalid();
687                }
688
689                let mut byte = self.peek1();
690                if vex_prefix == 0xc5 {
691                    opcode_escape = 1;
692                    prefix_rex = if byte & 0x80 != 0 { 0 } else { PREFIX_REXR }
693                } else
694                // 3 byte vex or evex
695                {
696                    // SDM Vol 2A 2-15 (Dec. 2016): Ignored in 32-bit mode
697                    if self.mode.is_64() {
698                        prefix_rex = byte >> 5 ^ 0x7;
699                    }
700
701                    if vex_prefix == 0x62 {
702                        // Bit 3 of opcode_escape must be clear.
703                        if byte & 0x08 != 0 {
704                            return self.invalid();
705                        }
706
707                        if self.mode.is_64() {
708                            prefix_rex |= (byte & PREFIX_REXRR) ^ PREFIX_REXRR;
709                        }
710                    } else
711                    // 3 byte VEX
712                    {
713                        // bits 4:3 of opcode_escape must be clear
714                        if byte & 0x18 != 0 {
715                            return self.invalid();
716                        }
717                    }
718
719                    opcode_escape = byte & 0x07;
720
721                    if opcode_escape == 0 {
722                        let prefix_len = if vex_prefix == 0x62 { 4 } else { 3 };
723
724                        if self.cursor + prefix_len > self.buf.len() {
725                            return self.partial();
726                        } else {
727                            return self.invalid();
728                        }
729                    }
730
731                    // load third byte of VEX prefix
732                    if self.cursor + 2 >= self.buf.len() {
733                        return self.partial();
734                    }
735
736                    byte = self.peek2();
737                    prefix_rex |= if byte & 0x80 != 0 { PREFIX_REXW } else { 0 };
738                }
739
740                mandatory_prefix = byte & 3;
741                vex_operand = ((byte & 0x78) >> 3) ^ 0xf;
742                prefix_rex |= PREFIX_VEX;
743
744                if vex_prefix == 0x62
745                // EVEX
746                {
747                    // Bit 10 must be 1.
748                    if byte & 0x04 == 0 {
749                        return self.invalid();
750                    }
751
752                    if self.cursor + 3 >= self.buf.len() {
753                        return self.partial();
754                    }
755
756                    byte = self.peek3();
757
758                    vexl = (byte >> 5) & 3;
759                    prefix_evex = byte as u32 | 0x100;
760                    if self.mode.is_64() {
761                        vex_operand |= if byte & 0x08 != 0 { 0 } else { 0x10 };
762                    } else if byte & 0x08 == 0 {
763                        return self.invalid();
764                    }
765
766                    self.cursor += 4;
767                } else {
768                    vexl = if byte & 0x04 != 0 { 1 } else { 0 };
769                    self.cursor += 0xc7 - vex_prefix as usize; // 3 for c4, 2 for c5
770                }
771            }
772
773            table_entry = table_walk(table_entry, opcode_escape);
774
775            if table_entry == 0 {
776                return self.invalid();
777            }
778
779            if self.cursor >= self.buf.len() {
780                return self.partial();
781            }
782
783            table_entry = table_walk(table_entry, self.read_u8());
784
785            // Handle mandatory prefixes (which behave like an opcode ext.).
786            if table_entry & 3 == 3 {
787                table_entry = table_walk(table_entry, mandatory_prefix);
788            }
789
790            if table_entry & 1 != 0 {
791                if self.cursor >= self.buf.len() {
792                    return self.partial();
793                }
794
795                let isreg = self.peek() >= 0xc0;
796
797                table_entry = table_walk(table_entry, ((self.peek() >> 2) & 0xe) | isreg as u8);
798
799                if table_entry & 1 != 0 {
800                    table_entry = table_walk(table_entry, self.peek() & 7);
801                }
802            }
803
804            // For VEX prefix, we have to distinguish between VEX.W and VEX.L which may
805            // be part of the opcode.
806            if table_entry & 2 != 0 {
807                let mut index = 0;
808                index |= if prefix_rex & PREFIX_REXW != 0 {
809                    1 << 0
810                } else {
811                    0
812                };
813                index |= vexl << 1;
814                table_entry = table_walk(table_entry, index);
815            }
816
817            break 'direct;
818        }
819
820        if table_entry == 0 {
821            return self.invalid();
822        }
823
824        let desc = &TABLE_DESCS[table_entry as usize >> 2];
825
826        inst.typ = desc.typ;
827        inst.addrsz = addr_size;
828        inst.flags = ((prefix_rep + 1) & 6) + if self.mode.is_64() { 128 } else { 0 };
829        inst.address = self.address + start as u64;
830
831        inst.operands = [Op::default(); 4];
832
833        if desc.modrm() && self.cursor >= self.buf.len() {
834            return self.partial();
835        }
836
837        if desc.modrm() {
838            self.read_u8();
839        }
840
841        let op_byte = self.buf[self.cursor - 1] | (if desc.modrm() { 0 } else { 0xc0 });
842
843        if prefix_evex != 0 {
844            if desc.vsib() && (prefix_evex & 0x07 == 0 || prefix_evex & 0x80 != 0) {
845                return self.invalid();
846            }
847
848            if !desc.evex_mask() && prefix_evex & 0x87 != 0 {
849                return self.invalid();
850            }
851
852            if prefix_evex & 0x87 == 0x80 {
853                return self.invalid();
854            }
855
856            if prefix_evex & 0x10 != 0 && op_byte & 0xc0 == 0xc0 {
857                if !desc.evex_sae() {
858                    return self.invalid();
859                }
860
861                vexl = 2;
862                if desc.evex_er() {
863                    inst.evex = prefix_evex as _;
864                } else {
865                    inst.evex = (prefix_evex & 0x87) as u8 | 0x60;
866                }
867            } else {
868                if vexl == 3 {
869                    return self.invalid();
870                }
871
872                inst.evex = (prefix_evex & 0x87) as u8;
873            }
874
875            if desc.vsib() {
876                vex_operand &= 0xf;
877            }
878        } else {
879            inst.evex = 0;
880        }
881
882        let op_size: u8;
883        let mut op_size_alt = 0;
884
885        if desc.opsize() & 4 == 0 {
886            if self.mode.is_64() {
887                op_size = if (prefix_rex & PREFIX_REXW != 0) || desc.opsize() == 3 {
888                    4
889                } else if prefixes[PF_66] != 0 && !desc.ign66() {
890                    2
891                } else if desc.opsize() != 0 {
892                    4
893                } else {
894                    3
895                };
896            } else {
897                op_size = if prefixes[PF_66] != 0 && !desc.ign66() {
898                    2
899                } else {
900                    3
901                };
902            }
903        } else {
904            op_size = 5 + vexl as u8;
905            op_size_alt = op_size - (desc.opsize() as u8 & 3);
906        }
907
908        let operand_sizes = [desc.size_fix1(), desc.size_fix2() + 2, op_size, op_size_alt];
909        'skip_modrm: loop {
910            if matches!(inst.typ, Opcode::MOV_CR | Opcode::MOV_DR) {
911                let modreg = (op_byte >> 3) & 0x7;
912                let modrm = op_byte & 0x7;
913
914                let op_modreg = &mut inst.operands[desc.modreg_idx() as usize];
915                op_modreg.typ = OperandType::Reg;
916                op_modreg.size = op_size;
917                op_modreg.reg = modreg | if prefix_rex & PREFIX_REXR != 0 { 8 } else { 0 };
918                op_modreg.misc = if matches!(inst.typ, Opcode::MOV_CR) {
919                    RegType::Cr as u8
920                } else {
921                    RegType::Dr as u8
922                };
923
924                if matches!(inst.typ, Opcode::MOV_CR) && (!0x011d >> op_modreg.reg) & 1 != 0 {
925                    return self.invalid();
926                } else if matches!(inst.typ, Opcode::MOV_DR) && prefix_rex & PREFIX_REXR != 0 {
927                    return self.invalid();
928                }
929
930                let op_modrm = &mut inst.operands[desc.modrm_idx() as usize];
931                op_modrm.typ = OperandType::Reg;
932                op_modrm.size = op_size;
933                op_modrm.reg = modrm | if prefix_rex & PREFIX_REXB != 0 { 8 } else { 0 };
934                break 'skip_modrm;
935            }
936
937            if desc.has_modreg() {
938                let op_modreg = &mut inst.operands[desc.modreg_idx() as usize];
939                let mut reg_idx = (op_byte & 0x38) as usize >> 3;
940                let reg_ty = desc.regty_modreg();
941
942                op_modreg.misc = reg_ty;
943
944                if reg_ty < 2 {
945                    reg_idx += if prefix_rex & PREFIX_REXR != 0 { 8 } else { 0 };
946                } else if reg_idx == 7 && (prefix_rex & PREFIX_REXR != 0 || prefix_evex & 0x80 != 0)
947                {
948                    return self.invalid();
949                }
950                if reg_ty == RegType::Vec as u8 {
951                    reg_idx += if prefix_rex & PREFIX_REXRR != 0 {
952                        16
953                    } else {
954                        0
955                    };
956                } else if prefix_rex & PREFIX_REXRR != 0 {
957                    return self.invalid();
958                }
959
960                op_modreg.typ = OperandType::Reg;
961                op_modreg.size = operand_sizes[desc.modreg_size() as usize];
962                op_modreg.reg = reg_idx as _;
963            }
964
965            if desc.has_modrm() {
966                let op_modrm = &mut inst.operands[desc.modrm_idx() as usize];
967                op_modrm.size = operand_sizes[desc.modrm_size() as usize];
968
969                let rm = op_byte & 0x07;
970                'end_modrm: loop {
971                    if op_byte >= 0xc0 {
972                        let mut reg_idx = rm;
973                        let reg_ty = desc.regty_modrm();
974                        op_modrm.misc = reg_ty;
975                        if reg_ty < 2 {
976                            reg_idx += if prefix_rex & PREFIX_REXB != 0 { 8 } else { 0 };
977                        }
978
979                        if prefix_evex != 0 && reg_ty == 0 {
980                            reg_idx += if prefix_rex & PREFIX_REXX != 0 { 16 } else { 0 };
981                        }
982
983                        op_modrm.typ = OperandType::Reg;
984                        op_modrm.reg = reg_idx;
985                    } else {
986                        let mut dispscale = 0;
987                        if prefix_evex != 0 {
988                            if prefix_evex & 0x80 != 0 && desc.modrm_idx() == 0 {
989                                return self.invalid();
990                            }
991
992                            if prefix_evex & 0x10 != 0 {
993                                if !desc.evex_bcst() {
994                                    return self.invalid();
995                                }
996
997                                if desc.evex_bcst16() {
998                                    dispscale = 1;
999                                } else {
1000                                    dispscale = if prefix_rex & PREFIX_REXW != 0 { 3 } else { 2 };
1001                                }
1002
1003                                inst.segment |= dispscale << 6;
1004                                op_modrm.typ = OperandType::MemBCST;
1005                            } else {
1006                                dispscale = op_modrm.size - 1;
1007                                op_modrm.typ = OperandType::Mem;
1008                            }
1009                        } else {
1010                            op_modrm.typ = OperandType::Mem;
1011                        }
1012
1013                        if addr_size == 1 {
1014                            assert!(!self.mode.is_64());
1015                            /*
1016                            if desc.vsib() {
1017                                return self.invalid();
1018                            }
1019
1020                            if rm < 6 {
1021                                op_modrm.misc = if rm & 1 != 0 {
1022                                    Reg::DI as u8
1023                                } else {
1024                                    Reg::SI as u8
1025                                };
1026
1027                                if rm < 4 {
1028                                    op_modrm.reg = if rm & 2 != 0 {
1029                                        Reg::BP as u8
1030                                    } else {
1031                                        Reg::BX as u8
1032                                    };
1033                                } else if rm < 6 || op_byte & 0xc7 == 0x06 {
1034                                    op_modrm.reg = Reg::None as u8;
1035                                } else {
1036                                    op_modrm.reg = if rm == 6 {
1037                                        Reg::BP as u8
1038                                    } else {
1039                                        Reg::BX as u8
1040                                    };
1041                                }
1042
1043                                let dispbase = &self.buf[self.cursor..];
1044
1045                                if op_byte & 0x40 != 0 {
1046                                    if self.cursor + 1 >= self.buf.len() {
1047                                        return self.partial();
1048                                    }
1049                                    self.read_u8();
1050
1051                                    inst.disp = ((load_le_1(dispbase) as i8) << dispscale) as i64;
1052                                } else if op_byte & 0x80 != 0 || op_byte & 0xc7 == 0x06 {
1053                                    if self.cursor + 2 >= self.buf.len() {
1054                                        return self.partial();
1055                                    }
1056
1057                                    self.cursor += 2;
1058                                    inst.disp = load_le_2(dispbase) as i16 as i64;
1059                                } else {
1060                                    inst.disp = 0;
1061                                }*/
1062                            break 'end_modrm;
1063                        }
1064
1065                        // SIB byte
1066                        let mut base = rm;
1067                        if rm == 4 {
1068                            if self.cursor >= self.buf.len() {
1069                                return self.partial();
1070                            }
1071
1072                            let sib = self.read_u8();
1073                            let scale = sib & 0xc0;
1074                            let mut idx = (sib & 0x38) >> 3;
1075                            idx += if prefix_rex & PREFIX_REXX != 0 { 8 } else { 0 };
1076                            base = sib & 0x07;
1077                            if idx == 4 {
1078                                idx = Reg::None as u8;
1079                            }
1080                            op_modrm.misc = scale | idx;
1081                        } else {
1082                            op_modrm.misc = Reg::None as u8;
1083                        }
1084
1085                        if desc.vsib() {
1086                            if rm != 4 {
1087                                return self.invalid();
1088                            }
1089
1090                            if op_modrm.misc & 0x3f == Reg::None as u8 {
1091                                op_modrm.misc &= 0xc4;
1092                            }
1093
1094                            if prefix_evex != 0 {
1095                                op_modrm.misc |= if prefix_evex & 0x8 != 0 { 0 } else { 0x10 };
1096                            }
1097                        }
1098
1099                        if op_byte < 0x40 && rm == 5 && self.mode.is_64() {
1100                            op_modrm.reg = Reg::IP as u8;
1101                        } else if op_byte < 0x40 && base == 5 {
1102                            op_modrm.reg = Reg::None as u8;
1103                        } else {
1104                            op_modrm.reg = base + if prefix_rex & PREFIX_REXB != 0 { 8 } else { 0 };
1105                        }
1106
1107                        let dispbase = &self.buf[self.cursor..];
1108
1109                        if op_byte & 0x40 != 0 {
1110                            if self.cursor + 1 > self.buf.len() {
1111                                return self.partial();
1112                            }
1113
1114                            self.read_u8();
1115                            inst.disp = ((load_le_1(dispbase) as i8 as i64) << dispscale) as i64;
1116                        } else if op_byte & 0x80 != 0 || (op_byte < 0x40 && base == 5) {
1117                            if self.cursor + 4 > self.buf.len() {
1118                                return self.partial();
1119                            }
1120                            self.cursor += 4;
1121                            inst.disp = load_le_4(dispbase) as i32 as i64;
1122                        } else {
1123                            inst.disp = 0;
1124                        }
1125
1126                        break;
1127                    }
1128
1129                    break;
1130                }
1131            }
1132
1133            if desc.has_vexreg() {
1134                let operand = &mut inst.operands[desc.vexreg_idx() as usize];
1135
1136                if desc.zeroreg_val() {
1137                    operand.typ = OperandType::Reg;
1138                    operand.size = 1;
1139                    operand.reg = Reg::CL as u8;
1140                    operand.misc = RegType::Gpl as u8;
1141                } else {
1142                    operand.typ = OperandType::Reg;
1143                    operand.size = operand_sizes[desc.vexreg_size() as usize];
1144                    if !self.mode.is_64() {
1145                        vex_operand &= 0x7;
1146                    }
1147
1148                    operand.reg = vex_operand as _;
1149
1150                    let reg_ty = desc.regty_vexreg();
1151
1152                    if prefix_rex & PREFIX_VEX != 0 {
1153                        if reg_ty == 2 && vex_operand >= 8 {
1154                            return self.invalid();
1155                        }
1156
1157                        if reg_ty == 3 {
1158                            operand.reg &= 0x7;
1159                        }
1160
1161                        operand.misc = ((3528u32 >> (3 * reg_ty as u32)) & 0x7) as u8;
1162                    } else {
1163                        operand.misc = ((2504u32 >> (3 * reg_ty as u32)) & 0x7) as u8
1164                    }
1165                }
1166            } else if vex_operand != 0 {
1167                return self.invalid();
1168            }
1169
1170            let imm_control = desc.imm_control();
1171
1172            if imm_control == 0 {
1173            } else if imm_control == 1 {
1174                let operand = &mut inst.operands[desc.imm_idx() as usize];
1175                operand.typ = OperandType::Imm;
1176                operand.size = 1;
1177                inst.imm = 1;
1178            } else if imm_control == 2 {
1179                // 2 = memory, address-sized, used for mov with moffs operand
1180                let operand = &mut inst.operands[desc.imm_idx() as usize];
1181                operand.typ = OperandType::Mem;
1182
1183                operand.size = operand_sizes[desc.imm_size() as usize];
1184                operand.reg = Reg::None as u8;
1185                operand.misc = Reg::None as u8;
1186
1187                let moffsz = 1usize << addr_size;
1188                if self.cursor + moffsz > self.buf.len() {
1189                    return self.partial();
1190                }
1191
1192                if moffsz == 2 {
1193                    inst.disp = load_le_2(&self.buf[self.cursor..]) as _;
1194                }
1195                if moffsz == 4 {
1196                    inst.disp = load_le_4(&self.buf[self.cursor..]) as _;
1197                }
1198
1199                if moffsz == 8 {
1200                    inst.disp = load_le_8(&self.buf[self.cursor..]) as _;
1201                }
1202
1203                self.cursor += moffsz;
1204            } else if imm_control == 3 {
1205                let operand = &mut inst.operands[desc.imm_idx() as usize];
1206                operand.typ = OperandType::Reg;
1207                operand.size = op_size;
1208
1209                operand.misc = RegType::Vec as u8;
1210
1211                if self.cursor + 1 > self.buf.len() {
1212                    return self.partial();
1213                }
1214
1215                let mut reg = load_le_1(&self.buf[self.cursor..]) as u8;
1216                self.cursor += 1;
1217
1218                if !self.mode.is_64() {
1219                    reg &= 0x7f;
1220                }
1221
1222                operand.reg = reg >> 4;
1223                inst.imm = (reg & 0x0f) as i64;
1224            } else if imm_control != 0 {
1225                // 4/5 = immediate, operand-sized/8 bit
1226                // 6/7 = offset, operand-sized/8 bit (used for jumps/calls)
1227                let imm_byte = imm_control & 1;
1228                let imm_offset = imm_control & 2;
1229
1230                let operand = &mut inst.operands[desc.imm_idx() as usize];
1231                operand.typ = OperandType::Imm;
1232
1233                if imm_byte != 0 {
1234                    if self.cursor + 1 > self.buf.len() {
1235                        return self.partial();
1236                    }
1237
1238                    inst.imm = load_le_1(&self.buf[self.cursor..]) as i8 as i64;
1239                    self.cursor += 1;
1240                    operand.size = if desc.imm_size() & 1 != 0 { 1 } else { op_size };
1241                } else {
1242                    operand.size = operand_sizes[desc.imm_size() as usize];
1243
1244                    let imm_size = if matches!(
1245                        inst.typ,
1246                        Opcode::RET | Opcode::RETF | Opcode::SSE_EXTRQ | Opcode::SSE_INSERTQ
1247                    ) {
1248                        2
1249                    } else if matches!(inst.typ, Opcode::JMPF | Opcode::CALLF) {
1250                        (1 << op_size >> 1) + 2
1251                    } else if matches!(inst.typ, Opcode::ENTER) {
1252                        3
1253                    } else {
1254                        if op_size == 2 {
1255                            2
1256                        } else {
1257                            4
1258                        }
1259                    };
1260
1261                    if self.cursor + imm_size > self.buf.len() {
1262                        return self.partial();
1263                    }
1264
1265                    if imm_size == 2 {
1266                        inst.imm = load_le_2(&self.buf[self.cursor..]) as i16 as i64;
1267                    } else if imm_size == 3 {
1268                        inst.imm = load_le_3(&self.buf[self.cursor..]) as i32 as i64;
1269                    } else if imm_size == 4 {
1270                        inst.imm = load_le_4(&self.buf[self.cursor..]) as i32 as i64;
1271                    } else if imm_size == 8 {
1272                        inst.imm = load_le_8(&self.buf[self.cursor..]) as i64;
1273                    }
1274
1275                    self.cursor += imm_size;
1276                }
1277
1278                if imm_offset != 0 {
1279                    if inst.address != 0 {
1280                        inst.imm += inst.address as i64 + (self.cursor - start) as i64;
1281                    } else {
1282                        operand.typ = OperandType::Off;
1283                    }
1284                }
1285            }
1286            break 'skip_modrm;
1287        }
1288
1289        if prefixes[PF_LOCK] != 0 {
1290            if !desc.lock() || inst.operands[0].typ != OperandType::Mem {
1291                return self.invalid();
1292            }
1293
1294            inst.flags |= 1;
1295        }
1296
1297        if desc.legacy() {
1298            if prefix_rex & PREFIX_REX == 0 {
1299                for i in 0..2 {
1300                    let operand = &mut inst.operands[i];
1301                    if operand.typ == OperandType::None {
1302                        break;
1303                    }
1304
1305                    if operand.typ == OperandType::Reg
1306                        && operand.misc == RegType::Gpl as u8
1307                        && operand.size == 1
1308                        && operand.reg >= 4
1309                    {
1310                        operand.misc = RegType::Gph as u8;
1311                    }
1312                }
1313            }
1314
1315            if inst.typ == Opcode::XCHG_NOP {
1316                if inst.operands[0].reg == 0 && inst.operands[1].reg == 0 {
1317                    inst.operands[0].typ = OperandType::None;
1318                    inst.operands[1].typ = OperandType::None;
1319                    inst.typ = Opcode::NOP;
1320                } else {
1321                    inst.typ = Opcode::XCHG;
1322                }
1323            }
1324
1325            if inst.typ == Opcode::_3DNOW {
1326                let opc3dn = inst.imm;
1327
1328                if opc3dn & 0x40 != 0 {
1329                    return self.invalid();
1330                }
1331
1332                let msk = if opc3dn & 0x80 != 0 {
1333                    0x88d144d144d14400u64 as i64
1334                } else {
1335                    0x30003000i64
1336                };
1337
1338                if (msk >> (opc3dn & 0x3f) & 1) == 0 {
1339                    return self.invalid();
1340                }
1341
1342                inst.operandsz = if desc.instr_width() { op_size - 1 } else { 0 };
1343            } else {
1344                inst.operandsz = 0;
1345            }
1346        }
1347        inst.typ = desc.typ;
1348        inst.size = (self.cursor - start as usize) as u8;
1349    }
1350
1351    pub fn decode(&mut self) -> Instruction {
1352        let mut inst = Instruction::default();
1353        self.decode_out(&mut inst);
1354        inst
1355    }
1356
    /// Records that the input buffer ended before the current instruction
    /// could be fully decoded. Decode helpers use `return self.partial();`
    /// to bail out, leaving the error sticky in `self.error`.
    fn partial(&mut self) {
        self.error = DecoderError::NoMoreBytes;
    }
1360
    /// Records that the bytes at the cursor do not form a valid encoding.
    /// Decode helpers use `return self.invalid();` to abort, leaving the
    /// error sticky in `self.error`.
    fn invalid(&mut self) {
        self.error = DecoderError::InvalidInstruction;
    }
1364
1365    pub fn can_decode(&self) -> bool {
1366        self.cursor < self.buf.len()
1367    }
1368
1369    fn peek(&self) -> u8 {
1370        self.buf[self.cursor]
1371    }
1372
1373    fn peek1(&self) -> u8 {
1374        self.buf[self.cursor + 1]
1375    }
1376
1377    fn peek2(&self) -> u8 {
1378        self.buf[self.cursor + 2]
1379    }
1380
1381    fn peek3(&self) -> u8 {
1382        self.buf[self.cursor + 3]
1383    }
1384
1385    fn read_u8(&mut self) -> u8 {
1386        let ret = self.buf[self.cursor];
1387        self.cursor += 1;
1388        return ret;
1389    }
1390}