cranelift_codegen/isa/x64/encoding/
evex.rs

1//! Encodes EVEX instructions. These instructions are those added by the AVX-512 extensions. The
2//! EVEX encoding requires a 4-byte prefix:
3//!
4//! Byte 0:  0x62
5//!         ┌───┬───┬───┬───┬───┬───┬───┬───┐
6//! Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
7//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
8//! Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
9//!         ├───┼───┼───┼───┼───┼───┼───┼───┤
10//! Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
11//!         └───┴───┴───┴───┴───┴───┴───┴───┘
12//!
13//! The prefix is then followed by the opcode byte, the ModR/M byte, and other optional suffixes
14//! (e.g. SIB byte, displacements, immediates) based on the instruction (see section 2.6, Intel
15//! Software Development Manual, volume 2A).
16
17use super::rex::{self, LegacyPrefixes, OpcodeMap};
18use crate::isa::x64::args::{Amode, Avx512TupleType};
19use crate::isa::x64::inst::Inst;
20use crate::MachBuffer;
21use core::ops::RangeInclusive;
22
/// Constructs an EVEX-encoded instruction using a builder pattern. This approach makes it visually
/// easier to transform something from the manual's syntax, `EVEX.256.66.0F38.W1 1F /r`, to code:
/// `EvexInstruction::new().length(...).prefix(...).map(...).w(true).opcode(0x1F).reg(...).rm(...)`.
pub struct EvexInstruction {
    // The 4-byte EVEX prefix, packed little-endian: the mandatory `0x62` lives in the low byte.
    bits: u32,
    // The single opcode byte emitted immediately after the prefix.
    opcode: u8,
    // The register encoded in the ModRM `reg` field (plus its R/R' extension bits in the prefix).
    reg: Register,
    // The register or memory operand encoded in the ModRM `rm` field.
    rm: RegisterOrAmode,
    // Tuple type used to compute disp8 scaling; `encode` panics if this is unset when `rm` is a
    // memory operand.
    tuple_type: Option<Avx512TupleType>,
    // Optional trailing immediate byte, emitted after the ModRM/SIB/displacement bytes.
    imm: Option<u8>,
}
34
35/// Because some of the bit flags in the EVEX prefix are reversed and users of `EvexInstruction` may
36/// choose to skip setting fields, here we set some sane defaults. Note that:
37/// - the first byte is always `0x62` but you will notice it at the end of the default `bits` value
38///   implemented--remember the little-endian order
39/// - some bits are always set to certain values: bits 10-11 to 0, bit 18 to 1
40/// - the other bits set correspond to reversed bits: R, X, B, R' (byte 1), vvvv (byte 2), V' (byte
41///   3).
42///
43/// See the `default_emission` test for what these defaults are equivalent to (e.g. using RAX,
44/// unsetting the W bit, etc.)
45impl Default for EvexInstruction {
46    fn default() -> Self {
47        Self {
48            bits: 0x08_7C_F0_62,
49            opcode: 0,
50            reg: Register::default(),
51            rm: RegisterOrAmode::Register(Register::default()),
52            tuple_type: None,
53            imm: None,
54        }
55    }
56}
57
#[allow(non_upper_case_globals)] // This makes it easier to match the bit range names to the manual's names.
impl EvexInstruction {
    /// Construct a default EVEX instruction.
    pub fn new() -> Self {
        Self::default()
    }

    /// Set the length of the instruction. Note that there are sets of instructions (i.e. rounding,
    /// memory broadcast) that modify the same underlying bits--at some point (TODO) we can add a
    /// way to set those context bits and verify that both are not used (e.g. rounding AND length).
    /// For now, this method is very convenient.
    #[inline(always)]
    pub fn length(mut self, length: EvexVectorLength) -> Self {
        self.write(Self::LL, EvexContext::Other { length }.bits() as u32);
        self
    }

    /// Set the legacy prefix byte of the instruction: None | 66 | F0 | F2 | F3. EVEX instructions
    /// pack these into the prefix, not as separate bytes.
    #[inline(always)]
    pub fn prefix(mut self, prefix: LegacyPrefixes) -> Self {
        self.write(Self::pp, prefix.bits() as u32);
        self
    }

    /// Set the opcode map byte of the instruction: None | 0F | 0F38 | 0F3A. EVEX instructions pack
    /// these into the prefix, not as separate bytes.
    #[inline(always)]
    pub fn map(mut self, map: OpcodeMap) -> Self {
        self.write(Self::mm, map.bits() as u32);
        self
    }

    /// Set the W bit, typically used to indicate an instruction using 64 bits of an operand (e.g.
    /// 64 bit lanes). EVEX packs this bit in the EVEX prefix; previous encodings used the REX
    /// prefix.
    #[inline(always)]
    pub fn w(mut self, w: bool) -> Self {
        self.write(Self::W, w as u32);
        self
    }

    /// Set the instruction opcode byte.
    #[inline(always)]
    pub fn opcode(mut self, opcode: u8) -> Self {
        self.opcode = opcode;
        self
    }

    /// Set the "tuple type" which is used for 8-bit scaling when a memory
    /// operand is used.
    #[inline(always)]
    pub fn tuple_type(mut self, tt: Avx512TupleType) -> Self {
        self.tuple_type = Some(tt);
        self
    }

    /// Set the register to use for the `reg` bits; many instructions use this as the write operand.
    /// Setting this affects both the ModRM byte (`reg` section) and the EVEX prefix (the extension
    /// bits for register encodings > 8).
    #[inline(always)]
    pub fn reg(mut self, reg: impl Into<Register>) -> Self {
        self.reg = reg.into();
        // The R (4th bit) and R' (5th bit) extension bits are stored inverted
        // in the EVEX prefix, hence the `!` before masking each down to one bit.
        let r = !(self.reg.0 >> 3) & 1;
        let r_ = !(self.reg.0 >> 4) & 1;
        self.write(Self::R, r as u32);
        self.write(Self::R_, r_ as u32);
        self
    }

    /// Set the mask to use. See section 2.6 in the Intel Software Developer's Manual, volume 2A for
    /// more details.
    #[allow(dead_code)]
    #[inline(always)]
    pub fn mask(mut self, mask: EvexMasking) -> Self {
        self.write(Self::aaa, mask.aaa_bits() as u32);
        self.write(Self::z, mask.z_bit() as u32);
        self
    }

    /// Set the `vvvvv` register; some instructions allow using this as a second, non-destructive
    /// source register in 3-operand instructions (e.g. 2 read, 1 write).
    #[allow(dead_code)]
    #[inline(always)]
    pub fn vvvvv(mut self, reg: impl Into<Register>) -> Self {
        let reg = reg.into();
        // Both the 4-bit `vvvv` field and its V' extension bit are stored inverted.
        self.write(Self::vvvv, !(reg.0 as u32) & 0b1111);
        self.write(Self::V_, !(reg.0 as u32 >> 4) & 0b1);
        self
    }

    /// Set the register to use for the `rm` bits; many instructions use this
    /// as the "read from register/memory" operand. Setting this affects both
    /// the ModRM byte (`rm` section) and the EVEX prefix (the extension bits
    /// for register encodings > 8).
    #[inline(always)]
    pub fn rm(mut self, reg: impl Into<RegisterOrAmode>) -> Self {
        // NB: See Table 2-31. 32-Register Support in 64-bit Mode Using EVEX
        // with Embedded REX Bits
        self.rm = reg.into();
        // For a register operand, X carries the 5th bit of the register
        // encoding; for a base+index memory operand it carries the 4th bit of
        // the index register.
        let x = match &self.rm {
            RegisterOrAmode::Register(r) => r.0 >> 4,
            RegisterOrAmode::Amode(Amode::ImmRegRegShift { index, .. }) => {
                index.to_real_reg().unwrap().hw_enc() >> 3
            }

            // These two modes technically don't use the X bit, so leave it at
            // 0.
            RegisterOrAmode::Amode(Amode::ImmReg { .. }) => 0,
            RegisterOrAmode::Amode(Amode::RipRelative { .. }) => 0,
        };
        // The X bit is stored in an inverted format, so invert it here.
        self.write(Self::X, u32::from(!x & 1));

        // B carries the 4th bit of either the register operand or the base
        // register of a memory operand.
        let b = match &self.rm {
            RegisterOrAmode::Register(r) => r.0 >> 3,
            RegisterOrAmode::Amode(Amode::ImmReg { base, .. }) => {
                base.to_real_reg().unwrap().hw_enc() >> 3
            }
            RegisterOrAmode::Amode(Amode::ImmRegRegShift { base, .. }) => {
                base.to_real_reg().unwrap().hw_enc() >> 3
            }
            // The 4th bit of %rip is 0
            RegisterOrAmode::Amode(Amode::RipRelative { .. }) => 0,
        };
        // The B bit is stored in an inverted format, so invert it here.
        self.write(Self::B, u32::from(!b & 1));
        self
    }

    /// Set the imm byte.
    #[inline(always)]
    pub fn imm(mut self, imm: u8) -> Self {
        self.imm = Some(imm);
        self
    }

    /// Emit the EVEX-encoded instruction to the code sink:
    ///
    /// - the 4-byte EVEX prefix;
    /// - the opcode byte;
    /// - the ModR/M byte
    /// - SIB bytes, if necessary
    /// - an optional immediate, if necessary (not currently implemented)
    ///
    /// # Panics
    ///
    /// Panics (via `scaling_for_8bit_disp`) if `rm` is a memory operand and no
    /// tuple type was set.
    pub fn encode(&self, sink: &mut MachBuffer<Inst>) {
        // Register a trap site before emitting any bytes so the trap metadata
        // points at the start of this instruction.
        if let RegisterOrAmode::Amode(amode) = &self.rm {
            if let Some(trap_code) = amode.get_flags().trap_code() {
                sink.add_trap(trap_code);
            }
        }
        // `bits` is already little-endian packed, so a single 4-byte write
        // emits `0x62` followed by the three payload bytes in order.
        sink.put4(self.bits);
        sink.put1(self.opcode);

        match &self.rm {
            RegisterOrAmode::Register(reg) => {
                let rm: u8 = (*reg).into();
                // Mod = 0b11 selects the register-direct addressing mode.
                sink.put1(rex::encode_modrm(3, self.reg.0 & 7, rm & 7));
            }
            RegisterOrAmode::Amode(amode) => {
                // EVEX memory operands use "compressed displacement" (disp8*N):
                // an 8-bit displacement is implicitly scaled by a tuple-type-
                // dependent factor.
                let scaling = self.scaling_for_8bit_disp();

                // A trailing immediate shifts any label-relative fixup, so the
                // displacement emitter must know how many bytes follow it.
                let bytes_at_end = if self.imm.is_some() { 1 } else { 0 };
                rex::emit_modrm_sib_disp(sink, self.reg.0 & 7, amode, bytes_at_end, Some(scaling));
            }
        }
        if let Some(imm) = self.imm {
            sink.put1(imm);
        }
    }

    // In order to simplify the encoding of the various bit ranges in the prefix, we specify those
    // ranges according to the table below (extracted from the Intel Software Development Manual,
    // volume 2A). Remember that, because we pack the 4-byte prefix into a little-endian `u32`, this
    // chart should be read from right-to-left, top-to-bottom. Note also that we start ranges at bit
    // 8, leaving bits 0-7 for the mandatory `0x62`.
    //         ┌───┬───┬───┬───┬───┬───┬───┬───┐
    // Byte 1: │ R │ X │ B │ R'│ 0 │ 0 │ m │ m │
    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
    // Byte 2: │ W │ v │ v │ v │ v │ 1 │ p │ p │
    //         ├───┼───┼───┼───┼───┼───┼───┼───┤
    // Byte 3: │ z │ L'│ L │ b │ V'│ a │ a │ a │
    //         └───┴───┴───┴───┴───┴───┴───┴───┘

    // Byte 1:
    const mm: RangeInclusive<u8> = 8..=9;
    const R_: RangeInclusive<u8> = 12..=12;
    const B: RangeInclusive<u8> = 13..=13;
    const X: RangeInclusive<u8> = 14..=14;
    const R: RangeInclusive<u8> = 15..=15;

    // Byte 2:
    const pp: RangeInclusive<u8> = 16..=17;
    const vvvv: RangeInclusive<u8> = 19..=22;
    const W: RangeInclusive<u8> = 23..=23;

    // Byte 3:
    const aaa: RangeInclusive<u8> = 24..=26;
    const V_: RangeInclusive<u8> = 27..=27;
    const b: RangeInclusive<u8> = 28..=28;
    const LL: RangeInclusive<u8> = 29..=30;
    const z: RangeInclusive<u8> = 31..=31;

    // A convenience method for writing the `value` bits to the given range in `self.bits`.
    #[inline]
    fn write(&mut self, range: RangeInclusive<u8>, value: u32) {
        assert!(ExactSizeIterator::len(&range) > 0);
        let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
        let mask: u32 = (1 << size) - 1; // Generate a bit mask.
        debug_assert!(
            value <= mask,
            "The written value should have fewer than {} bits.",
            size
        );
        let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
        self.bits &= mask_complement; // Clear the bits in `range`; otherwise the OR below may allow previously-set bits to slip through.
        let value = value << *range.start(); // Place the value in the correct location (assumes `value <= mask`).
        self.bits |= value; // Modify the bits in `range`.
    }

    /// A convenience method for reading the given range of bits in `self.bits`
    /// shifted to the LSB of the returned value.
    #[inline]
    fn read(&self, range: RangeInclusive<u8>) -> u32 {
        (self.bits >> range.start()) & ((1 << range.len()) - 1)
    }

    // Compute the disp8*N scaling factor implied by the currently-set tuple
    // type, vector length (L'L), W bit, and broadcast (b) bit. See Table 2-34
    // and 2-35 in the Intel SDM, volume 2A.
    fn scaling_for_8bit_disp(&self) -> i8 {
        use Avx512TupleType::*;

        // For "full"-style tuple types the scale is the vector width in bytes.
        let vector_size_scaling = || match self.read(Self::LL) {
            0b00 => 16,
            0b01 => 32,
            0b10 => 64,
            // L'L = 0b11 is reserved (#UD), so it is never written.
            _ => unreachable!(),
        };

        match self.tuple_type {
            Some(Full) => {
                // With broadcast enabled (b = 1), scaling is the element size:
                // 4 bytes for W = 0, 8 bytes for W = 1.
                if self.read(Self::b) == 1 {
                    if self.read(Self::W) == 0 {
                        4
                    } else {
                        8
                    }
                } else {
                    vector_size_scaling()
                }
            }
            Some(FullMem) => vector_size_scaling(),
            Some(Mem128) => 16,
            None => panic!("tuple type was not set"),
        }
    }
}
312
/// Describe the register index to use. This wrapper is a type-safe way to pass
/// around the registers defined in `inst/regs.rs`.
#[derive(Debug, Copy, Clone, Default)]
pub struct Register(u8);

impl From<u8> for Register {
    /// Wrap a hardware register encoding; EVEX can address at most 16 XMM
    /// registers here, hence the debug bound.
    fn from(reg: u8) -> Self {
        debug_assert!(reg < 16);
        Self(reg)
    }
}

// Idiomatic direction: implement `From` rather than `Into` (clippy
// `from_over_into`); the standard blanket impl still provides
// `Into<u8> for Register`, so existing `.into()` call sites keep working.
impl From<Register> for u8 {
    fn from(reg: Register) -> Self {
        reg.0
    }
}
328
/// A ModRM `rm` operand: either a direct register or a memory address (`Amode`).
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub enum RegisterOrAmode {
    Register(Register),
    Amode(Amode),
}
335
336impl From<u8> for RegisterOrAmode {
337    fn from(reg: u8) -> Self {
338        RegisterOrAmode::Register(reg.into())
339    }
340}
341
342impl From<Amode> for RegisterOrAmode {
343    fn from(amode: Amode) -> Self {
344        RegisterOrAmode::Amode(amode)
345    }
346}
347
/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
/// used together for certain classes of instructions; i.e., special care should be taken to ensure
/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
/// opcodes can result in an #UD.
#[allow(dead_code, missing_docs)] // Rounding and broadcast modes are not yet used.
pub enum EvexContext {
    // Register-to-register FP operation with static rounding control.
    RoundingRegToRegFP {
        rc: EvexRoundingControl,
    },
    // FP operation with suppress-all-exceptions (`sae`) control and an explicit length.
    NoRoundingFP {
        sae: bool,
        length: EvexVectorLength,
    },
    // Memory operation, optionally broadcasting a single element.
    MemoryOp {
        broadcast: bool,
        length: EvexVectorLength,
    },
    // Plain vector operation: only the vector length is encoded.
    Other {
        length: EvexVectorLength,
    },
}
370
371impl Default for EvexContext {
372    fn default() -> Self {
373        Self::Other {
374            length: EvexVectorLength::default(),
375        }
376    }
377}
378
379impl EvexContext {
380    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
381    pub fn bits(&self) -> u8 {
382        match self {
383            Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
384            Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
385            Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
386            Self::Other { length } => length.bits() << 1,
387        }
388    }
389}
390
/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Wider-length vectors are not yet used.
pub enum EvexVectorLength {
    V128,
    V256,
    V512,
}
398
399impl EvexVectorLength {
400    /// Encode the `L'` and `L` bits for merging with the P2 byte.
401    fn bits(&self) -> u8 {
402        match self {
403            Self::V128 => 0b00,
404            Self::V256 => 0b01,
405            Self::V512 => 0b10,
406            // 0b11 is reserved (#UD).
407        }
408    }
409}
410
411impl Default for EvexVectorLength {
412    fn default() -> Self {
413        Self::V128
414    }
415}
416
/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
#[allow(dead_code, missing_docs)] // Rounding controls are not yet used.
pub enum EvexRoundingControl {
    // Round to nearest, ties to even.
    RNE,
    // Round down (toward negative infinity).
    RD,
    // Round up (toward positive infinity).
    RU,
    // Round toward zero (truncate).
    RZ,
}
425
426impl EvexRoundingControl {
427    /// Encode the `L'` and `L` bits for merging with the P2 byte.
428    fn bits(&self) -> u8 {
429        match self {
430            Self::RNE => 0b00,
431            Self::RD => 0b01,
432            Self::RU => 0b10,
433            Self::RZ => 0b11,
434        }
435    }
436}
437
/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
/// Software Development Manual, volume 2A.
#[allow(dead_code, missing_docs)] // Masking is not yet used.
pub enum EvexMasking {
    None,
    // Masked-off lanes keep their previous destination value; `k` selects the mask register.
    Merging { k: u8 },
    // Masked-off lanes are zeroed; `k` selects the mask register.
    Zeroing { k: u8 },
}
446
447impl Default for EvexMasking {
448    fn default() -> Self {
449        EvexMasking::None
450    }
451}
452
453impl EvexMasking {
454    /// Encode the `z` bit for merging with the P2 byte.
455    pub fn z_bit(&self) -> u8 {
456        match self {
457            Self::None | Self::Merging { .. } => 0,
458            Self::Zeroing { .. } => 1,
459        }
460    }
461
462    /// Encode the `aaa` bits for merging with the P2 byte.
463    pub fn aaa_bits(&self) -> u8 {
464        match self {
465            Self::None => 0b000,
466            Self::Merging { k } | Self::Zeroing { k } => {
467                debug_assert!(*k <= 7);
468                *k
469            }
470        }
471    }
472}
473
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::MemFlags;
    use crate::isa::x64::args::Gpr;
    use crate::isa::x64::inst::regs;
    use std::vec::Vec;

    // As a sanity test, we verify that the output of `xed-asmparse-main 'vpabsq xmm0{k0},
    // xmm1'` matches this EVEX encoding machinery. The table below covers register operands
    // (including extended registers needing R/R'/B/X bits) and a range of addressing modes,
    // exercising both disp8*N compressed displacements and full 32-bit displacements.
    #[test]
    fn vpabsq() {
        // Only used to create a label for the RIP-relative case at the end of the table.
        let mut tmp = MachBuffer::<Inst>::new();
        let tests: &[(crate::Reg, RegisterOrAmode, Vec<u8>)] = &[
            // vpabsq %xmm1, %xmm0
            (
                regs::xmm0(),
                regs::xmm1().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0xc1],
            ),
            // vpabsq %xmm8, %xmm10
            (
                regs::xmm10(),
                regs::xmm8().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0xd0],
            ),
            // vpabsq %xmm15, %xmm3
            (
                regs::xmm3(),
                regs::xmm15().to_real_reg().unwrap().hw_enc().into(),
                vec![0x62, 0xd2, 0xfd, 0x08, 0x1f, 0xdf],
            ),
            // vpabsq (%rsi), %xmm12
            (
                regs::xmm12(),
                Amode::ImmReg {
                    simm32: 0,
                    base: regs::rsi(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x26],
            ),
            // vpabsq 8(%r15), %xmm14
            // 8 is not a multiple of the disp8 scale (16), so a 32-bit displacement is used.
            (
                regs::xmm14(),
                Amode::ImmReg {
                    simm32: 8,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0xb7, 0x08, 0x00, 0x00, 0x00],
            ),
            // vpabsq 16(%r15), %xmm14
            // 16 compresses to a disp8 of 1 (disp8 * 16).
            (
                regs::xmm14(),
                Amode::ImmReg {
                    simm32: 16,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x77, 0x01],
            ),
            // vpabsq 17(%rax), %xmm3
            (
                regs::xmm3(),
                Amode::ImmReg {
                    simm32: 17,
                    base: regs::rax(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xf2, 0xfd, 0x08, 0x1f, 0x98, 0x11, 0x00, 0x00, 0x00],
            ),
            // vpabsq (%rbx, %rsi, 8), %xmm9
            (
                regs::xmm9(),
                Amode::ImmRegRegShift {
                    simm32: 0,
                    base: Gpr::unwrap_new(regs::rbx()),
                    index: Gpr::unwrap_new(regs::rsi()),
                    shift: 3,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x0c, 0xf3],
            ),
            // vpabsq 1(%r11, %rdi, 4), %xmm13
            (
                regs::xmm13(),
                Amode::ImmRegRegShift {
                    simm32: 1,
                    base: Gpr::unwrap_new(regs::r11()),
                    index: Gpr::unwrap_new(regs::rdi()),
                    shift: 2,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![
                    0x62, 0x52, 0xfd, 0x08, 0x1f, 0xac, 0xbb, 0x01, 0x00, 0x00, 0x00,
                ],
            ),
            // vpabsq 128(%rsp, %r10, 2), %xmm5
            (
                regs::xmm5(),
                Amode::ImmRegRegShift {
                    simm32: 128,
                    base: Gpr::unwrap_new(regs::rsp()),
                    index: Gpr::unwrap_new(regs::r10()),
                    shift: 1,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x6c, 0x54, 0x08],
            ),
            // vpabsq 112(%rbp, %r13, 1), %xmm6
            (
                regs::xmm6(),
                Amode::ImmRegRegShift {
                    simm32: 112,
                    base: Gpr::unwrap_new(regs::rbp()),
                    index: Gpr::unwrap_new(regs::r13()),
                    shift: 0,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x74, 0x2d, 0x07],
            ),
            // vpabsq (%rbp, %r13, 1), %xmm7
            (
                regs::xmm7(),
                Amode::ImmRegRegShift {
                    simm32: 0,
                    base: Gpr::unwrap_new(regs::rbp()),
                    index: Gpr::unwrap_new(regs::r13()),
                    shift: 0,
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0xb2, 0xfd, 0x08, 0x1f, 0x7c, 0x2d, 0x00],
            ),
            // vpabsq 2032(%r12), %xmm8
            // 2032 = 127 * 16: the largest positive displacement that still fits in disp8.
            (
                regs::xmm8(),
                Amode::ImmReg {
                    simm32: 2032,
                    base: regs::r12(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x44, 0x24, 0x7f],
            ),
            // vpabsq 2048(%r13), %xmm9
            // 2048 = 128 * 16: just past the disp8 range, so a 32-bit displacement is used.
            (
                regs::xmm9(),
                Amode::ImmReg {
                    simm32: 2048,
                    base: regs::r13(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x8d, 0x00, 0x08, 0x00, 0x00],
            ),
            // vpabsq -16(%r14), %xmm10
            (
                regs::xmm10(),
                Amode::ImmReg {
                    simm32: -16,
                    base: regs::r14(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x56, 0xff],
            ),
            // vpabsq -5(%r15), %xmm11
            (
                regs::xmm11(),
                Amode::ImmReg {
                    simm32: -5,
                    base: regs::r15(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x52, 0xfd, 0x08, 0x1f, 0x9f, 0xfb, 0xff, 0xff, 0xff],
            ),
            // vpabsq -2048(%rdx), %xmm12
            (
                regs::xmm12(),
                Amode::ImmReg {
                    simm32: -2048,
                    base: regs::rdx(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x62, 0x80],
            ),
            // vpabsq -2064(%rsi), %xmm13
            (
                regs::xmm13(),
                Amode::ImmReg {
                    simm32: -2064,
                    base: regs::rsi(),
                    flags: MemFlags::trusted(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0xae, 0xf0, 0xf7, 0xff, 0xff],
            ),
            // a: vpabsq a(%rip), %xmm14
            (
                regs::xmm14(),
                Amode::RipRelative {
                    target: tmp.get_label(),
                }
                .into(),
                vec![0x62, 0x72, 0xfd, 0x08, 0x1f, 0x35, 0xf6, 0xff, 0xff, 0xff],
            ),
        ];

        for (dst, src, encoding) in tests {
            let mut sink = MachBuffer::new();
            // Bind a label at offset 0 so the RIP-relative case resolves deterministically.
            let label = sink.get_label();
            sink.bind_label(label, &mut Default::default());
            EvexInstruction::new()
                .prefix(LegacyPrefixes::_66)
                .map(OpcodeMap::_0F38)
                .w(true)
                .opcode(0x1F)
                .reg(dst.to_real_reg().unwrap().hw_enc())
                .rm(src.clone())
                .length(EvexVectorLength::V128)
                .tuple_type(Avx512TupleType::Full)
                .encode(&mut sink);
            let bytes0 = sink
                .finish(&Default::default(), &mut Default::default())
                .data;
            assert_eq!(
                bytes0.as_slice(),
                encoding.as_slice(),
                "dst={dst:?} src={src:?}"
            );
        }
    }

    /// Verify that the defaults are equivalent to an instruction with a `0x00` opcode using the
    /// "0" register (i.e. `rax`), with sane defaults for the various configurable parameters. This
    /// test is more interesting than it may appear because some of the parameters have flipped-bit
    /// representations (e.g. `vvvvv`) so emitting 0s as a default will not work.
    #[test]
    fn default_emission() {
        let mut sink = MachBuffer::new();
        EvexInstruction::new().encode(&mut sink);
        let bytes0 = sink
            .finish(&Default::default(), &mut Default::default())
            .data;

        let mut sink = MachBuffer::new();
        EvexInstruction::new()
            .length(EvexVectorLength::V128)
            .prefix(LegacyPrefixes::None)
            .map(OpcodeMap::None)
            .w(false)
            .opcode(0x00)
            .reg(regs::rax().to_real_reg().unwrap().hw_enc())
            .rm(regs::rax().to_real_reg().unwrap().hw_enc())
            .mask(EvexMasking::None)
            .encode(&mut sink);
        let bytes1 = sink
            .finish(&Default::default(), &mut Default::default())
            .data;

        assert_eq!(bytes0, bytes1);
    }
}