//! AVX2 instruction emitters (`asmkit/x86/features/AVX2.rs`).
use crate::core::emitter::*;
use crate::core::operand::*;
use crate::x86::assembler::*;
use crate::x86::operands::*;
use super::super::opcodes::*;
6
7/// A dummy operand that represents no register. Here just for simplicity.
8const NOREG: Operand = Operand::new();
9
10/// `VBROADCASTI128` (VBROADCASTI128). 
11/// Load integer data from the source operand (the second operand) and broadcast to all elements of the destination operand (the first operand).
12///
13///
14/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBROADCAST.html).
15///
16/// Supported operand variants:
17///
18/// ```text
19/// +---+----------+
20/// | # | Operands |
21/// +---+----------+
22/// | 1 | Ymm, Mem |
23/// +---+----------+
24/// ```
25pub trait Vbroadcasti128Emitter<A, B> {
26    fn vbroadcasti128(&mut self, op0: A, op1: B);
27}
28
29impl<'a> Vbroadcasti128Emitter<Ymm, Mem> for Assembler<'a> {
30    fn vbroadcasti128(&mut self, op0: Ymm, op1: Mem) {
31        self.emit(VBROADCASTI128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
32    }
33}
34
35/// `VEXTRACTI128` (VEXTRACTI128). 
36/// VEXTRACTI128/VEXTRACTI32x4 and VEXTRACTI64x2 extract 128-bits of doubleword integer values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at an 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or an 128-bit memory location.
37///
38///
39/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTI128%3AVEXTRACTI32x4%3AVEXTRACTI64x2%3AVEXTRACTI32x8%3AVEXTRACTI64x4.html).
40///
41/// Supported operand variants:
42///
43/// ```text
44/// +---+---------------+
45/// | # | Operands      |
46/// +---+---------------+
47/// | 1 | Mem, Ymm, Imm |
48/// | 2 | Xmm, Ymm, Imm |
49/// +---+---------------+
50/// ```
51pub trait Vextracti128Emitter<A, B, C> {
52    fn vextracti128(&mut self, op0: A, op1: B, op2: C);
53}
54
55impl<'a> Vextracti128Emitter<Xmm, Ymm, Imm> for Assembler<'a> {
56    fn vextracti128(&mut self, op0: Xmm, op1: Ymm, op2: Imm) {
57        self.emit(VEXTRACTI128RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
58    }
59}
60
61impl<'a> Vextracti128Emitter<Mem, Ymm, Imm> for Assembler<'a> {
62    fn vextracti128(&mut self, op0: Mem, op1: Ymm, op2: Imm) {
63        self.emit(VEXTRACTI128MRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
64    }
65}
66
67/// `VGATHERDPD` (VGATHERDPD). 
68/// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
69///
70///
71/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
72///
73/// Supported operand variants:
74///
75/// ```text
76/// +---+---------------+
77/// | # | Operands      |
78/// +---+---------------+
79/// | 1 | Xmm, Mem, Xmm |
80/// | 2 | Ymm, Mem, Ymm |
81/// +---+---------------+
82/// ```
83pub trait VgatherdpdEmitter_3<A, B, C> {
84    fn vgatherdpd_3(&mut self, op0: A, op1: B, op2: C);
85}
86
87impl<'a> VgatherdpdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
88    fn vgatherdpd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
89        self.emit(VGATHERDPD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
90    }
91}
92
93impl<'a> VgatherdpdEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
94    fn vgatherdpd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
95        self.emit(VGATHERDPD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
96    }
97}
98
99/// `VGATHERDPS` (VGATHERDPS). 
100/// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
101///
102///
103/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
104///
105/// Supported operand variants:
106///
107/// ```text
108/// +---+---------------+
109/// | # | Operands      |
110/// +---+---------------+
111/// | 1 | Xmm, Mem, Xmm |
112/// | 2 | Ymm, Mem, Ymm |
113/// +---+---------------+
114/// ```
115pub trait VgatherdpsEmitter_3<A, B, C> {
116    fn vgatherdps_3(&mut self, op0: A, op1: B, op2: C);
117}
118
119impl<'a> VgatherdpsEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
120    fn vgatherdps_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
121        self.emit(VGATHERDPS128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
122    }
123}
124
125impl<'a> VgatherdpsEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
126    fn vgatherdps_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
127        self.emit(VGATHERDPS256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
128    }
129}
130
131/// `VGATHERQPD` (VGATHERQPD). 
132/// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
133///
134///
135/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
136///
137/// Supported operand variants:
138///
139/// ```text
140/// +---+---------------+
141/// | # | Operands      |
142/// +---+---------------+
143/// | 1 | Xmm, Mem, Xmm |
144/// | 2 | Ymm, Mem, Ymm |
145/// +---+---------------+
146/// ```
147pub trait VgatherqpdEmitter_3<A, B, C> {
148    fn vgatherqpd_3(&mut self, op0: A, op1: B, op2: C);
149}
150
151impl<'a> VgatherqpdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
152    fn vgatherqpd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
153        self.emit(VGATHERQPD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
154    }
155}
156
157impl<'a> VgatherqpdEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
158    fn vgatherqpd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
159        self.emit(VGATHERQPD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
160    }
161}
162
163/// `VGATHERQPS` (VGATHERQPS). 
164/// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
165///
166///
167/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
168///
169/// Supported operand variants:
170///
171/// ```text
172/// +---+---------------+
173/// | # | Operands      |
174/// +---+---------------+
175/// | 1 | Xmm, Mem, Xmm |
176/// +---+---------------+
177/// ```
178pub trait VgatherqpsEmitter_3<A, B, C> {
179    fn vgatherqps_3(&mut self, op0: A, op1: B, op2: C);
180}
181
182impl<'a> VgatherqpsEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
183    fn vgatherqps_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
184        self.emit(VGATHERQPS128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
185    }
186}
187
188/// `VINSERTI128` (VINSERTI128). 
189/// VINSERTI32x4 and VINSERTI64x2 inserts 128-bits of packed integer values from the second source operand (the third operand) into the destination operand (the first operand) at an 128-bit granular offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The high 6/7bits of the immediate are ignored. The destination operand is a ZMM/YMM register and updated at 32 and 64-bit granularity according to the writemask.
190///
191///
192/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTI128%3AVINSERTI32x4%3AVINSERTI64x2%3AVINSERTI32x8%3AVINSERTI64x4.html).
193///
194/// Supported operand variants:
195///
196/// ```text
197/// +---+--------------------+
198/// | # | Operands           |
199/// +---+--------------------+
200/// | 1 | Ymm, Ymm, Mem, Imm |
201/// | 2 | Ymm, Ymm, Xmm, Imm |
202/// +---+--------------------+
203/// ```
204pub trait Vinserti128Emitter<A, B, C, D> {
205    fn vinserti128(&mut self, op0: A, op1: B, op2: C, op3: D);
206}
207
208impl<'a> Vinserti128Emitter<Ymm, Ymm, Xmm, Imm> for Assembler<'a> {
209    fn vinserti128(&mut self, op0: Ymm, op1: Ymm, op2: Xmm, op3: Imm) {
210        self.emit(VINSERTI128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
211    }
212}
213
214impl<'a> Vinserti128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
215    fn vinserti128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
216        self.emit(VINSERTI128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
217    }
218}
219
220/// `VPBLENDD` (VPBLENDD). 
221/// Dword elements from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding dword in the destination is copied from the source. If a bit in the mask, corresponding to a dword, is “1", then the dword is copied, else the dword is unchanged.
222///
223///
224/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBLENDD.html).
225///
226/// Supported operand variants:
227///
228/// ```text
229/// +---+--------------------+
230/// | # | Operands           |
231/// +---+--------------------+
232/// | 1 | Xmm, Xmm, Mem, Imm |
233/// | 2 | Xmm, Xmm, Xmm, Imm |
234/// | 3 | Ymm, Ymm, Mem, Imm |
235/// | 4 | Ymm, Ymm, Ymm, Imm |
236/// +---+--------------------+
237/// ```
238pub trait VpblenddEmitter<A, B, C, D> {
239    fn vpblendd(&mut self, op0: A, op1: B, op2: C, op3: D);
240}
241
242impl<'a> VpblenddEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
243    fn vpblendd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
244        self.emit(VPBLENDD128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
245    }
246}
247
248impl<'a> VpblenddEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
249    fn vpblendd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
250        self.emit(VPBLENDD128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
251    }
252}
253
254impl<'a> VpblenddEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
255    fn vpblendd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
256        self.emit(VPBLENDD256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
257    }
258}
259
260impl<'a> VpblenddEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
261    fn vpblendd(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
262        self.emit(VPBLENDD256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
263    }
264}
265
266/// `VPERM2I128` (VPERM2I128). 
267/// Permute 128 bit integer data from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
268///
269///
270/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2I128.html).
271///
272/// Supported operand variants:
273///
274/// ```text
275/// +---+--------------------+
276/// | # | Operands           |
277/// +---+--------------------+
278/// | 1 | Ymm, Ymm, Mem, Imm |
279/// | 2 | Ymm, Ymm, Ymm, Imm |
280/// +---+--------------------+
281/// ```
282pub trait Vperm2i128Emitter<A, B, C, D> {
283    fn vperm2i128(&mut self, op0: A, op1: B, op2: C, op3: D);
284}
285
286impl<'a> Vperm2i128Emitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
287    fn vperm2i128(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
288        self.emit(VPERM2I128_256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
289    }
290}
291
292impl<'a> Vperm2i128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
293    fn vperm2i128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
294        self.emit(VPERM2I128_256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
295    }
296}
297
298/// `VPGATHERDD` (VPGATHERDD). 
299/// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
300///
301///
302/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
303///
304/// Supported operand variants:
305///
306/// ```text
307/// +---+---------------+
308/// | # | Operands      |
309/// +---+---------------+
310/// | 1 | Xmm, Mem, Xmm |
311/// | 2 | Ymm, Mem, Ymm |
312/// +---+---------------+
313/// ```
314pub trait VpgatherddEmitter_3<A, B, C> {
315    fn vpgatherdd_3(&mut self, op0: A, op1: B, op2: C);
316}
317
318impl<'a> VpgatherddEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
319    fn vpgatherdd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
320        self.emit(VPGATHERDD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
321    }
322}
323
324impl<'a> VpgatherddEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
325    fn vpgatherdd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
326        self.emit(VPGATHERDD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
327    }
328}
329
330/// `VPGATHERDQ` (VPGATHERDQ). 
331/// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
332///
333///
334/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
335///
336/// Supported operand variants:
337///
338/// ```text
339/// +---+---------------+
340/// | # | Operands      |
341/// +---+---------------+
342/// | 1 | Xmm, Mem, Xmm |
343/// | 2 | Ymm, Mem, Ymm |
344/// +---+---------------+
345/// ```
346pub trait VpgatherdqEmitter_3<A, B, C> {
347    fn vpgatherdq_3(&mut self, op0: A, op1: B, op2: C);
348}
349
350impl<'a> VpgatherdqEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
351    fn vpgatherdq_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
352        self.emit(VPGATHERDQ128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
353    }
354}
355
356impl<'a> VpgatherdqEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
357    fn vpgatherdq_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
358        self.emit(VPGATHERDQ256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
359    }
360}
361
362/// `VPGATHERQD` (VPGATHERQD). 
363/// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
364///
365///
366/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
367///
368/// Supported operand variants:
369///
370/// ```text
371/// +---+---------------+
372/// | # | Operands      |
373/// +---+---------------+
374/// | 1 | Xmm, Mem, Xmm |
375/// +---+---------------+
376/// ```
377pub trait VpgatherqdEmitter_3<A, B, C> {
378    fn vpgatherqd_3(&mut self, op0: A, op1: B, op2: C);
379}
380
381impl<'a> VpgatherqdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
382    fn vpgatherqd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
383        self.emit(VPGATHERQD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
384    }
385}
386
387/// `VPGATHERQQ` (VPGATHERQQ). 
388/// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
389///
390///
391/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
392///
393/// Supported operand variants:
394///
395/// ```text
396/// +---+---------------+
397/// | # | Operands      |
398/// +---+---------------+
399/// | 1 | Xmm, Mem, Xmm |
400/// | 2 | Ymm, Mem, Ymm |
401/// +---+---------------+
402/// ```
403pub trait VpgatherqqEmitter_3<A, B, C> {
404    fn vpgatherqq_3(&mut self, op0: A, op1: B, op2: C);
405}
406
407impl<'a> VpgatherqqEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
408    fn vpgatherqq_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
409        self.emit(VPGATHERQQ128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
410    }
411}
412
413impl<'a> VpgatherqqEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
414    fn vpgatherqq_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
415        self.emit(VPGATHERQQ256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
416    }
417}
418
419/// `VPMASKMOVD` (VPMASKMOVD). 
420/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
421///
422///
423/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
424///
425/// Supported operand variants:
426///
427/// ```text
428/// +---+---------------+
429/// | # | Operands      |
430/// +---+---------------+
431/// | 1 | Mem, Xmm, Xmm |
432/// | 2 | Mem, Ymm, Ymm |
433/// | 3 | Xmm, Xmm, Mem |
434/// | 4 | Ymm, Ymm, Mem |
435/// +---+---------------+
436/// ```
437pub trait VpmaskmovdEmitter<A, B, C> {
438    fn vpmaskmovd(&mut self, op0: A, op1: B, op2: C);
439}
440
441impl<'a> VpmaskmovdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
442    fn vpmaskmovd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
443        self.emit(VPMASKMOVD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
444    }
445}
446
447impl<'a> VpmaskmovdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
448    fn vpmaskmovd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
449        self.emit(VPMASKMOVD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
450    }
451}
452
453impl<'a> VpmaskmovdEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
454    fn vpmaskmovd(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
455        self.emit(VPMASKMOVD128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
456    }
457}
458
459impl<'a> VpmaskmovdEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
460    fn vpmaskmovd(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
461        self.emit(VPMASKMOVD256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
462    }
463}
464
465/// `VPMASKMOVQ` (VPMASKMOVQ). 
466/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
467///
468///
469/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
470///
471/// Supported operand variants:
472///
473/// ```text
474/// +---+---------------+
475/// | # | Operands      |
476/// +---+---------------+
477/// | 1 | Mem, Xmm, Xmm |
478/// | 2 | Mem, Ymm, Ymm |
479/// | 3 | Xmm, Xmm, Mem |
480/// | 4 | Ymm, Ymm, Mem |
481/// +---+---------------+
482/// ```
483pub trait VpmaskmovqEmitter<A, B, C> {
484    fn vpmaskmovq(&mut self, op0: A, op1: B, op2: C);
485}
486
487impl<'a> VpmaskmovqEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
488    fn vpmaskmovq(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
489        self.emit(VPMASKMOVQ128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
490    }
491}
492
493impl<'a> VpmaskmovqEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
494    fn vpmaskmovq(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
495        self.emit(VPMASKMOVQ256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
496    }
497}
498
499impl<'a> VpmaskmovqEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
500    fn vpmaskmovq(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
501        self.emit(VPMASKMOVQ128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
502    }
503}
504
505impl<'a> VpmaskmovqEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
506    fn vpmaskmovq(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
507        self.emit(VPMASKMOVQ256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
508    }
509}
510
511
512impl<'a> Assembler<'a> {
513    /// `VBROADCASTI128` (VBROADCASTI128). 
514    /// Load integer data from the source operand (the second operand) and broadcast to all elements of the destination operand (the first operand).
515    ///
516    ///
517    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBROADCAST.html).
518    ///
519    /// Supported operand variants:
520    ///
521    /// ```text
522    /// +---+----------+
523    /// | # | Operands |
524    /// +---+----------+
525    /// | 1 | Ymm, Mem |
526    /// +---+----------+
527    /// ```
528    #[inline]
529    pub fn vbroadcasti128<A, B>(&mut self, op0: A, op1: B)
530    where Assembler<'a>: Vbroadcasti128Emitter<A, B> {
531        <Self as Vbroadcasti128Emitter<A, B>>::vbroadcasti128(self, op0, op1);
532    }
533    /// `VEXTRACTI128` (VEXTRACTI128). 
534    /// VEXTRACTI128/VEXTRACTI32x4 and VEXTRACTI64x2 extract 128-bits of doubleword integer values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at an 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or an 128-bit memory location.
535    ///
536    ///
537    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTI128%3AVEXTRACTI32x4%3AVEXTRACTI64x2%3AVEXTRACTI32x8%3AVEXTRACTI64x4.html).
538    ///
539    /// Supported operand variants:
540    ///
541    /// ```text
542    /// +---+---------------+
543    /// | # | Operands      |
544    /// +---+---------------+
545    /// | 1 | Mem, Ymm, Imm |
546    /// | 2 | Xmm, Ymm, Imm |
547    /// +---+---------------+
548    /// ```
549    #[inline]
550    pub fn vextracti128<A, B, C>(&mut self, op0: A, op1: B, op2: C)
551    where Assembler<'a>: Vextracti128Emitter<A, B, C> {
552        <Self as Vextracti128Emitter<A, B, C>>::vextracti128(self, op0, op1, op2);
553    }
554    /// `VGATHERDPD` (VGATHERDPD). 
555    /// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
556    ///
557    ///
558    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
559    ///
560    /// Supported operand variants:
561    ///
562    /// ```text
563    /// +---+---------------+
564    /// | # | Operands      |
565    /// +---+---------------+
566    /// | 1 | Xmm, Mem, Xmm |
567    /// | 2 | Ymm, Mem, Ymm |
568    /// +---+---------------+
569    /// ```
570    #[inline]
571    pub fn vgatherdpd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
572    where Assembler<'a>: VgatherdpdEmitter_3<A, B, C> {
573        <Self as VgatherdpdEmitter_3<A, B, C>>::vgatherdpd_3(self, op0, op1, op2);
574    }
575    /// `VGATHERDPS` (VGATHERDPS). 
576    /// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
577    ///
578    ///
579    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
580    ///
581    /// Supported operand variants:
582    ///
583    /// ```text
584    /// +---+---------------+
585    /// | # | Operands      |
586    /// +---+---------------+
587    /// | 1 | Xmm, Mem, Xmm |
588    /// | 2 | Ymm, Mem, Ymm |
589    /// +---+---------------+
590    /// ```
591    #[inline]
592    pub fn vgatherdps_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
593    where Assembler<'a>: VgatherdpsEmitter_3<A, B, C> {
594        <Self as VgatherdpsEmitter_3<A, B, C>>::vgatherdps_3(self, op0, op1, op2);
595    }
596    /// `VGATHERQPD` (VGATHERQPD). 
597    /// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
598    ///
599    ///
600    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
601    ///
602    /// Supported operand variants:
603    ///
604    /// ```text
605    /// +---+---------------+
606    /// | # | Operands      |
607    /// +---+---------------+
608    /// | 1 | Xmm, Mem, Xmm |
609    /// | 2 | Ymm, Mem, Ymm |
610    /// +---+---------------+
611    /// ```
612    #[inline]
613    pub fn vgatherqpd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
614    where Assembler<'a>: VgatherqpdEmitter_3<A, B, C> {
615        <Self as VgatherqpdEmitter_3<A, B, C>>::vgatherqpd_3(self, op0, op1, op2);
616    }
617    /// `VGATHERQPS` (VGATHERQPS). 
618    /// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
619    ///
620    ///
621    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
622    ///
623    /// Supported operand variants:
624    ///
625    /// ```text
626    /// +---+---------------+
627    /// | # | Operands      |
628    /// +---+---------------+
629    /// | 1 | Xmm, Mem, Xmm |
630    /// +---+---------------+
631    /// ```
632    #[inline]
633    pub fn vgatherqps_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
634    where Assembler<'a>: VgatherqpsEmitter_3<A, B, C> {
635        <Self as VgatherqpsEmitter_3<A, B, C>>::vgatherqps_3(self, op0, op1, op2);
636    }
637    /// `VINSERTI128` (VINSERTI128). 
638    /// VINSERTI32x4 and VINSERTI64x2 inserts 128-bits of packed integer values from the second source operand (the third operand) into the destination operand (the first operand) at an 128-bit granular offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The high 6/7bits of the immediate are ignored. The destination operand is a ZMM/YMM register and updated at 32 and 64-bit granularity according to the writemask.
639    ///
640    ///
641    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTI128%3AVINSERTI32x4%3AVINSERTI64x2%3AVINSERTI32x8%3AVINSERTI64x4.html).
642    ///
643    /// Supported operand variants:
644    ///
645    /// ```text
646    /// +---+--------------------+
647    /// | # | Operands           |
648    /// +---+--------------------+
649    /// | 1 | Ymm, Ymm, Mem, Imm |
650    /// | 2 | Ymm, Ymm, Xmm, Imm |
651    /// +---+--------------------+
652    /// ```
653    #[inline]
654    pub fn vinserti128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
655    where Assembler<'a>: Vinserti128Emitter<A, B, C, D> {
656        <Self as Vinserti128Emitter<A, B, C, D>>::vinserti128(self, op0, op1, op2, op3);
657    }
658    /// `VPBLENDD` (VPBLENDD). 
659    /// Dword elements from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding dword in the destination is copied from the source. If a bit in the mask, corresponding to a dword, is “1", then the dword is copied, else the dword is unchanged.
660    ///
661    ///
662    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBLENDD.html).
663    ///
664    /// Supported operand variants:
665    ///
666    /// ```text
667    /// +---+--------------------+
668    /// | # | Operands           |
669    /// +---+--------------------+
670    /// | 1 | Xmm, Xmm, Mem, Imm |
671    /// | 2 | Xmm, Xmm, Xmm, Imm |
672    /// | 3 | Ymm, Ymm, Mem, Imm |
673    /// | 4 | Ymm, Ymm, Ymm, Imm |
674    /// +---+--------------------+
675    /// ```
676    #[inline]
677    pub fn vpblendd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
678    where Assembler<'a>: VpblenddEmitter<A, B, C, D> {
679        <Self as VpblenddEmitter<A, B, C, D>>::vpblendd(self, op0, op1, op2, op3);
680    }
681    /// `VPERM2I128` (VPERM2I128). 
682    /// Permute 128 bit integer data from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
683    ///
684    ///
685    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2I128.html).
686    ///
687    /// Supported operand variants:
688    ///
689    /// ```text
690    /// +---+--------------------+
691    /// | # | Operands           |
692    /// +---+--------------------+
693    /// | 1 | Ymm, Ymm, Mem, Imm |
694    /// | 2 | Ymm, Ymm, Ymm, Imm |
695    /// +---+--------------------+
696    /// ```
697    #[inline]
698    pub fn vperm2i128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
699    where Assembler<'a>: Vperm2i128Emitter<A, B, C, D> {
700        <Self as Vperm2i128Emitter<A, B, C, D>>::vperm2i128(self, op0, op1, op2, op3);
701    }
702    /// `VPGATHERDD` (VPGATHERDD). 
703    /// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
704    ///
705    ///
706    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
707    ///
708    /// Supported operand variants:
709    ///
710    /// ```text
711    /// +---+---------------+
712    /// | # | Operands      |
713    /// +---+---------------+
714    /// | 1 | Xmm, Mem, Xmm |
715    /// | 2 | Ymm, Mem, Ymm |
716    /// +---+---------------+
717    /// ```
718    #[inline]
719    pub fn vpgatherdd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
720    where Assembler<'a>: VpgatherddEmitter_3<A, B, C> {
721        <Self as VpgatherddEmitter_3<A, B, C>>::vpgatherdd_3(self, op0, op1, op2);
722    }
723    /// `VPGATHERDQ` (VPGATHERDQ). 
724    /// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
725    ///
726    ///
727    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
728    ///
729    /// Supported operand variants:
730    ///
731    /// ```text
732    /// +---+---------------+
733    /// | # | Operands      |
734    /// +---+---------------+
735    /// | 1 | Xmm, Mem, Xmm |
736    /// | 2 | Ymm, Mem, Ymm |
737    /// +---+---------------+
738    /// ```
739    #[inline]
740    pub fn vpgatherdq_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
741    where Assembler<'a>: VpgatherdqEmitter_3<A, B, C> {
742        <Self as VpgatherdqEmitter_3<A, B, C>>::vpgatherdq_3(self, op0, op1, op2);
743    }
744    /// `VPGATHERQD` (VPGATHERQD). 
745    /// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
746    ///
747    ///
748    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
749    ///
750    /// Supported operand variants:
751    ///
752    /// ```text
753    /// +---+---------------+
754    /// | # | Operands      |
755    /// +---+---------------+
756    /// | 1 | Xmm, Mem, Xmm |
757    /// +---+---------------+
758    /// ```
759    #[inline]
760    pub fn vpgatherqd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
761    where Assembler<'a>: VpgatherqdEmitter_3<A, B, C> {
762        <Self as VpgatherqdEmitter_3<A, B, C>>::vpgatherqd_3(self, op0, op1, op2);
763    }
764    /// `VPGATHERQQ` (VPGATHERQQ). 
765    /// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
766    ///
767    ///
768    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
769    ///
770    /// Supported operand variants:
771    ///
772    /// ```text
773    /// +---+---------------+
774    /// | # | Operands      |
775    /// +---+---------------+
776    /// | 1 | Xmm, Mem, Xmm |
777    /// | 2 | Ymm, Mem, Ymm |
778    /// +---+---------------+
779    /// ```
780    #[inline]
781    pub fn vpgatherqq_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
782    where Assembler<'a>: VpgatherqqEmitter_3<A, B, C> {
783        <Self as VpgatherqqEmitter_3<A, B, C>>::vpgatherqq_3(self, op0, op1, op2);
784    }
785    /// `VPMASKMOVD` (VPMASKMOVD). 
786    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
787    ///
788    ///
789    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
790    ///
791    /// Supported operand variants:
792    ///
793    /// ```text
794    /// +---+---------------+
795    /// | # | Operands      |
796    /// +---+---------------+
797    /// | 1 | Mem, Xmm, Xmm |
798    /// | 2 | Mem, Ymm, Ymm |
799    /// | 3 | Xmm, Xmm, Mem |
800    /// | 4 | Ymm, Ymm, Mem |
801    /// +---+---------------+
802    /// ```
803    #[inline]
804    pub fn vpmaskmovd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
805    where Assembler<'a>: VpmaskmovdEmitter<A, B, C> {
806        <Self as VpmaskmovdEmitter<A, B, C>>::vpmaskmovd(self, op0, op1, op2);
807    }
808    /// `VPMASKMOVQ` (VPMASKMOVQ). 
809    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
810    ///
811    ///
812    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
813    ///
814    /// Supported operand variants:
815    ///
816    /// ```text
817    /// +---+---------------+
818    /// | # | Operands      |
819    /// +---+---------------+
820    /// | 1 | Mem, Xmm, Xmm |
821    /// | 2 | Mem, Ymm, Ymm |
822    /// | 3 | Xmm, Xmm, Mem |
823    /// | 4 | Ymm, Ymm, Mem |
824    /// +---+---------------+
825    /// ```
826    #[inline]
827    pub fn vpmaskmovq<A, B, C>(&mut self, op0: A, op1: B, op2: C)
828    where Assembler<'a>: VpmaskmovqEmitter<A, B, C> {
829        <Self as VpmaskmovqEmitter<A, B, C>>::vpmaskmovq(self, op0, op1, op2);
830    }
831}