asmkit/x86/features/AVX2.rs
use crate::x86::assembler::*;
use crate::x86::operands::*;
use super::super::opcodes::*;
use crate::core::emitter::*;
use crate::core::operand::*;

/// A dummy operand that represents no register. Here just for simplicity.
const NOREG: Operand = Operand::new();

/// `VBROADCASTI128` (VBROADCASTI128).
/// Load integer data from the source operand (the second operand) and broadcast to all elements of the destination operand (the first operand).
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBROADCAST.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+----------+
/// | # | Operands |
/// +---+----------+
/// | 1 | Ymm, Mem |
/// +---+----------+
/// ```
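///
/// A minimal usage sketch (not a doctest), given an `Assembler` value `asm`. The
/// register name `ymm0` and the `Mem::base(rax)` constructor are placeholders
/// standing in for whatever register constants and memory-operand builders this
/// crate actually provides.
///
/// ```text
/// // Broadcast the 128-bit value at [rax] into both lanes of ymm0.
/// asm.vbroadcasti128(ymm0, Mem::base(rax));
/// ```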
pub trait Vbroadcasti128Emitter<A, B> {
    fn vbroadcasti128(&mut self, op0: A, op1: B);
}

impl<'a> Vbroadcasti128Emitter<Ymm, Mem> for Assembler<'a> {
    fn vbroadcasti128(&mut self, op0: Ymm, op1: Mem) {
        self.emit(VBROADCASTI128RM, op0.as_operand(), op1.as_operand(), &NOREG, &NOREG);
    }
}

/// `VEXTRACTI128` (VEXTRACTI128).
/// VEXTRACTI128/VEXTRACTI32x4 and VEXTRACTI64x2 extract 128-bits of doubleword integer values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at a 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or a 128-bit memory location.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTI128%3AVEXTRACTI32x4%3AVEXTRACTI64x2%3AVEXTRACTI32x8%3AVEXTRACTI64x4.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Mem, Ymm, Imm |
/// | 2 | Xmm, Ymm, Imm |
/// +---+---------------+
/// ```
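///
/// A minimal usage sketch (not a doctest); `xmm0`, `ymm1`, and `imm(1)` are
/// placeholder constructors assumed for illustration, and `asm` is an `Assembler`.
///
/// ```text
/// // Pull the upper 128-bit lane of ymm1 into xmm0 (imm8 = 1 selects lane 1).
/// asm.vextracti128(xmm0, ymm1, imm(1));
/// ```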
pub trait Vextracti128Emitter<A, B, C> {
    fn vextracti128(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> Vextracti128Emitter<Xmm, Ymm, Imm> for Assembler<'a> {
    fn vextracti128(&mut self, op0: Xmm, op1: Ymm, op2: Imm) {
        self.emit(VEXTRACTI128RRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> Vextracti128Emitter<Mem, Ymm, Imm> for Assembler<'a> {
    fn vextracti128(&mut self, op0: Mem, op1: Ymm, op2: Imm) {
        self.emit(VEXTRACTI128MRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VGATHERDPD` (VGATHERDPD).
/// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
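///
/// A minimal usage sketch (not a doctest). The VSIB memory form below is
/// illustrative only: `vsib(rax, xmm2, 8)` stands in for however this crate
/// spells a base + vector-index + scale operand, and `ymm0`/`ymm1` are
/// placeholder register constants.
///
/// ```text
/// // Gather four f64 values from [rax + xmm2[i]*8] into ymm0; ymm1 is the
/// // element mask and is cleared by the instruction when it completes.
/// asm.vgatherdpd_3(ymm0, vsib(rax, xmm2, 8), ymm1);
/// ```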
pub trait VgatherdpdEmitter_3<A, B, C> {
    fn vgatherdpd_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VgatherdpdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vgatherdpd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VGATHERDPD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VgatherdpdEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vgatherdpd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VGATHERDPD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VGATHERDPS` (VGATHERDPS).
/// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
pub trait VgatherdpsEmitter_3<A, B, C> {
    fn vgatherdps_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VgatherdpsEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vgatherdps_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VGATHERDPS128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VgatherdpsEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vgatherdps_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VGATHERDPS256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VGATHERQPD` (VGATHERQPD).
/// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
pub trait VgatherqpdEmitter_3<A, B, C> {
    fn vgatherqpd_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VgatherqpdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vgatherqpd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VGATHERQPD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VgatherqpdEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vgatherqpd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VGATHERQPD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VGATHERQPS` (VGATHERQPS).
/// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// +---+---------------+
/// ```
pub trait VgatherqpsEmitter_3<A, B, C> {
    fn vgatherqps_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VgatherqpsEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vgatherqps_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VGATHERQPS128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VINSERTI128` (VINSERTI128).
/// VINSERTI32x4 and VINSERTI64x2 insert 128-bits of packed integer values from the second source operand (the third operand) into the destination operand (the first operand) at a 128-bit granular offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The high 6/7 bits of the immediate are ignored. The destination operand is a ZMM/YMM register and updated at 32 and 64-bit granularity according to the writemask.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTI128%3AVINSERTI32x4%3AVINSERTI64x2%3AVINSERTI32x8%3AVINSERTI64x4.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------------+
/// | # | Operands           |
/// +---+--------------------+
/// | 1 | Ymm, Ymm, Mem, Imm |
/// | 2 | Ymm, Ymm, Xmm, Imm |
/// +---+--------------------+
/// ```
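///
/// A minimal usage sketch (not a doctest); `ymm0`, `ymm1`, `xmm2`, and `imm(1)`
/// are placeholder constructors assumed for illustration.
///
/// ```text
/// // ymm0 = ymm1 with its upper 128-bit lane replaced by xmm2 (imm8 = 1).
/// asm.vinserti128(ymm0, ymm1, xmm2, imm(1));
/// ```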
pub trait Vinserti128Emitter<A, B, C, D> {
    fn vinserti128(&mut self, op0: A, op1: B, op2: C, op3: D);
}

impl<'a> Vinserti128Emitter<Ymm, Ymm, Xmm, Imm> for Assembler<'a> {
    fn vinserti128(&mut self, op0: Ymm, op1: Ymm, op2: Xmm, op3: Imm) {
        self.emit(VINSERTI128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

impl<'a> Vinserti128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
    fn vinserti128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
        self.emit(VINSERTI128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

/// `VPBLENDD` (VPBLENDD).
/// Dword elements from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding dword in the destination is copied from the source. If a bit in the mask, corresponding to a dword, is "1", then the dword is copied, else the dword is unchanged.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBLENDD.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------------+
/// | # | Operands           |
/// +---+--------------------+
/// | 1 | Xmm, Xmm, Mem, Imm |
/// | 2 | Xmm, Xmm, Xmm, Imm |
/// | 3 | Ymm, Ymm, Mem, Imm |
/// | 4 | Ymm, Ymm, Ymm, Imm |
/// +---+--------------------+
/// ```
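///
/// A minimal usage sketch (not a doctest); `ymm0`-`ymm2` and `imm(...)` are
/// placeholder constructors assumed for illustration.
///
/// ```text
/// // For each of the eight dwords: take ymm2 where the mask bit is 1, else ymm1.
/// // 0b1111_0000 selects ymm2 for the upper four dwords.
/// asm.vpblendd(ymm0, ymm1, ymm2, imm(0b1111_0000));
/// ```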
pub trait VpblenddEmitter<A, B, C, D> {
    fn vpblendd(&mut self, op0: A, op1: B, op2: C, op3: D);
}

impl<'a> VpblenddEmitter<Xmm, Xmm, Xmm, Imm> for Assembler<'a> {
    fn vpblendd(&mut self, op0: Xmm, op1: Xmm, op2: Xmm, op3: Imm) {
        self.emit(VPBLENDD128RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

impl<'a> VpblenddEmitter<Xmm, Xmm, Mem, Imm> for Assembler<'a> {
    fn vpblendd(&mut self, op0: Xmm, op1: Xmm, op2: Mem, op3: Imm) {
        self.emit(VPBLENDD128RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

impl<'a> VpblenddEmitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
    fn vpblendd(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
        self.emit(VPBLENDD256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

impl<'a> VpblenddEmitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
    fn vpblendd(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
        self.emit(VPBLENDD256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

/// `VPERM2I128` (VPERM2I128).
/// Permute 128 bit integer data from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2I128.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+--------------------+
/// | # | Operands           |
/// +---+--------------------+
/// | 1 | Ymm, Ymm, Mem, Imm |
/// | 2 | Ymm, Ymm, Ymm, Imm |
/// +---+--------------------+
/// ```
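///
/// A minimal usage sketch (not a doctest); `ymm0`-`ymm2` and `imm(0x20)` are
/// placeholder constructors assumed for illustration.
///
/// ```text
/// // imm8 = 0x20: low lane of ymm0 comes from the low lane of ymm1, high lane
/// // from the low lane of ymm2, i.e. ymm0 = [ymm1.lo, ymm2.lo].
/// asm.vperm2i128(ymm0, ymm1, ymm2, imm(0x20));
/// ```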
pub trait Vperm2i128Emitter<A, B, C, D> {
    fn vperm2i128(&mut self, op0: A, op1: B, op2: C, op3: D);
}

impl<'a> Vperm2i128Emitter<Ymm, Ymm, Ymm, Imm> for Assembler<'a> {
    fn vperm2i128(&mut self, op0: Ymm, op1: Ymm, op2: Ymm, op3: Imm) {
        self.emit(VPERM2I128_256RRRI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

impl<'a> Vperm2i128Emitter<Ymm, Ymm, Mem, Imm> for Assembler<'a> {
    fn vperm2i128(&mut self, op0: Ymm, op1: Ymm, op2: Mem, op3: Imm) {
        self.emit(VPERM2I128_256RRMI, op0.as_operand(), op1.as_operand(), op2.as_operand(), op3.as_operand());
    }
}

/// `VPGATHERDD` (VPGATHERDD).
/// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
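///
/// A minimal usage sketch (not a doctest). `vsib(rsi, ymm2, 4)` stands in for
/// however this crate builds a base + vector-index + scale operand; `ymm0`/`ymm1`
/// are placeholder register constants.
///
/// ```text
/// // Gather eight i32 values from [rsi + ymm2[i]*4] into ymm0. Only elements
/// // whose mask bit in ymm1 is set are loaded, and ymm1 is zeroed afterwards,
/// // so the mask must be re-materialized before the next gather.
/// asm.vpgatherdd_3(ymm0, vsib(rsi, ymm2, 4), ymm1);
/// ```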
pub trait VpgatherddEmitter_3<A, B, C> {
    fn vpgatherdd_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpgatherddEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vpgatherdd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VPGATHERDD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpgatherddEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vpgatherdd_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VPGATHERDD256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VPGATHERDQ` (VPGATHERDQ).
/// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
pub trait VpgatherdqEmitter_3<A, B, C> {
    fn vpgatherdq_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpgatherdqEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vpgatherdq_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VPGATHERDQ128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpgatherdqEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vpgatherdq_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VPGATHERDQ256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VPGATHERQD` (VPGATHERQD).
/// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// +---+---------------+
/// ```
pub trait VpgatherqdEmitter_3<A, B, C> {
    fn vpgatherqd_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpgatherqdEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vpgatherqd_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VPGATHERQD128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VPGATHERQQ` (VPGATHERQQ).
/// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Xmm, Mem, Xmm |
/// | 2 | Ymm, Mem, Ymm |
/// +---+---------------+
/// ```
pub trait VpgatherqqEmitter_3<A, B, C> {
    fn vpgatherqq_3(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpgatherqqEmitter_3<Xmm, Mem, Xmm> for Assembler<'a> {
    fn vpgatherqq_3(&mut self, op0: Xmm, op1: Mem, op2: Xmm) {
        self.emit(VPGATHERQQ128RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpgatherqqEmitter_3<Ymm, Mem, Ymm> for Assembler<'a> {
    fn vpgatherqq_3(&mut self, op0: Ymm, op1: Mem, op2: Ymm) {
        self.emit(VPGATHERQQ256RMR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VPMASKMOVD` (VPMASKMOVD).
/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Mem, Xmm, Xmm |
/// | 2 | Mem, Ymm, Ymm |
/// | 3 | Xmm, Xmm, Mem |
/// | 4 | Ymm, Ymm, Mem |
/// +---+---------------+
/// ```
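///
/// A minimal usage sketch (not a doctest); `ymm0`, `ymm1`, and `Mem::base(rdi)`
/// are placeholder constructors assumed for illustration.
///
/// ```text
/// // Masked load: dwords of ymm0 whose mask bit in ymm1 is clear become zero.
/// asm.vpmaskmovd(ymm0, ymm1, Mem::base(rdi));
/// // Masked store: only dwords of ymm0 whose mask bit in ymm1 is set are written.
/// asm.vpmaskmovd(Mem::base(rdi), ymm1, ymm0);
/// ```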
pub trait VpmaskmovdEmitter<A, B, C> {
    fn vpmaskmovd(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpmaskmovdEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
    fn vpmaskmovd(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
        self.emit(VPMASKMOVD128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovdEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
    fn vpmaskmovd(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
        self.emit(VPMASKMOVD256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovdEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
    fn vpmaskmovd(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
        self.emit(VPMASKMOVD128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovdEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
    fn vpmaskmovd(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
        self.emit(VPMASKMOVD256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

/// `VPMASKMOVQ` (VPMASKMOVQ).
/// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
///
///
/// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
///
/// Supported operand variants:
///
/// ```text
/// +---+---------------+
/// | # | Operands      |
/// +---+---------------+
/// | 1 | Mem, Xmm, Xmm |
/// | 2 | Mem, Ymm, Ymm |
/// | 3 | Xmm, Xmm, Mem |
/// | 4 | Ymm, Ymm, Mem |
/// +---+---------------+
/// ```
pub trait VpmaskmovqEmitter<A, B, C> {
    fn vpmaskmovq(&mut self, op0: A, op1: B, op2: C);
}

impl<'a> VpmaskmovqEmitter<Xmm, Xmm, Mem> for Assembler<'a> {
    fn vpmaskmovq(&mut self, op0: Xmm, op1: Xmm, op2: Mem) {
        self.emit(VPMASKMOVQ128RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovqEmitter<Ymm, Ymm, Mem> for Assembler<'a> {
    fn vpmaskmovq(&mut self, op0: Ymm, op1: Ymm, op2: Mem) {
        self.emit(VPMASKMOVQ256RRM, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovqEmitter<Mem, Xmm, Xmm> for Assembler<'a> {
    fn vpmaskmovq(&mut self, op0: Mem, op1: Xmm, op2: Xmm) {
        self.emit(VPMASKMOVQ128MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}

impl<'a> VpmaskmovqEmitter<Mem, Ymm, Ymm> for Assembler<'a> {
    fn vpmaskmovq(&mut self, op0: Mem, op1: Ymm, op2: Ymm) {
        self.emit(VPMASKMOVQ256MRR, op0.as_operand(), op1.as_operand(), op2.as_operand(), &NOREG);
    }
}


impl<'a> Assembler<'a> {
    /// `VBROADCASTI128` (VBROADCASTI128).
    /// Load integer data from the source operand (the second operand) and broadcast to all elements of the destination operand (the first operand).
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBROADCAST.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+----------+
    /// | # | Operands |
    /// +---+----------+
    /// | 1 | Ymm, Mem |
    /// +---+----------+
    /// ```
    #[inline]
    pub fn vbroadcasti128<A, B>(&mut self, op0: A, op1: B)
    where Assembler<'a>: Vbroadcasti128Emitter<A, B> {
        <Self as Vbroadcasti128Emitter<A, B>>::vbroadcasti128(self, op0, op1);
    }
    /// `VEXTRACTI128` (VEXTRACTI128).
    /// VEXTRACTI128/VEXTRACTI32x4 and VEXTRACTI64x2 extract 128-bits of doubleword integer values from the source operand (the second operand) and store to the low 128-bit of the destination operand (the first operand). The 128-bit data extraction occurs at a 128-bit granular offset specified by imm8[0] (256-bit) or imm8[1:0] as the multiply factor. The destination may be either a vector register or a 128-bit memory location.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VEXTRACTI128%3AVEXTRACTI32x4%3AVEXTRACTI64x2%3AVEXTRACTI32x8%3AVEXTRACTI64x4.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Mem, Ymm, Imm |
    /// | 2 | Xmm, Ymm, Imm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vextracti128<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: Vextracti128Emitter<A, B, C> {
        <Self as Vextracti128Emitter<A, B, C>>::vextracti128(self, op0, op1, op2);
    }
    /// `VGATHERDPD` (VGATHERDPD).
    /// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vgatherdpd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VgatherdpdEmitter_3<A, B, C> {
        <Self as VgatherdpdEmitter_3<A, B, C>>::vgatherdpd_3(self, op0, op1, op2);
    }
    /// `VGATHERDPS` (VGATHERDPS).
    /// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vgatherdps_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VgatherdpsEmitter_3<A, B, C> {
        <Self as VgatherdpsEmitter_3<A, B, C>>::vgatherdps_3(self, op0, op1, op2);
    }
    /// `VGATHERQPD` (VGATHERQPD).
    /// The instruction conditionally loads up to 2 or 4 double precision floating-point values from memory addresses specified by the memory operand (the second operand) and using qword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPD%3AVGATHERQPD.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vgatherqpd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VgatherqpdEmitter_3<A, B, C> {
        <Self as VgatherqpdEmitter_3<A, B, C>>::vgatherqpd_3(self, op0, op1, op2);
    }
    /// `VGATHERQPS` (VGATHERQPS).
    /// The instruction conditionally loads up to 4 or 8 single-precision floating-point values from memory addresses specified by the memory operand (the second operand) and using dword indices. The memory operand uses the VSIB form of the SIB byte to specify a general purpose register operand as the common base, a vector register for an array of indices relative to the base and a constant scale factor.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VGATHERDPS%3AVGATHERQPS.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vgatherqps_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VgatherqpsEmitter_3<A, B, C> {
        <Self as VgatherqpsEmitter_3<A, B, C>>::vgatherqps_3(self, op0, op1, op2);
    }
    /// `VINSERTI128` (VINSERTI128).
    /// VINSERTI32x4 and VINSERTI64x2 insert 128-bits of packed integer values from the second source operand (the third operand) into the destination operand (the first operand) at a 128-bit granular offset multiplied by imm8[0] (256-bit) or imm8[1:0]. The remaining portions of the destination are copied from the corresponding fields of the first source operand (the second operand). The second source operand can be either an XMM register or a 128-bit memory location. The high 6/7 bits of the immediate are ignored. The destination operand is a ZMM/YMM register and updated at 32 and 64-bit granularity according to the writemask.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VINSERTI128%3AVINSERTI32x4%3AVINSERTI64x2%3AVINSERTI32x8%3AVINSERTI64x4.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+--------------------+
    /// | # | Operands           |
    /// +---+--------------------+
    /// | 1 | Ymm, Ymm, Mem, Imm |
    /// | 2 | Ymm, Ymm, Xmm, Imm |
    /// +---+--------------------+
    /// ```
    #[inline]
    pub fn vinserti128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
    where Assembler<'a>: Vinserti128Emitter<A, B, C, D> {
        <Self as Vinserti128Emitter<A, B, C, D>>::vinserti128(self, op0, op1, op2, op3);
    }
    /// `VPBLENDD` (VPBLENDD).
    /// Dword elements from the source operand (second operand) are conditionally written to the destination operand (first operand) depending on bits in the immediate operand (third operand). The immediate bits (bits 7:0) form a mask that determines whether the corresponding dword in the destination is copied from the source. If a bit in the mask, corresponding to a dword, is "1", then the dword is copied, else the dword is unchanged.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPBLENDD.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+--------------------+
    /// | # | Operands           |
    /// +---+--------------------+
    /// | 1 | Xmm, Xmm, Mem, Imm |
    /// | 2 | Xmm, Xmm, Xmm, Imm |
    /// | 3 | Ymm, Ymm, Mem, Imm |
    /// | 4 | Ymm, Ymm, Ymm, Imm |
    /// +---+--------------------+
    /// ```
    #[inline]
    pub fn vpblendd<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
    where Assembler<'a>: VpblenddEmitter<A, B, C, D> {
        <Self as VpblenddEmitter<A, B, C, D>>::vpblendd(self, op0, op1, op2, op3);
    }
    /// `VPERM2I128` (VPERM2I128).
    /// Permute 128 bit integer data from the first source operand (second operand) and second source operand (third operand) using bits in the 8-bit immediate and store results in the destination operand (first operand). The first source operand is a YMM register, the second source operand is a YMM register or a 256-bit memory location, and the destination operand is a YMM register.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPERM2I128.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+--------------------+
    /// | # | Operands           |
    /// +---+--------------------+
    /// | 1 | Ymm, Ymm, Mem, Imm |
    /// | 2 | Ymm, Ymm, Ymm, Imm |
    /// +---+--------------------+
    /// ```
    #[inline]
    pub fn vperm2i128<A, B, C, D>(&mut self, op0: A, op1: B, op2: C, op3: D)
    where Assembler<'a>: Vperm2i128Emitter<A, B, C, D> {
        <Self as Vperm2i128Emitter<A, B, C, D>>::vperm2i128(self, op0, op1, op2, op3);
    }
    /// `VPGATHERDD` (VPGATHERDD).
    /// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpgatherdd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpgatherddEmitter_3<A, B, C> {
        <Self as VpgatherddEmitter_3<A, B, C>>::vpgatherdd_3(self, op0, op1, op2);
    }
    /// `VPGATHERDQ` (VPGATHERDQ).
    /// A set of 16 or 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into vector zmm1. The elements are specified via the VSIB (i.e., the index register is a zmm, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register (zmm1) is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERDD%3AVPGATHERDQ.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpgatherdq_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpgatherdqEmitter_3<A, B, C> {
        <Self as VpgatherdqEmitter_3<A, B, C>>::vpgatherdq_3(self, op0, op1, op2);
    }
    /// `VPGATHERQD` (VPGATHERQD).
    /// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpgatherqd_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpgatherqdEmitter_3<A, B, C> {
        <Self as VpgatherqdEmitter_3<A, B, C>>::vpgatherqd_3(self, op0, op1, op2);
    }
    /// `VPGATHERQQ` (VPGATHERQQ).
    /// A set of 8 doubleword/quadword memory locations pointed to by base address BASE_ADDR and index vector VINDEX with scale SCALE are gathered. The result is written into a vector register. The elements are specified via the VSIB (i.e., the index register is a vector register, holding packed indices). Elements will only be loaded if their corresponding mask bit is one. If an element’s mask bit is not set, the corresponding element of the destination register is left unchanged. The entire mask register will be set to zero by this instruction unless it triggers an exception.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPGATHERQD%3AVPGATHERQQ.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Xmm, Mem, Xmm |
    /// | 2 | Ymm, Mem, Ymm |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpgatherqq_3<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpgatherqqEmitter_3<A, B, C> {
        <Self as VpgatherqqEmitter_3<A, B, C>>::vpgatherqq_3(self, op0, op1, op2);
    }
    /// `VPMASKMOVD` (VPMASKMOVD).
    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Mem, Xmm, Xmm |
    /// | 2 | Mem, Ymm, Ymm |
    /// | 3 | Xmm, Xmm, Mem |
    /// | 4 | Ymm, Ymm, Mem |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpmaskmovd<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpmaskmovdEmitter<A, B, C> {
        <Self as VpmaskmovdEmitter<A, B, C>>::vpmaskmovd(self, op0, op1, op2);
    }
    /// `VPMASKMOVQ` (VPMASKMOVQ).
    /// Conditionally moves packed data elements from the second source operand into the corresponding data element of the destination operand, depending on the mask bits associated with each data element. The mask bits are specified in the first source operand.
    ///
    ///
    /// For more details, see the [Intel manual](https://www.felixcloutier.com/x86/VPMASKMOV.html).
    ///
    /// Supported operand variants:
    ///
    /// ```text
    /// +---+---------------+
    /// | # | Operands      |
    /// +---+---------------+
    /// | 1 | Mem, Xmm, Xmm |
    /// | 2 | Mem, Ymm, Ymm |
    /// | 3 | Xmm, Xmm, Mem |
    /// | 4 | Ymm, Ymm, Mem |
    /// +---+---------------+
    /// ```
    #[inline]
    pub fn vpmaskmovq<A, B, C>(&mut self, op0: A, op1: B, op2: C)
    where Assembler<'a>: VpmaskmovqEmitter<A, B, C> {
        <Self as VpmaskmovqEmitter<A, B, C>>::vpmaskmovq(self, op0, op1, op2);
    }
}